From 001b49b6c32d6ddd7201fbe1d0de2ff9b5831f3c Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 24 Mar 2017 19:08:25 -0700 Subject: [PATCH] Wikifetch: prefer links again when available --- Wikifetch/plugin.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Wikifetch/plugin.py b/Wikifetch/plugin.py index f6d3222..017aa08 100644 --- a/Wikifetch/plugin.py +++ b/Wikifetch/plugin.py @@ -171,11 +171,16 @@ class Wikifetch(callbacks.Plugin): # extract the address we got it from - most sites have the perm link # inside the page itself try: - addr = tree.find(".//div[@class='printfooter']/a").attrib['href'] - addr = re.sub('([&?]|(amp;)?)oldid=\d+$', '', addr) - except: - # If any of the above post-processing tricks fail, just ignore - pass + addr = tree.find(".//link[@rel='canonical']").attrib['href'] + except (ValueError, AttributeError): + self.log.debug('Wikifetch: failed link extraction, skipping') + try: + addr = tree.find(".//div[@class='printfooter']/a").attrib['href'] + addr = re.sub('([&?]|(amp;)?)oldid=\d+$', '', addr) + except (ValueError, AttributeError): + self.log.debug('Wikifetch: failed printfooter link extraction, skipping') + # If any of the above post-processing tricks fail, just ignore + pass # check if it's a disambiguation page disambig = tree.xpath('//table[@id="disambigbox"]') or \