mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-26 21:11:14 -05:00
Wikifetch: prefer <link rel="canonical"> links again when available
This commit is contained in:
parent
b9481184cb
commit
001b49b6c3
@@ -171,11 +171,16 @@ class Wikifetch(callbacks.Plugin):
|
|||||||
# extract the address we got it from - most sites have the perm link
|
# extract the address we got it from - most sites have the perm link
|
||||||
# inside the page itself
|
# inside the page itself
|
||||||
try:
|
try:
|
||||||
addr = tree.find(".//div[@class='printfooter']/a").attrib['href']
|
addr = tree.find(".//link[@rel='canonical']").attrib['href']
|
||||||
addr = re.sub('([&?]|(amp;)?)oldid=\d+$', '', addr)
|
except (ValueError, AttributeError):
|
||||||
except:
|
self.log.debug('Wikifetch: failed <link rel="canonical"> link extraction, skipping')
|
||||||
# If any of the above post-processing tricks fail, just ignore
|
try:
|
||||||
pass
|
addr = tree.find(".//div[@class='printfooter']/a").attrib['href']
|
||||||
|
addr = re.sub('([&?]|(amp;)?)oldid=\d+$', '', addr)
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
self.log.debug('Wikifetch: failed printfooter link extraction, skipping')
|
||||||
|
# If any of the above post-processing tricks fail, just ignore
|
||||||
|
pass
|
||||||
|
|
||||||
# check if it's a disambiguation page
|
# check if it's a disambiguation page
|
||||||
disambig = tree.xpath('//table[@id="disambigbox"]') or \
|
disambig = tree.xpath('//table[@id="disambigbox"]') or \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user