mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-27 13:31:10 -05:00
Wikifetch: ignore GPS coordinates from articles for countries, etc.
This commit is contained in:
parent
d000140891
commit
d147207ad1
@ -204,15 +204,20 @@ class Wikifetch(callbacks.Plugin):
|
|||||||
reply += format(_('This article appears to be a talk page: %u'), addr)
|
reply += format(_('This article appears to be a talk page: %u'), addr)
|
||||||
else:
|
else:
|
||||||
# Get the first paragraph as text.
|
# Get the first paragraph as text.
|
||||||
p = text_content.xpath("./p[1]")
|
paragraphs = []
|
||||||
if len(p) == 0 or 'wiki/Special:Search' in addr:
|
for p in text_content.xpath("./p"):
|
||||||
|
# Skip geographic coordinates, e.g. on articles for countries
|
||||||
|
if not p.xpath(".//span[@class='geo-dec']"):
|
||||||
|
paragraphs.append(p)
|
||||||
|
|
||||||
|
if (not paragraphs) or 'wiki/Special:Search' in addr:
|
||||||
if 'wikipedia:wikiproject' in addr.lower():
|
if 'wikipedia:wikiproject' in addr.lower():
|
||||||
reply += format(_('This page appears to be a WikiProject page, '
|
reply += format(_('This page appears to be a WikiProject page, '
|
||||||
'but it is too complex for us to parse: %u'), addr)
|
'but it is too complex for us to parse: %u'), addr)
|
||||||
else:
|
else:
|
||||||
irc.error(_('Not found, or page malformed.'), Raise=True)
|
irc.error(_('Not found, or page malformed.'), Raise=True)
|
||||||
else:
|
else:
|
||||||
p = p[0]
|
p = paragraphs[0]
|
||||||
# Replace <b> tags with IRC-style bold, this has to be
|
# Replace <b> tags with IRC-style bold, this has to be
|
||||||
# done indirectly because unescaped '\x02' is invalid in XML
|
# done indirectly because unescaped '\x02' is invalid in XML
|
||||||
for b_tag in p.xpath('//b'):
|
for b_tag in p.xpath('//b'):
|
||||||
|
@ -45,6 +45,13 @@ if network:
|
|||||||
def testStripInlineCitations(self):
|
def testStripInlineCitations(self):
|
||||||
self.assertNotRegexp('wiki UNICEF', '\[\d+\]')
|
self.assertNotRegexp('wiki UNICEF', '\[\d+\]')
|
||||||
|
|
||||||
|
def testIgnoreCoordinates(self):
|
||||||
|
# Articles for countries, cities, landmarks, etc. have GPS coordinates added to the top right.
|
||||||
|
# These should be ignored because we want to focus on the actual article text.
|
||||||
|
self.assertNotRegexp('wiki Canada', 'Coordinates\:')
|
||||||
|
self.assertNotRegexp('wiki Eiffel Tower', 'Coordinates\:')
|
||||||
|
self.assertNotRegexp('wiki Poland', 'Coordinates\:')
|
||||||
|
|
||||||
def testDisambig(self):
|
def testDisambig(self):
|
||||||
self.assertRegexp('wiki Python', 'is a disambiguation page.*'
|
self.assertRegexp('wiki Python', 'is a disambiguation page.*'
|
||||||
'Possible results include:.*?,.*?,')
|
'Possible results include:.*?,.*?,')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user