mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-27 13:31:10 -05:00
Wikifetch: ignore GPS coordinates from articles for countries, etc.
This commit is contained in:
parent
d000140891
commit
d147207ad1
@ -204,15 +204,20 @@ class Wikifetch(callbacks.Plugin):
|
||||
reply += format(_('This article appears to be a talk page: %u'), addr)
|
||||
else:
|
||||
# Get the first paragraph as text.
|
||||
p = text_content.xpath("./p[1]")
|
||||
if len(p) == 0 or 'wiki/Special:Search' in addr:
|
||||
paragraphs = []
|
||||
for p in text_content.xpath("./p"):
|
||||
# Skip geographic coordinates, e.g. on articles for countries
|
||||
if not p.xpath(".//span[@class='geo-dec']"):
|
||||
paragraphs.append(p)
|
||||
|
||||
if (not paragraphs) or 'wiki/Special:Search' in addr:
|
||||
if 'wikipedia:wikiproject' in addr.lower():
|
||||
reply += format(_('This page appears to be a WikiProject page, '
|
||||
'but it is too complex for us to parse: %u'), addr)
|
||||
else:
|
||||
irc.error(_('Not found, or page malformed.'), Raise=True)
|
||||
else:
|
||||
p = p[0]
|
||||
p = paragraphs[0]
|
||||
# Replace <b> tags with IRC-style bold; this has to be
|
||||
# done indirectly because unescaped '\x02' is invalid in XML
|
||||
for b_tag in p.xpath('//b'):
|
||||
|
@ -45,6 +45,13 @@ if network:
|
||||
def testStripInlineCitations(self):
|
||||
self.assertNotRegexp('wiki UNICEF', '\[\d+\]')
|
||||
|
||||
def testIgnoreCoordinates(self):
|
||||
# Articles for countries, cities, landmarks, etc. have GPS coordinates added to the top right.
|
||||
# These should be ignored because we want to focus on the actual article text.
|
||||
self.assertNotRegexp('wiki Canada', 'Coordinates\:')
|
||||
self.assertNotRegexp('wiki Eiffel Tower', 'Coordinates\:')
|
||||
self.assertNotRegexp('wiki Poland', 'Coordinates\:')
|
||||
|
||||
def testDisambig(self):
|
||||
self.assertRegexp('wiki Python', 'is a disambiguation page.*'
|
||||
'Possible results include:.*?,.*?,')
|
||||
|
Loading…
x
Reference in New Issue
Block a user