mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-27 13:31:10 -05:00
Wikifetch: skip looking at empty leading paragraphs
This commit is contained in:
parent
79559d48f1
commit
9221d87c29
@@ -206,8 +206,18 @@ class Wikifetch(callbacks.Plugin):
|
|||||||
# Get the first paragraph as text.
|
# Get the first paragraph as text.
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
for p in text_content.xpath("./p"):
|
for p in text_content.xpath("./p"):
|
||||||
|
self.log.debug('Wikifetch: looking at paragraph %s', p.text_content())
|
||||||
|
|
||||||
# Skip geographic coordinates, e.g. on articles for countries
|
# Skip geographic coordinates, e.g. on articles for countries
|
||||||
if not p.xpath(".//span[@class='geo-dec']"):
|
if p.xpath(".//span[@class='geo-dec']"):
|
||||||
|
continue
|
||||||
|
# 2018-07-19: some articles have an empty p tag with this class and no content (why?)
|
||||||
|
elif 'mw-empty-elt' in p.attrib.get('class', ''):
|
||||||
|
continue
|
||||||
|
# Skip <p> tags with no content, for obvious reasons
|
||||||
|
elif not p.text_content().strip():
|
||||||
|
continue
|
||||||
|
|
||||||
paragraphs.append(p)
|
paragraphs.append(p)
|
||||||
|
|
||||||
if (not paragraphs) or 'wiki/Special:Search' in addr:
|
if (not paragraphs) or 'wiki/Special:Search' in addr:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user