mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-26 04:51:08 -05:00
58 lines
1.7 KiB
Python
58 lines
1.7 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import re
|
|
import sys
|
|
|
|
import mwparserfromhell
|
|
|
|
def fmt_node(w, summary=False):
|
|
s = ''
|
|
for node in w.nodes:
|
|
if isinstance(node, mwparserfromhell.nodes.text.Text):
|
|
text = str(node)
|
|
elif isinstance(node, mwparserfromhell.nodes.tag.Tag):
|
|
text = fmt_node(node.contents, summary=summary)
|
|
elif isinstance(node, mwparserfromhell.nodes.wikilink.Wikilink):
|
|
text = node.text or node.title
|
|
if ':' in node.title:
|
|
continue
|
|
elif isinstance(node, mwparserfromhell.nodes.external_link.ExternalLink):
|
|
text = node.title or node.url
|
|
else:
|
|
continue
|
|
|
|
if s or text.strip():
|
|
s += str(text)
|
|
if summary:
|
|
lines = s.lstrip().split('\n\n')
|
|
if len(lines) > 1:
|
|
s = lines[0]
|
|
break
|
|
return s.strip()
|
|
|
|
_RE_EMPTY_PARENTHESES = re.compile(r' ?\(\s+\)')
|
|
def _cleanup(text):
|
|
"""Attempt to clean up text a bit further."""
|
|
text = re.sub(_RE_EMPTY_PARENTHESES, '', text)
|
|
return text
|
|
|
|
def fmt(text, clean=True, **kwargs):
|
|
w = mwparserfromhell.parse(text)
|
|
output = fmt_node(w, **kwargs)
|
|
if clean:
|
|
output = _cleanup(output)
|
|
return output
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Generate plain text summaries from Wikitext input")
|
|
parser.add_argument('--no-summary', '-ns', action='store_true',
|
|
help='Return the whole page instead of just the first paragraph')
|
|
args = parser.parse_args()
|
|
|
|
result = fmt(sys.stdin.read(), summary=not args.no_summary)
|
|
print(result)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|