FML: update scraper for fmylife.com changes

This commit is contained in:
James Lu 2019-06-23 11:42:02 -07:00
parent 5672008a31
commit 3c72c3a4f9

View File

@@ -29,7 +29,7 @@
### ###
import collections import collections
from supybot import utils, plugins, ircutils, callbacks from supybot import utils, plugins, ircutils, callbacks, log
from supybot.commands import * from supybot.commands import *
try: try:
from supybot.i18n import PluginInternationalization from supybot.i18n import PluginInternationalization
@@ -52,21 +52,25 @@ class FML(callbacks.Plugin):
@staticmethod @staticmethod
def _parse_panel(panel, fml_id=None): def _parse_panel(panel, fml_id=None):
"""Parses a FML entry panel for data. Returns a (fml_id, text, num_upvotes, num_downvotes) tuple.""" """Parses a FML entry panel for data. Returns a (fml_id, text, num_upvotes, num_downvotes) tuple."""
if panel and panel.p: if panel:
text = panel.p.text.strip() content = panel.find(class_='article-link')
log.debug("FML: parsing panel %s", panel)
log.debug("FML: parsing content %s", content)
if not content:
return
text = content.text.strip()
if not text.endswith(' FML'): # Ignore ads, promos, previews if not text.endswith(' FML'): # Ignore ads, promos, previews
return return
# If not given, extract the FML ID from the link # If not given, extract the FML ID from the link
if fml_id is None: if fml_id is None and content.name == 'a':
link = panel.p.a['href'] link = content['href']
fml_id = link.split('_', 1)[-1].split('.', 1)[0] fml_id = link.rsplit('_', 1)[-1].split('.', 1)[0]
voteup_btn = panel.find('button', class_='vote-up') vote_counts = panel.find_all('div', class_='vote-count')
votedown_btn = panel.find('button', class_='vote-down') upvotes = vote_counts[0].text.strip()
downvotes = vote_counts[1].text.strip()
upvotes = voteup_btn.text.strip()
downvotes = votedown_btn.text.strip()
data = (fml_id, text, upvotes, downvotes) data = (fml_id, text, upvotes, downvotes)
return data return data
@@ -77,7 +81,7 @@ class FML(callbacks.Plugin):
soup = BeautifulSoup(html) soup = BeautifulSoup(html)
results_count = 0 results_count = 0
for panel in soup.find_all('div', class_='panel-content'): for panel in soup.find_all('div', class_='article-contents'):
data = self._parse_panel(panel) data = self._parse_panel(panel)
if data: if data:
self.log.debug('FML: got entry: %s', str(data)) self.log.debug('FML: got entry: %s', str(data))
@@ -95,7 +99,7 @@ class FML(callbacks.Plugin):
if query: # Explicit ID given if query: # Explicit ID given
html = utils.web.getUrl(self.URL_ARTICLE % query) html = utils.web.getUrl(self.URL_ARTICLE % query)
soup = BeautifulSoup(html) soup = BeautifulSoup(html)
panel = soup.find('div', class_='panel-content') panel = soup.find('div', class_='article-contents')
data = self._parse_panel(panel, fml_id=query) data = self._parse_panel(panel, fml_id=query)
else: # Random search else: # Random search
if not len(self.cached_results): if not len(self.cached_results):