mirror of
https://github.com/jlu5/SupyPlugins.git
synced 2025-04-26 21:11:14 -05:00
FML: update scraper for fmylife.com changes
This commit is contained in:
parent
5672008a31
commit
3c72c3a4f9
@ -29,7 +29,7 @@
|
|||||||
###
|
###
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
from supybot import utils, plugins, ircutils, callbacks
|
from supybot import utils, plugins, ircutils, callbacks, log
|
||||||
from supybot.commands import *
|
from supybot.commands import *
|
||||||
try:
|
try:
|
||||||
from supybot.i18n import PluginInternationalization
|
from supybot.i18n import PluginInternationalization
|
||||||
@ -52,21 +52,25 @@ class FML(callbacks.Plugin):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_panel(panel, fml_id=None):
|
def _parse_panel(panel, fml_id=None):
|
||||||
"""Parses a FML entry panel for data. Returns a (fml_id, text, num_upvotes, num_downvotes) tuple."""
|
"""Parses a FML entry panel for data. Returns a (fml_id, text, num_upvotes, num_downvotes) tuple."""
|
||||||
if panel and panel.p:
|
if panel:
|
||||||
text = panel.p.text.strip()
|
content = panel.find(class_='article-link')
|
||||||
|
log.debug("FML: parsing panel %s", panel)
|
||||||
|
log.debug("FML: parsing content %s", content)
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
return
|
||||||
|
text = content.text.strip()
|
||||||
if not text.endswith(' FML'): # Ignore ads, promos, previews
|
if not text.endswith(' FML'): # Ignore ads, promos, previews
|
||||||
return
|
return
|
||||||
|
|
||||||
# If not given, extract the FML ID from the link
|
# If not given, extract the FML ID from the link
|
||||||
if fml_id is None:
|
if fml_id is None and content.name == 'a':
|
||||||
link = panel.p.a['href']
|
link = content['href']
|
||||||
fml_id = link.split('_', 1)[-1].split('.', 1)[0]
|
fml_id = link.rsplit('_', 1)[-1].split('.', 1)[0]
|
||||||
|
|
||||||
voteup_btn = panel.find('button', class_='vote-up')
|
vote_counts = panel.find_all('div', class_='vote-count')
|
||||||
votedown_btn = panel.find('button', class_='vote-down')
|
upvotes = vote_counts[0].text.strip()
|
||||||
|
downvotes = vote_counts[1].text.strip()
|
||||||
upvotes = voteup_btn.text.strip()
|
|
||||||
downvotes = votedown_btn.text.strip()
|
|
||||||
|
|
||||||
data = (fml_id, text, upvotes, downvotes)
|
data = (fml_id, text, upvotes, downvotes)
|
||||||
return data
|
return data
|
||||||
@ -77,7 +81,7 @@ class FML(callbacks.Plugin):
|
|||||||
soup = BeautifulSoup(html)
|
soup = BeautifulSoup(html)
|
||||||
|
|
||||||
results_count = 0
|
results_count = 0
|
||||||
for panel in soup.find_all('div', class_='panel-content'):
|
for panel in soup.find_all('div', class_='article-contents'):
|
||||||
data = self._parse_panel(panel)
|
data = self._parse_panel(panel)
|
||||||
if data:
|
if data:
|
||||||
self.log.debug('FML: got entry: %s', str(data))
|
self.log.debug('FML: got entry: %s', str(data))
|
||||||
@ -95,7 +99,7 @@ class FML(callbacks.Plugin):
|
|||||||
if query: # Explicit ID given
|
if query: # Explicit ID given
|
||||||
html = utils.web.getUrl(self.URL_ARTICLE % query)
|
html = utils.web.getUrl(self.URL_ARTICLE % query)
|
||||||
soup = BeautifulSoup(html)
|
soup = BeautifulSoup(html)
|
||||||
panel = soup.find('div', class_='panel-content')
|
panel = soup.find('div', class_='article-contents')
|
||||||
data = self._parse_panel(panel, fml_id=query)
|
data = self._parse_panel(panel, fml_id=query)
|
||||||
else: # Random search
|
else: # Random search
|
||||||
if not len(self.cached_results):
|
if not len(self.cached_results):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user