IMDb/Lyrics/YouTube: simplify URL parsing

This commit is contained in:
oddluck 2020-02-25 01:09:09 +00:00
parent aaaebe6983
commit bbfa1c72e9
3 changed files with 7 additions and 12 deletions

View File

@@ -40,6 +40,7 @@ import json
import re
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
try:
from supybot.i18n import PluginInternationalization
@@ -62,9 +63,7 @@ class IMDb(callbacks.Plugin):
soup = BeautifulSoup(data.text)
elements = soup.select('.r a')
url = elements[0]['href']
url = re.split('https?://', url)[-1]
url = re.sub("&rct=.*", "", url)
url = "https://{0}".format(url)
url = urljoin(url, urlparse(url).path)
except Exception:
return
else:
@@ -77,7 +76,7 @@ class IMDb(callbacks.Plugin):
apikey = self.registryValue('omdbAPI')
url = self.dosearch(query)
if url:
imdb_id = url.split("/title/")[1].rstrip("/")
imdb_id = url.split("/title/")[-1].rstrip("/")
omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&tomatoes=true&apikey=%s" % (imdb_id, apikey)
else:
irc.reply("No results found for {0}".format(query))

View File

@@ -38,6 +38,7 @@ import requests
import re
import pylyrics3
from fake_useragent import UserAgent
from urllib.parse import urljoin, urlparse
try:
from supybot.i18n import PluginInternationalization
@@ -60,10 +61,8 @@ class Lyrics(callbacks.Plugin):
soup = BeautifulSoup(data.text)
elements = soup.select('.r a')
url = elements[0]['href']
url = re.split('https?://', url)[-1]
url = re.sub("&rct=.*", "", url)
url = "https://{0}".format(url)
title = soup.find("h3").getText()
urljoin(url, urlparse(url).path)
title = soup.find("h3").getText().replace(":", " - ").split('|')[0]
except Exception:
return
else:
@@ -90,7 +89,7 @@ class Lyrics(callbacks.Plugin):
else:
try:
lyrics = self.getlyrics(url)
irc.reply(title.replace(":", " - "))
irc.reply(title)
irc.reply(lyrics)
except Exception:
irc.reply("Unable to retrieve lyrics from {0}".format(url))

View File

@@ -66,9 +66,6 @@ class YouTube(callbacks.Plugin):
soup = BeautifulSoup(data.text)
elements = soup.select('.r a')
url = elements[0]['href']
url = re.split('https?://', url)[-1]
url = re.sub("&rct=.*", "", url)
url = "https://{0}".format(url)
except Exception:
return
else: