YouTube/IMDb: remove url parsing

This commit is contained in:
oddluck 2020-03-02 23:14:14 +00:00
parent f8e7ed3286
commit c5f0919751
2 changed files with 4 additions and 6 deletions

View File

@@ -39,7 +39,6 @@ import requests
import json
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
-from urllib.parse import urljoin, urlparse, quote_plus
try:
from supybot.i18n import PluginInternationalization
@@ -57,14 +56,14 @@ class IMDb(callbacks.Plugin):
try:
url = None
searchurl = "https://www.google.com/search?&q="
-searchurl += quote_plus("{0} site:imdb.com/title/".format(query))
+searchurl += "{0} site:imdb.com/title/".format(query)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
header = {'User-Agent':str(ua.random)}
data = requests.get(searchurl, headers=header, timeout=10)
data.raise_for_status()
soup = BeautifulSoup(data.content)
elements = soup.select('.r a')
-url = urljoin(elements[0]['href'], urlparse(url).path)
+url = elements[0]['href']
except Exception:
pass
return url

View File

@@ -39,7 +39,6 @@ import requests
import re
import pylyrics3
from fake_useragent import UserAgent
-from urllib.parse import urljoin, urlparse, quote_plus
try:
from supybot.i18n import PluginInternationalization
@@ -58,7 +57,7 @@ class Lyrics(callbacks.Plugin):
url = None
title = None
searchurl = "https://www.google.com/search?&q="
-searchurl += quote_plus("{0} site:lyrics.fandom.com/wiki/".format(lyric))
+searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
header = {'User-Agent':str(ua.random)}
data = requests.get(searchurl, headers=header, timeout=10)
@@ -66,7 +65,7 @@ class Lyrics(callbacks.Plugin):
soup = BeautifulSoup(data.content)
elements = soup.select('.r a')
title = soup.find("h3").getText().replace(":", " - ").split('|')[0]
-url = urljoin(elements[0]['href'], urlparse(url).path)
+url = elements[0]['href']
except Exception:
pass
return title, url