YouTube/IMDb: remove url parsing

This commit is contained in:
oddluck 2020-03-02 23:14:14 +00:00
parent f8e7ed3286
commit c5f0919751
2 changed files with 4 additions and 6 deletions

View File

@@ -39,7 +39,6 @@ import requests
import json import json
from fake_useragent import UserAgent from fake_useragent import UserAgent
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, quote_plus
try: try:
from supybot.i18n import PluginInternationalization from supybot.i18n import PluginInternationalization
@@ -57,14 +56,14 @@ class IMDb(callbacks.Plugin):
try: try:
url = None url = None
searchurl = "https://www.google.com/search?&q=" searchurl = "https://www.google.com/search?&q="
searchurl += quote_plus("{0} site:imdb.com/title/".format(query)) searchurl += "{0} site:imdb.com/title/".format(query)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
header = {'User-Agent':str(ua.random)} header = {'User-Agent':str(ua.random)}
data = requests.get(searchurl, headers=header, timeout=10) data = requests.get(searchurl, headers=header, timeout=10)
data.raise_for_status() data.raise_for_status()
soup = BeautifulSoup(data.content) soup = BeautifulSoup(data.content)
elements = soup.select('.r a') elements = soup.select('.r a')
url = urljoin(elements[0]['href'], urlparse(url).path) url = elements[0]['href']
except Exception: except Exception:
pass pass
return url return url

View File

@@ -39,7 +39,6 @@ import requests
import re import re
import pylyrics3 import pylyrics3
from fake_useragent import UserAgent from fake_useragent import UserAgent
from urllib.parse import urljoin, urlparse, quote_plus
try: try:
from supybot.i18n import PluginInternationalization from supybot.i18n import PluginInternationalization
@@ -58,7 +57,7 @@ class Lyrics(callbacks.Plugin):
url = None url = None
title = None title = None
searchurl = "https://www.google.com/search?&q=" searchurl = "https://www.google.com/search?&q="
searchurl += quote_plus("{0} site:lyrics.fandom.com/wiki/".format(lyric)) searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
header = {'User-Agent':str(ua.random)} header = {'User-Agent':str(ua.random)}
data = requests.get(searchurl, headers=header, timeout=10) data = requests.get(searchurl, headers=header, timeout=10)
@@ -66,7 +65,7 @@ class Lyrics(callbacks.Plugin):
soup = BeautifulSoup(data.content) soup = BeautifulSoup(data.content)
elements = soup.select('.r a') elements = soup.select('.r a')
title = soup.find("h3").getText().replace(":", " - ").split('|')[0] title = soup.find("h3").getText().replace(":", " - ").split('|')[0]
url = urljoin(elements[0]['href'], urlparse(url).path) url = elements[0]['href']
except Exception: except Exception:
pass pass
return title, url return title, url