IMDb/Lyrics: use google/ddg search plugins

This commit is contained in:
oddluck 2020-05-19 17:09:49 -04:00
parent f8b52b7b4c
commit c94b470859
10 changed files with 194 additions and 290 deletions

View File

@ -4,16 +4,25 @@ Obtain an [OMDB API Key](https://omdbapi.com/apikey.aspx)
`config plugins.imdb.omdbAPI your_key_here`
`config plugins.imdb.googleSearch True/False` enable/disable google searches
`config plugins.imdb.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
`config plugins.imdb.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.
`config plugins.imdb.template` change the reply template
Default template:
`\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
`$logo :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
`config plugins.imdb.logo` change the template logo
Default logo: `\x02\x031,8 IMDb \x03`
### Available variables for IMDB template ###
Variable | Description
---------------|------------
logo | Colored IMDB logo
title | Movie title
year | Release year
country | Country
@ -22,6 +31,7 @@ plot | Plot
imdbID | IMDB title ID#
imdbRating | IMDB rating
metascore | Metacritic score
tomatometer | Rotten Tomatoes score
released | Release date
genre | Genre
awards | Awards won
@ -34,4 +44,4 @@ website | Website URL
language | Language
boxOffice | Box Office
production | Production company
poster | Poster URL
poster | Poster URL

View File

@ -37,7 +37,7 @@ import supybot.world as world
# Use this for the version of this plugin. You may wish to put a CVS keyword
# in here if you're keeping the plugin in CVS or some similar system.
__version__ = "2020.02.24+git"
__version__ = "2020.05.19+git"
# XXX Replace this with an appropriate author or supybot.Author instance.
__author__ = supybot.Author("butterscotchstallion", "butterscotchstallion", "")

View File

@ -57,11 +57,19 @@ conf.registerChannelValue(
IMDb,
"template",
registry.String(
"\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, "
"$runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
"http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: "
"$actors :: Writer: $writer",
_("""Template for the output of a search query."""),
"$logo :: $title ($year, $country, [$rated], $genre, $runtime) "
":: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
"http://imdb.com/title/$imdbID :: $plot :: Director: $director :: "
"Cast: $actors :: Writer: $writer",
_("""IMDb reply template."""),
),
)
conf.registerChannelValue(
IMDb,
"logo",
registry.String(
"\x02\x031,8 IMDb \x03", _("""Logo used with {{logo}} in template""")
),
)
@ -74,28 +82,32 @@ conf.registerChannelValue(
),
)
conf.registerGlobalValue(
IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
conf.registerChannelValue(
IMDb,
"google",
registry.Integer(
1,
"""
Google search priority. Google plugin must be loaded.
0 = disabled. 1 = first. 2 = second.
""",
),
)
conf.registerChannelValue(
IMDb,
"googleSearch",
registry.Boolean(True, _("""Use google to perform searches for better results.""")),
)
conf.registerGlobalValue(
IMDb,
"userAgents",
registry.CommaSeparatedListOfStrings(
[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
"Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
"Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
],
_("""Reported user agent when fetching links"""),
"ddg",
registry.Integer(
2,
"""
DDG search priority. DDG plugin must be loaded.
0 = disabled. 1 = first. 2 = second.
""",
),
)
conf.registerGlobalValue(
IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
)
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -35,11 +35,9 @@ import supybot.ircutils as ircutils
import supybot.ircmsgs as ircmsgs
import supybot.callbacks as callbacks
import supybot.log as log
import json, random, re
import requests
import json
from bs4 import BeautifulSoup
import random
import re
from string import Template
try:
from supybot.i18n import PluginInternationalization
@ -51,234 +49,108 @@ except ImportError:
_ = lambda x: x
class lowercase_dict(dict):
    """dict whose item lookups lower-case the requested key first.

    Only ``__getitem__`` is overridden: keys are stored exactly as they are
    inserted, so callers are expected to insert already lower-cased keys.
    """

    def __getitem__(self, name):
        # Normalise the lookup key, then defer to the regular dict lookup.
        key = name.lower()
        return super().__getitem__(key)
class lowercase_template(Template):
    """string.Template whose ``safe_substitute`` ignores the case of names.

    Every key of the supplied mapping and keyword arguments is lower-cased
    and stored in a ``lowercase_dict``, which in turn lower-cases each
    placeholder name when it is looked up.
    """

    def safe_substitute(self, mapping=None, **kws):
        """Substitute placeholders, matching names case-insensitively.

        Keyword arguments take precedence over entries of *mapping* that
        lower-case to the same name.
        """
        lowered = lowercase_dict()
        positional = {} if mapping is None else mapping
        # kws is merged last so that it overrides the positional mapping.
        for source in (positional, kws):
            for key, value in source.items():
                lowered[key.lower()] = value
        return Template.safe_substitute(self, lowered)
class IMDb(callbacks.Plugin):
"""Queries OMDB database for information about IMDb titles"""
threaded = True
def dosearch(self, query):
    """Scrape a Google result page for an IMDb title URL matching *query*.

    Sends the query restricted to ``site:imdb.com/title/`` with a randomly
    chosen user agent from the ``userAgents`` registry value.

    Returns the ``href`` of the first anchor pointing at an IMDb title page,
    or ``None`` when the request fails or no such anchor is present. This is
    a best-effort lookup: failures are logged at debug level, never raised.
    """
    url = None
    try:
        header = {"User-Agent": random.choice(self.registryValue("userAgents"))}
        # Let requests build the query string so spaces, '&', '#', etc. in
        # the user's text are percent-encoded instead of corrupting the URL.
        data = requests.get(
            "https://www.google.com/search",
            params={"q": "{0} site:imdb.com/title/".format(query)},
            headers=header,
            timeout=10,
        )
        data.raise_for_status()
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(data.content, "html.parser")
        link = soup.find(
            "a", attrs={"href": re.compile(r"https://www\.imdb\.com/title/tt\d+/$")}
        )
        if link is not None:
            url = link["href"]
    except Exception as e:
        # Still best-effort, but leave a trace instead of failing silently.
        log.debug("IMDb: google search failed: %s" % e)
    return url
def dosearch(self, irc, channel, text):
    """Find an IMDb title URL for *text* via the Google and/or DDG plugins.

    The ``google`` and ``ddg`` channel registry values give each provider a
    priority (0 = disabled, 1 = first, 2 = second). Providers are queried in
    priority order and the first result URL that looks like an IMDb title
    page is returned immediately, so a hit from the first provider can no
    longer be overwritten by a miss from the second one.

    Returns the matching URL string, or ``None`` when both providers are
    disabled, not loaded, or yield no IMDb title URL.
    """
    google_priority = self.registryValue("google", channel)
    ddg_priority = self.registryValue("ddg", channel)
    google = irc.getCallback("google") if google_priority > 0 else None
    ddg = irc.getCallback("ddg") if ddg_priority > 0 else None
    if not google and not ddg:
        return None
    query = "site:www.imdb.com/title/ %s" % text
    # Dots are escaped so that only the literal host name matches.
    pattern = re.compile(r"https?://www\.imdb\.com/title/tt\d+/$")
    for priority in range(1, 3):
        if google and google_priority == priority:
            results = google.decode(google.search(query, irc.network, channel))
            url_key = "url"
        elif ddg and ddg_priority == priority:
            results = ddg.search_core(
                query, channel_context=channel, max_results=10, show_snippet=False
            )
            # NOTE(review): DDG results are indexed positionally; index 2 is
            # presumably the URL of a (title, snippet, url) tuple - confirm
            # against the DDG plugin.
            url_key = 2
        else:
            continue
        for r in results or ():
            match = pattern.search(r[url_key])
            if match:
                return match.group(0)
    return None
def imdb(self, irc, msg, args, query):
"""<title>
Queries the OMDB API about an IMDb title. Search by title name or IMDb ID.
"""
channel = msg.channel
url = result = None
id = stop = False
apikey = self.registryValue("omdbAPI")
if not apikey:
irc.reply("Error: You must set an API key to use this plugin.")
url = response = result = None
if not self.registryValue("omdbAPI"):
irc.error("Error: You must set an API key to use this plugin.")
return
if re.match(r"tt\d+", query.strip()):
id = True
url = "http://imdb.com/title/{0}".format(query.strip())
if not id and self.registryValue("googleSearch", channel):
url = self.dosearch(query)
if url and "imdb.com/title/" in url:
imdb_id = url.split("/title/")[1].rstrip("/")
omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s" % (
imdb_id,
apikey,
id = re.match(r"tt\d+", query.strip())
if id:
url = "http://imdb.com/title/{0}".format(id.group(0))
if not id:
url = self.dosearch(irc, msg.channel, query)
if url:
id = url.split("/title/")[1].rstrip("/")
url = "http://www.omdbapi.com/?" + utils.web.urlencode(
{
"i": id,
"plot": "short",
"r": "json",
"apikey": self.registryValue("omdbAPI"),
}
)
log.debug("IMDb: requesting %s" % omdb_url)
log.debug("IMDb: requesting %s" % url)
else:
omdb_url = "http://www.omdbapi.com/?t=%s&plot=short&r=json&apikey=%s" % (
query,
apikey,
url = "http://www.omdbapi.com/?" + utils.web.urlencode(
{
"t": query,
"plot": "short",
"r": "json",
"apikey": self.registryValue("omdbAPI"),
}
)
try:
request = requests.get(omdb_url, timeout=10)
if request.status_code == requests.codes.ok:
response = json.loads(request.content)
not_found = "Error" in response
unknown_error = response["Response"] != "True"
if not_found or unknown_error:
match = re.match(r"(.*) \(*(\d\d\d\d)\)*$", query.strip())
if match:
query = match.group(1).strip()
year = match.group(2).strip()
omdb_url = (
"http://www.omdbapi.com/?t=%s&y=%s&plot=short&r=json&apikey=%s"
% (query, year, apikey)
)
request = requests.get(omdb_url, timeout=10)
if request.status_code == requests.codes.ok:
response = json.loads(request.content)
not_found = "Error" in response
unknown_error = response["Response"] != "True"
if not_found or unknown_error:
log.debug("IMDb: OMDB error for %s" % (omdb_url))
else:
log.error(
"IMDb OMDB API %s - %s"
% (request.status_code, request.content.decode())
)
else:
log.debug("IMDb: OMDB error for %s" % (omdb_url))
query = re.sub(r"\d\d\d\d", "", query)
omdb_url = (
"http://www.omdbapi.com/?s=%s&plot=short&r=json&apikey=%s"
% (query, apikey)
)
request = requests.get(omdb_url, timeout=10)
if request.status_code == requests.codes.ok:
response = json.loads(request.content)
not_found = "Error" in response
unknown_error = response["Response"] != "True"
if not_found or unknown_error:
log.debug("IMDb: OMDB error for %s" % (omdb_url))
elif (
response.get("Search")
and len(response.get("Search")) == 1
):
imdb_id = response["Search"][0]["imdbID"]
omdb_url = (
"http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s"
% (imdb_id, apikey)
)
request = requests.get(omdb_url, timeout=10)
if request.status_code == requests.codes.ok:
response = json.loads(request.content)
not_found = "Error" in response
unknown_error = response["Response"] != "True"
if not_found or unknown_error:
log.debug(
"IMDb: OMDB error for %s" % (omdb_url)
)
else:
log.error(
"IMDb OMDB API %s - %s"
% (
request.status_code,
request.content.decode(),
)
)
elif (
response.get("Search")
and len(response.get("Search")) > 1
):
reply = "No title found. Did you mean:"
for item in response["Search"]:
reply += " {0} ({1}) [{2}],".format(
item["Title"], item["Year"], item["imdbID"]
)
irc.reply(reply.rstrip(","))
not_found = stop = True
return
else:
log.error(
"IMDb OMDB API %s - %s"
% (request.status_code, request.content.decode())
log.debug("IMDb: requesting %s" % url)
request = utils.web.getUrl(url).decode()
response = json.loads(request)
if response["Response"] != "False":
imdb_template = lowercase_template(
self.registryValue("template", msg.channel)
)
response["logo"] = self.registryValue("logo", msg.channel)
for rating in response["Ratings"]:
if rating["Source"] == "Rotten Tomatoes":
response["tomatometer"] = rating.get("Value")
if rating["Source"] == "Metacritic":
response["metascore"] = "{0}%".format(
rating.get("Value").split("/")[0]
)
if not not_found or not unknown_error:
meta = tomato = None
imdb_template = self.registryValue("template", channel)
imdb_template = imdb_template.replace(
"$title", str(response.get("Title"))
)
imdb_template = imdb_template.replace(
"$year", str(response.get("Year"))
)
imdb_template = imdb_template.replace(
"$country", str(response.get("Country"))
)
imdb_template = imdb_template.replace(
"$director", str(response.get("Director"))
)
imdb_template = imdb_template.replace(
"$plot", str(response.get("Plot"))
)
imdb_template = imdb_template.replace(
"$imdbID", str(response.get("imdbID"))
)
imdb_template = imdb_template.replace(
"$imdbRating", str(response.get("imdbRating"))
)
for rating in response["Ratings"]:
if rating["Source"] == "Rotten Tomatoes":
tomato = rating.get("Value")
if rating["Source"] == "Metacritic":
meta = "{0}%".format(rating.get("Value").split("/")[0])
if meta:
imdb_template = imdb_template.replace("$metascore", meta)
else:
imdb_template = imdb_template.replace("$metascore", "N/A")
if tomato:
imdb_template = imdb_template.replace("$tomatoMeter", tomato)
else:
imdb_template = imdb_template.replace("$tomatoMeter", "N/A")
imdb_template = imdb_template.replace(
"$released", str(response.get("Released"))
)
imdb_template = imdb_template.replace(
"$genre", str(response.get("Genre"))
)
imdb_template = imdb_template.replace(
"$released", str(response.get("Released"))
)
imdb_template = imdb_template.replace(
"$awards", str(response.get("Awards"))
)
imdb_template = imdb_template.replace(
"$actors", str(response.get("Actors"))
)
imdb_template = imdb_template.replace(
"$rated", str(response.get("Rated"))
)
imdb_template = imdb_template.replace(
"$runtime", str(response.get("Runtime"))
)
imdb_template = imdb_template.replace(
"$writer", str(response.get("Writer"))
)
imdb_template = imdb_template.replace(
"$votes", str(response.get("imdbVotes"))
)
imdb_template = imdb_template.replace(
"$boxOffice", str(response.get("BoxOffice"))
)
imdb_template = imdb_template.replace(
"$production", str(response.get("Production"))
)
imdb_template = imdb_template.replace(
"$website", str(response.get("Website"))
)
imdb_template = imdb_template.replace(
"$poster", str(response.get("Poster"))
)
result = imdb_template
else:
log.error(
"IMDb OMDB API %s - %s"
% (request.status_code, request.content.decode())
)
except requests.exceptions.Timeout as e:
log.error("IMDb Timeout: %s" % (str(e)))
except requests.exceptions.ConnectionError as e:
log.error("IMDb ConnectionError: %s" % (str(e)))
except requests.exceptions.HTTPError as e:
log.error("IMDb HTTPError: %s" % (str(e)))
finally:
if result is not None:
irc.reply(result, prefixNick=False)
elif not stop:
irc.error(self.registryValue("noResultsMessage", channel))
result = imdb_template.safe_substitute(response)
if result:
irc.reply(result, prefixNick=False)
else:
irc.error(self.registryValue("noResultsMessage", msg.channel))
imdb = wrap(imdb, ["text"])

View File

@ -1,2 +0,0 @@
requests
beautifulsoup4

View File

@ -1,3 +1,5 @@
Limnoria plugin to return song lyrics from http://lyrics.wikia.com/
`config plugins.lyrics.googleSearch True/False` enable/disable google searches
`config plugins.lyrics.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
`config plugins.lyrics.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.

View File

@ -36,7 +36,7 @@ import supybot.world as world
# Use this for the version of this plugin. You may wish to put a CVS keyword
# in here if you're keeping the plugin in CVS or some similar system.
__version__ = "2020.02.24+git"
__version__ = "2020.05.19+git"
# XXX Replace this with an appropriate author or supybot.Author instance.
__author__ = supybot.Author("oddluck", "oddluck", "oddluck@riseup.net")

View File

@ -54,20 +54,24 @@ Lyrics = conf.registerPlugin("Lyrics")
conf.registerChannelValue(
Lyrics,
"googleSearch",
registry.Boolean(True, _("""Use google to perform searches for better results.""")),
)
conf.registerGlobalValue(
Lyrics,
"userAgents",
registry.CommaSeparatedListOfStrings(
[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
"Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
"Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
],
_("""Reported user agent when fetching links"""),
"google",
registry.Integer(
1,
"""
Google search priority. Google plugin must be loaded.
0 = disabled. 1 = first. 2 = second.
""",
),
)
conf.registerChannelValue(
Lyrics,
"ddg",
registry.Integer(
2,
"""
DDG search priority. DDG plugin must be loaded.
0 = disabled. 1 = first. 2 = second.
""",
),
)

View File

@ -34,11 +34,9 @@ import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.ircmsgs as ircmsgs
import supybot.log as log
from bs4 import BeautifulSoup
import requests
import re
import re, random
import pylyrics3
import random
try:
from supybot.i18n import PluginInternationalization
@ -55,25 +53,37 @@ class Lyrics(callbacks.Plugin):
threaded = True
def dosearch(self, lyric):
    """Scrape a Google result page for a lyrics.fandom.com page for *lyric*.

    Sends the query restricted to ``site:lyrics.fandom.com/wiki/`` with a
    randomly chosen user agent from the ``userAgents`` registry value.

    Returns a ``(title, url)`` tuple. ``title`` is the text of the first
    ``<h3>`` with ``":"`` replaced by ``" - "`` and anything after ``"|"``
    dropped; ``url`` is the ``href`` of the first ``.r a`` element. Either
    item is ``None`` if it could not be determined. This is a best-effort
    lookup: failures are logged at debug level, never raised.
    """
    url = None
    title = None
    try:
        header = {"User-Agent": random.choice(self.registryValue("userAgents"))}
        # Let requests build the query string so spaces, '&', '#', etc. in
        # the user's text are percent-encoded instead of corrupting the URL.
        data = requests.get(
            "https://www.google.com/search",
            params={"q": "{0} site:lyrics.fandom.com/wiki/".format(lyric)},
            headers=header,
            timeout=10,
        )
        data.raise_for_status()
        log.debug(data.content.decode())
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(data.content, "html.parser")
        elements = soup.select(".r a")
        heading = soup.find("h3")
        if heading is not None:
            title = heading.getText().replace(":", " - ").split("|")[0]
            if elements:
                url = elements[0]["href"]
    except Exception as e:
        # Still best-effort, but leave a trace instead of failing silently.
        log.debug("Lyrics: google search failed: %s" % e)
    return title, url
def dosearch(self, irc, channel, text):
    """Find a lyrics.fandom.com page for *text* via the Google/DDG plugins.

    The ``google`` and ``ddg`` channel registry values give each provider a
    priority (0 = disabled, 1 = first, 2 = second). Providers are queried in
    priority order and the first usable result is returned immediately.

    Returns a ``(title, url)`` tuple. ``title`` is the result title with
    ``":"`` replaced by ``" - "`` and anything after ``"|"`` dropped. When
    nothing is found, or no provider is enabled and loaded, the result is
    ``(None, None)`` so that callers can always unpack it.
    """
    google_priority = self.registryValue("google", channel)
    ddg_priority = self.registryValue("ddg", channel)
    google = irc.getCallback("google") if google_priority > 0 else None
    ddg = irc.getCallback("ddg") if ddg_priority > 0 else None
    if not google and not ddg:
        return None, None
    query = "site:lyrics.fandom.com/wiki/ %s" % text
    # Dots are escaped so that only the literal host name matches.
    pattern = re.compile(r"https?://lyrics\.fandom\.com/wiki/.*")
    for priority in range(1, 3):
        if google and google_priority == priority:
            results = google.decode(google.search(query, irc.network, channel))
            title_key, url_key = "title", "url"
        elif ddg and ddg_priority == priority:
            # The `ddg and` guard matters: the plugin may be enabled in the
            # config but not loaded, in which case getCallback gave None.
            results = ddg.search_core(
                query, channel_context=channel, max_results=10, show_snippet=False
            )
            # NOTE(review): DDG results are indexed positionally; presumably
            # (title, snippet, url) tuples - confirm against the DDG plugin.
            title_key, url_key = 0, 2
        else:
            continue
        for r in results or ():
            match = pattern.search(r[url_key])
            if not match:
                continue
            title = r[title_key].replace(":", " - ").split("|")[0]
            if title:
                return title, match.group(0)
    return None, None
def getlyrics(self, query):
lyrics = None
@ -98,11 +108,9 @@ class Lyrics(callbacks.Plugin):
"""<query>
Get song lyrics from Lyrics Wiki.
"""
channel = msg.channel
title = None
url = None
if self.registryValue("googleSearch", channel):
title, url = self.dosearch(lyric)
title, url = self.dosearch(irc, msg.channel, lyric)
if url and title and "lyrics.fandom.com/wiki/" in url:
try:
lyrics = self.getlyrics(url)

View File

@ -1,3 +1 @@
requests
beautifulsoup4
pylyrics3