IMDb/Lyrics: use google/ddg search plugins

2025-04-25 20:41:21 -05:00 · 2020-05-19 17:09:49 -04:00 · 2020-05-19 17:09:49 -04:00 · c94b470859
commit c94b470859
parent f8b52b7b4c
10 changed files with 194 additions and 290 deletions
--- a/IMDb/README.md
+++ b/IMDb/README.md
@ -4,16 +4,25 @@ Obtain an [OMDB API Key](https://omdbapi.com/apikey.aspx)
 `config plugins.imdb.omdbAPI your_key_here`
-`config plugins.imdb.googleSearch True/False` enable/disable google searches 
+`config plugins.imdb.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
 `config plugins.imdb.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.
 `config plugins.imdb.template` change the reply template
 Default template:
-`\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
+`$logo :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
 `config plugins.imdb.logo` change the template logo
 Default logo: `\x02\x031,8 IMDb \x03`
 ### Available variables for IMDB template ###
 Variable       | Description
 ---------------|------------
 logo           | Colored IMDB logo
 title          | Movie title
 year           | Release year
 country        | Country
@ -22,6 +31,7 @@ plot           | Plot
 imdbID         | IMDB title ID#
 imdbRating     | IMDB rating
 metascore      | Metacritic score
 tomatometer    | Rotten Tomatoes score
 released       | Release date
 genre          | Genre
 awards         | Awards won
@ -34,4 +44,4 @@ website        | Website URL
 language       | Language
 boxOffice      | Box Office
 production     | Production company
-poster         | Poster URL
+poster         | Poster URL
--- a/IMDb/init.py
+++ b/IMDb/init.py
@ -37,7 +37,7 @@ import supybot.world as world
 # Use this for the version of this plugin.  You may wish to put a CVS keyword
 # in here if you're keeping the plugin in CVS or some similar system.
-__version__ = "2020.02.24+git"
+__version__ = "2020.05.19+git"
 # XXX Replace this with an appropriate author or supybot.Author instance.
 __author__ = supybot.Author("butterscotchstallion", "butterscotchstallion", "")
--- a/IMDb/config.py
+++ b/IMDb/config.py
@ -57,11 +57,19 @@ conf.registerChannelValue(
    IMDb,
    "template",
    registry.String(
-        "\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, "
+        "$logo :: $title ($year, $country, [$rated], $genre, $runtime) "
-        "$runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
+        ":: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
-        "http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: "
+        "http://imdb.com/title/$imdbID :: $plot :: Director: $director :: "
-        "$actors :: Writer: $writer",
+        "Cast: $actors :: Writer: $writer",
-        _("""Template for the output of a search query."""),
+        _("""IMDb reply template."""),
    ),
 )
 conf.registerChannelValue(
    IMDb,
    "logo",
    registry.String(
        "\x02\x031,8 IMDb \x03", _("""Logo used with {{logo}} in template""")
    ),
 )
@ -74,28 +82,32 @@ conf.registerChannelValue(
    ),
 )
-conf.registerGlobalValue(
+conf.registerChannelValue(
-    IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
+    IMDb,
    "google",
    registry.Integer(
        1,
        """
        Google search priority. Google plugin must be loaded.
        0 = disabled. 1 = first. 2 = second.
        """,
    ),
 )
 conf.registerChannelValue(
    IMDb,
-    "googleSearch",
+    "ddg",
-    registry.Boolean(True, _("""Use google to perform searches for better results.""")),
+    registry.Integer(
-)
+        2,
-
+        """
-conf.registerGlobalValue(
+        DDG search priority. DDG plugin must be loaded.
-    IMDb,
+        0 = disabled. 1 = first. 2 = second.
-    "userAgents",
+        """,
    registry.CommaSeparatedListOfStrings(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
            "Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
            "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
        ],
        _("""Reported user agent when fetching links"""),
    ),
 )
 conf.registerGlobalValue(
    IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
 )
 # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
--- a/IMDb/plugin.py
+++ b/IMDb/plugin.py
@ -35,11 +35,9 @@ import supybot.ircutils as ircutils
 import supybot.ircmsgs as ircmsgs
 import supybot.callbacks as callbacks
 import supybot.log as log
 import json, random, re
 import requests
-import json
+from string import Template
 from bs4 import BeautifulSoup
 import random
 import re
 try:
    from supybot.i18n import PluginInternationalization
@ -51,234 +49,108 @@ except ImportError:
    _ = lambda x: x
 class lowercase_dict(dict):
    def __getitem__(self, name):
        return dict.__getitem__(self, name.lower())
 class lowercase_template(Template):
    def safe_substitute(self, mapping=None, **kws):
        if mapping is None:
            mapping = {}
        m = lowercase_dict((k.lower(), v) for k, v in mapping.items())
        m.update(lowercase_dict((k.lower(), v) for k, v in kws.items()))
        return Template.safe_substitute(self, m)
 class IMDb(callbacks.Plugin):
    """Queries OMDB database for information about IMDb titles"""
    threaded = True
-    def dosearch(self, query):
+    def dosearch(self, irc, channel, text):
-        try:
+        google = ddg = match = None
-            url = None
+        if self.registryValue("google", channel) > 0:
-            searchurl = "https://www.google.com/search?&q="
+            google = irc.getCallback("google")
-            searchurl += "{0} site:imdb.com/title/".format(query)
+        if self.registryValue("ddg", channel) > 0:
-            agents = self.registryValue("userAgents")
+            ddg = irc.getCallback("ddg")
-            ua = random.choice(agents)
+        if not google and not ddg:
-            header = {"User-Agent": ua}
+            return
-            data = requests.get(searchurl, headers=header, timeout=10)
+        query = "site:www.imdb.com/title/ %s" % text
-            data.raise_for_status()
+        pattern = re.compile(r"https?://www.imdb.com/title/tt\d+/$")
-            soup = BeautifulSoup(data.content)
+        for i in range(1, 3):
-            url = soup.find(
+            if google and self.registryValue("google", channel) == i:
-                "a", attrs={"href": re.compile(r"https://www.imdb.com/title/tt\d+/$")}
+                results = google.decode(google.search(query, irc.network, channel))
-            )["href"]
+                for r in results:
-        except Exception:
+                    match = re.search(pattern, r["url"])
-            pass
+                    if match:
-        return url
+                        break
            elif ddg and self.registryValue("ddg", channel) == i:
                results = ddg.search_core(
                    query, channel_context=channel, max_results=10, show_snippet=False
                )
                for r in results:
                    match = re.search(pattern, r[2])
                    if match:
                        break
        if match:
            return match.group(0)
        else:
            return
    def imdb(self, irc, msg, args, query):
        """<title>
        Queries the OMDB API about an IMDb title. Search by title name or IMDb ID.
        """
-        channel = msg.channel
+        url = response = result = None
-        url = result = None
+        if not self.registryValue("omdbAPI"):
-        id = stop = False
+            irc.error("Error: You must set an API key to use this plugin.")
        apikey = self.registryValue("omdbAPI")
        if not apikey:
            irc.reply("Error: You must set an API key to use this plugin.")
            return
-        if re.match(r"tt\d+", query.strip()):
+        id = re.match(r"tt\d+", query.strip())
-            id = True
+        if id:
-            url = "http://imdb.com/title/{0}".format(query.strip())
+            url = "http://imdb.com/title/{0}".format(id.group(0))
-        if not id and self.registryValue("googleSearch", channel):
+        if not id:
-            url = self.dosearch(query)
+            url = self.dosearch(irc, msg.channel, query)
-        if url and "imdb.com/title/" in url:
+        if url:
-            imdb_id = url.split("/title/")[1].rstrip("/")
+            id = url.split("/title/")[1].rstrip("/")
-            omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s" % (
+            url = "http://www.omdbapi.com/?" + utils.web.urlencode(
-                imdb_id,
+                {
-                apikey,
+                    "i": id,
                    "plot": "short",
                    "r": "json",
                    "apikey": self.registryValue("omdbAPI"),
                }
            )
-            log.debug("IMDb: requesting %s" % omdb_url)
+            log.debug("IMDb: requesting %s" % url)
        else:
-            omdb_url = "http://www.omdbapi.com/?t=%s&plot=short&r=json&apikey=%s" % (
+            url = "http://www.omdbapi.com/?" + utils.web.urlencode(
-                query,
+                {
-                apikey,
+                    "t": query,
                    "plot": "short",
                    "r": "json",
                    "apikey": self.registryValue("omdbAPI"),
                }
            )
-        try:
+            log.debug("IMDb: requesting %s" % url)
-            request = requests.get(omdb_url, timeout=10)
+        request = utils.web.getUrl(url).decode()
-            if request.status_code == requests.codes.ok:
+        response = json.loads(request)
-                response = json.loads(request.content)
+        if response["Response"] != "False":
-                not_found = "Error" in response
+            imdb_template = lowercase_template(
-                unknown_error = response["Response"] != "True"
+                self.registryValue("template", msg.channel)
-                if not_found or unknown_error:
+            )
-                    match = re.match(r"(.*) \(*(\d\d\d\d)\)*$", query.strip())
+            response["logo"] = self.registryValue("logo", msg.channel)
-                    if match:
+            for rating in response["Ratings"]:
-                        query = match.group(1).strip()
+                if rating["Source"] == "Rotten Tomatoes":
-                        year = match.group(2).strip()
+                    response["tomatometer"] = rating.get("Value")
-                        omdb_url = (
+                if rating["Source"] == "Metacritic":
-                            "http://www.omdbapi.com/?t=%s&y=%s&plot=short&r=json&apikey=%s"
+                    response["metascore"] = "{0}%".format(
-                            % (query, year, apikey)
+                        rating.get("Value").split("/")[0]
                        )
                        request = requests.get(omdb_url, timeout=10)
                        if request.status_code == requests.codes.ok:
                            response = json.loads(request.content)
                            not_found = "Error" in response
                            unknown_error = response["Response"] != "True"
                            if not_found or unknown_error:
                                log.debug("IMDb: OMDB error for %s" % (omdb_url))
                        else:
                            log.error(
                                "IMDb OMDB API %s - %s"
                                % (request.status_code, request.content.decode())
                            )
                    else:
                        log.debug("IMDb: OMDB error for %s" % (omdb_url))
                        query = re.sub(r"\d\d\d\d", "", query)
                        omdb_url = (
                            "http://www.omdbapi.com/?s=%s&plot=short&r=json&apikey=%s"
                            % (query, apikey)
                        )
                        request = requests.get(omdb_url, timeout=10)
                        if request.status_code == requests.codes.ok:
                            response = json.loads(request.content)
                            not_found = "Error" in response
                            unknown_error = response["Response"] != "True"
                            if not_found or unknown_error:
                                log.debug("IMDb: OMDB error for %s" % (omdb_url))
                            elif (
                                response.get("Search")
                                and len(response.get("Search")) == 1
                            ):
                                imdb_id = response["Search"][0]["imdbID"]
                                omdb_url = (
                                    "http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s"
                                    % (imdb_id, apikey)
                                )
                                request = requests.get(omdb_url, timeout=10)
                                if request.status_code == requests.codes.ok:
                                    response = json.loads(request.content)
                                    not_found = "Error" in response
                                    unknown_error = response["Response"] != "True"
                                    if not_found or unknown_error:
                                        log.debug(
                                            "IMDb: OMDB error for %s" % (omdb_url)
                                        )
                                else:
                                    log.error(
                                        "IMDb OMDB API %s - %s"
                                        % (
                                            request.status_code,
                                            request.content.decode(),
                                        )
                                    )
                            elif (
                                response.get("Search")
                                and len(response.get("Search")) > 1
                            ):
                                reply = "No title found. Did you mean:"
                                for item in response["Search"]:
                                    reply += " {0} ({1}) [{2}],".format(
                                        item["Title"], item["Year"], item["imdbID"]
                                    )
                                irc.reply(reply.rstrip(","))
                                not_found = stop = True
                                return
                else:
                    log.error(
                        "IMDb OMDB API %s - %s"
                        % (request.status_code, request.content.decode())
                    )
-                if not not_found or not unknown_error:
+            result = imdb_template.safe_substitute(response)
-                    meta = tomato = None
+        if result:
-                    imdb_template = self.registryValue("template", channel)
+            irc.reply(result, prefixNick=False)
-                    imdb_template = imdb_template.replace(
+        else:
-                        "$title", str(response.get("Title"))
+            irc.error(self.registryValue("noResultsMessage", msg.channel))
                    )
                    imdb_template = imdb_template.replace(
                        "$year", str(response.get("Year"))
                    )
                    imdb_template = imdb_template.replace(
                        "$country", str(response.get("Country"))
                    )
                    imdb_template = imdb_template.replace(
                        "$director", str(response.get("Director"))
                    )
                    imdb_template = imdb_template.replace(
                        "$plot", str(response.get("Plot"))
                    )
                    imdb_template = imdb_template.replace(
                        "$imdbID", str(response.get("imdbID"))
                    )
                    imdb_template = imdb_template.replace(
                        "$imdbRating", str(response.get("imdbRating"))
                    )
                    for rating in response["Ratings"]:
                        if rating["Source"] == "Rotten Tomatoes":
                            tomato = rating.get("Value")
                        if rating["Source"] == "Metacritic":
                            meta = "{0}%".format(rating.get("Value").split("/")[0])
                    if meta:
                        imdb_template = imdb_template.replace("$metascore", meta)
                    else:
                        imdb_template = imdb_template.replace("$metascore", "N/A")
                    if tomato:
                        imdb_template = imdb_template.replace("$tomatoMeter", tomato)
                    else:
                        imdb_template = imdb_template.replace("$tomatoMeter", "N/A")
                    imdb_template = imdb_template.replace(
                        "$released", str(response.get("Released"))
                    )
                    imdb_template = imdb_template.replace(
                        "$genre", str(response.get("Genre"))
                    )
                    imdb_template = imdb_template.replace(
                        "$released", str(response.get("Released"))
                    )
                    imdb_template = imdb_template.replace(
                        "$awards", str(response.get("Awards"))
                    )
                    imdb_template = imdb_template.replace(
                        "$actors", str(response.get("Actors"))
                    )
                    imdb_template = imdb_template.replace(
                        "$rated", str(response.get("Rated"))
                    )
                    imdb_template = imdb_template.replace(
                        "$runtime", str(response.get("Runtime"))
                    )
                    imdb_template = imdb_template.replace(
                        "$writer", str(response.get("Writer"))
                    )
                    imdb_template = imdb_template.replace(
                        "$votes", str(response.get("imdbVotes"))
                    )
                    imdb_template = imdb_template.replace(
                        "$boxOffice", str(response.get("BoxOffice"))
                    )
                    imdb_template = imdb_template.replace(
                        "$production", str(response.get("Production"))
                    )
                    imdb_template = imdb_template.replace(
                        "$website", str(response.get("Website"))
                    )
                    imdb_template = imdb_template.replace(
                        "$poster", str(response.get("Poster"))
                    )
                    result = imdb_template
            else:
                log.error(
                    "IMDb OMDB API %s - %s"
                    % (request.status_code, request.content.decode())
                )
        except requests.exceptions.Timeout as e:
            log.error("IMDb Timeout: %s" % (str(e)))
        except requests.exceptions.ConnectionError as e:
            log.error("IMDb ConnectionError: %s" % (str(e)))
        except requests.exceptions.HTTPError as e:
            log.error("IMDb HTTPError: %s" % (str(e)))
        finally:
            if result is not None:
                irc.reply(result, prefixNick=False)
            elif not stop:
                irc.error(self.registryValue("noResultsMessage", channel))
    imdb = wrap(imdb, ["text"])
--- a/IMDb/requirements.txt
+++ b/IMDb/requirements.txt
@ -1,2 +0,0 @@
 requests
 beautifulsoup4
--- a/Lyrics/README.md
+++ b/Lyrics/README.md
@ -1,3 +1,5 @@
 Limnoria plugin to return song lyrics from http://lyrics.wikia.com/
-`config plugins.lyrics.googleSearch True/False` enable/disable google searches
+`config plugins.lyrics.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
 `config plugins.lyrics.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.
--- a/Lyrics/init.py
+++ b/Lyrics/init.py
@ -36,7 +36,7 @@ import supybot.world as world
 # Use this for the version of this plugin.  You may wish to put a CVS keyword
 # in here if you're keeping the plugin in CVS or some similar system.
-__version__ = "2020.02.24+git"
+__version__ = "2020.05.19+git"
 # XXX Replace this with an appropriate author or supybot.Author instance.
 __author__ = supybot.Author("oddluck", "oddluck", "oddluck@riseup.net")
--- a/Lyrics/config.py
+++ b/Lyrics/config.py
@ -54,20 +54,24 @@ Lyrics = conf.registerPlugin("Lyrics")
 conf.registerChannelValue(
    Lyrics,
-    "googleSearch",
+    "google",
-    registry.Boolean(True, _("""Use google to perform searches for better results.""")),
+    registry.Integer(
-)
+        1,
-
+        """
-conf.registerGlobalValue(
+        Google search priority. Google plugin must be loaded.
-    Lyrics,
+        0 = disabled. 1 = first. 2 = second.
-    "userAgents",
+        """,
-    registry.CommaSeparatedListOfStrings(
+    ),
-        [
+)
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
+
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+conf.registerChannelValue(
-            "Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
+    Lyrics,
-            "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
+    "ddg",
-        ],
+    registry.Integer(
-        _("""Reported user agent when fetching links"""),
+        2,
        """
        DDG search priority. DDG plugin must be loaded.
        0 = disabled. 1 = first. 2 = second.
        """,
    ),
 )
--- a/Lyrics/plugin.py
+++ b/Lyrics/plugin.py
@ -34,11 +34,9 @@ import supybot.ircutils as ircutils
 import supybot.callbacks as callbacks
 import supybot.ircmsgs as ircmsgs
 import supybot.log as log
-from bs4 import BeautifulSoup
+import re, random
 import requests
 import re
 import pylyrics3
-import random
+
 try:
    from supybot.i18n import PluginInternationalization
@ -55,25 +53,37 @@ class Lyrics(callbacks.Plugin):
    threaded = True
-    def dosearch(self, lyric):
+    def dosearch(self, irc, channel, text):
-        try:
+        google = ddg = title = None
-            url = None
+        if self.registryValue("google", channel) > 0:
-            title = None
+            google = irc.getCallback("google")
-            searchurl = "https://www.google.com/search?&q="
+        if self.registryValue("ddg", channel) > 0:
-            searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric)
+            ddg = irc.getCallback("ddg")
-            agents = self.registryValue("userAgents")
+        if not google and not ddg:
-            ua = random.choice(agents)
+            return
-            header = {"User-Agent": ua}
+        query = "site:lyrics.fandom.com/wiki/ %s" % text
-            data = requests.get(searchurl, headers=header, timeout=10)
+        pattern = re.compile(r"https?://lyrics.fandom.com/wiki/.*")
-            data.raise_for_status()
+        for i in range(1, 3):
-            log.debug(data.content.decode())
+            if google and self.registryValue("google", channel) == i:
-            soup = BeautifulSoup(data.content)
+                results = google.decode(google.search(query, irc.network, channel))
-            elements = soup.select(".r a")
+                for r in results:
-            title = soup.find("h3").getText().replace(":", " - ").split("|")[0]
+                    match = re.search(pattern, r["url"])
-            url = elements[0]["href"]
+                    if match:
-        except Exception:
+                        title = r["title"].replace(":", " - ").split("|")[0]
-            pass
+                        break
-        return title, url
+            elif self.registryValue("ddg", channel) == i:
                results = ddg.search_core(
                    query, channel_context=channel, max_results=10, show_snippet=False
                )
                for r in results:
                    match = re.search(pattern, r[2])
                    if match:
                        title = r[0].replace(":", " - ").split("|")[0]
                        break
        if match and title:
            return title, match.group(0)
        else:
            return
    def getlyrics(self, query):
        lyrics = None
@ -98,11 +108,9 @@ class Lyrics(callbacks.Plugin):
        """<query>
        Get song lyrics from Lyrics Wiki.
        """
        channel = msg.channel
        title = None
        url = None
-        if self.registryValue("googleSearch", channel):
+        title, url = self.dosearch(irc, msg.channel, lyric)
            title, url = self.dosearch(lyric)
        if url and title and "lyrics.fandom.com/wiki/" in url:
            try:
                lyrics = self.getlyrics(url)
--- a/Lyrics/requirements.txt
+++ b/Lyrics/requirements.txt
@ -1,3 +1 @@
 requests
 beautifulsoup4
 pylyrics3