IMDb/Lyrics: use google/ddg search plugins

2025-04-25 12:31:07 -05:00 · 2020-05-19 17:09:49 -04:00 · 2020-05-19 17:09:49 -04:00 · c94b470859
commit c94b470859
parent f8b52b7b4c
10 changed files with 194 additions and 290 deletions
--- a/IMDb/README.md
+++ b/IMDb/README.md
@ -4,16 +4,25 @@ Obtain an [OMDB API Key](https://omdbapi.com/apikey.aspx)

 `config plugins.imdb.omdbAPI your_key_here`

-`config plugins.imdb.googleSearch True/False` enable/disable google searches 
+`config plugins.imdb.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
+
+`config plugins.imdb.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.

 `config plugins.imdb.template` change the reply template

 Default template:
 
-`\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
+`$logo :: $title ($year, $country, [$rated], $genre, $runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: $actors :: Writer: $writer`
+
+`config plugins.imdb.logo` change the template logo
+
+Default logo: `\x02\x031,8 IMDb \x03`
+
+### Available variables for IMDB template ###

 Variable       | Description
 ---------------|------------
+logo           | Colored IMDB logo
 title          | Movie title
 year           | Release year
 country        | Country
@ -22,6 +31,7 @@ plot           | Plot
 imdbID         | IMDB title ID#
 imdbRating     | IMDB rating
 metascore      | Metacritic score
+tomatometer    | Rotten Tomatoes score
 released       | Release date
 genre          | Genre
 awards         | Awards won
@ -34,4 +44,4 @@ website        | Website URL
 language       | Language
 boxOffice      | Box Office
 production     | Production company
-poster         | Poster URL
+poster         | Poster URL
--- a/IMDb/init.py
+++ b/IMDb/init.py
@ -37,7 +37,7 @@ import supybot.world as world

 # Use this for the version of this plugin.  You may wish to put a CVS keyword
 # in here if you're keeping the plugin in CVS or some similar system.
-__version__ = "2020.02.24+git"
+__version__ = "2020.05.19+git"

 # XXX Replace this with an appropriate author or supybot.Author instance.
 __author__ = supybot.Author("butterscotchstallion", "butterscotchstallion", "")
--- a/IMDb/config.py
+++ b/IMDb/config.py
@ -57,11 +57,19 @@ conf.registerChannelValue(
    IMDb,
    "template",
    registry.String(
-        "\x02\x031,8 IMDb \x0F\x02 :: $title ($year, $country, [$rated], $genre, "
-        "$runtime) :: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
-        "http://imdb.com/title/$imdbID :: $plot :: Director: $director :: Cast: "
-        "$actors :: Writer: $writer",
-        _("""Template for the output of a search query."""),
+        "$logo :: $title ($year, $country, [$rated], $genre, $runtime) "
+        ":: IMDb: $imdbRating | MC: $metascore | RT: $tomatoMeter :: "
+        "http://imdb.com/title/$imdbID :: $plot :: Director: $director :: "
+        "Cast: $actors :: Writer: $writer",
+        _("""IMDb reply template."""),
+    ),
+)
+
+conf.registerChannelValue(
+    IMDb,
+    "logo",
+    registry.String(
+        "\x02\x031,8 IMDb \x03", _("""Logo used with {{logo}} in template""")
    ),
 )

@ -74,28 +82,32 @@ conf.registerChannelValue(
    ),
 )

-conf.registerGlobalValue(
-    IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
+conf.registerChannelValue(
+    IMDb,
+    "google",
+    registry.Integer(
+        1,
+        """
+        Google search priority. Google plugin must be loaded.
+        0 = disabled. 1 = first. 2 = second.
+        """,
+    ),
 )

 conf.registerChannelValue(
    IMDb,
-    "googleSearch",
-    registry.Boolean(True, _("""Use google to perform searches for better results.""")),
-)
-
-conf.registerGlobalValue(
-    IMDb,
-    "userAgents",
-    registry.CommaSeparatedListOfStrings(
-        [
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
-            "Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
-            "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
-        ],
-        _("""Reported user agent when fetching links"""),
+    "ddg",
+    registry.Integer(
+        2,
+        """
+        DDG search priority. DDG plugin must be loaded.
+        0 = disabled. 1 = first. 2 = second.
+        """,
    ),
 )

+conf.registerGlobalValue(
+    IMDb, "omdbAPI", registry.String("", _("""OMDB API Key"""), private=True)
+)
+
 # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
--- a/IMDb/plugin.py
+++ b/IMDb/plugin.py
@ -35,11 +35,9 @@ import supybot.ircutils as ircutils
 import supybot.ircmsgs as ircmsgs
 import supybot.callbacks as callbacks
 import supybot.log as log
+import json, random, re
 import requests
-import json
-from bs4 import BeautifulSoup
-import random
-import re
+from string import Template

 try:
    from supybot.i18n import PluginInternationalization
@ -51,234 +49,108 @@ except ImportError:
    _ = lambda x: x


+class lowercase_dict(dict):
+    def __getitem__(self, name):
+        return dict.__getitem__(self, name.lower())
+
+
+class lowercase_template(Template):
+    def safe_substitute(self, mapping=None, **kws):
+        if mapping is None:
+            mapping = {}
+        m = lowercase_dict((k.lower(), v) for k, v in mapping.items())
+        m.update(lowercase_dict((k.lower(), v) for k, v in kws.items()))
+        return Template.safe_substitute(self, m)
+
+
 class IMDb(callbacks.Plugin):
    """Queries OMDB database for information about IMDb titles"""

    threaded = True

-    def dosearch(self, query):
-        try:
-            url = None
-            searchurl = "https://www.google.com/search?&q="
-            searchurl += "{0} site:imdb.com/title/".format(query)
-            agents = self.registryValue("userAgents")
-            ua = random.choice(agents)
-            header = {"User-Agent": ua}
-            data = requests.get(searchurl, headers=header, timeout=10)
-            data.raise_for_status()
-            soup = BeautifulSoup(data.content)
-            url = soup.find(
-                "a", attrs={"href": re.compile(r"https://www.imdb.com/title/tt\d+/$")}
-            )["href"]
-        except Exception:
-            pass
-        return url
+    def dosearch(self, irc, channel, text):
+        google = ddg = match = None
+        if self.registryValue("google", channel) > 0:
+            google = irc.getCallback("google")
+        if self.registryValue("ddg", channel) > 0:
+            ddg = irc.getCallback("ddg")
+        if not google and not ddg:
+            return
+        query = "site:www.imdb.com/title/ %s" % text
+        pattern = re.compile(r"https?://www.imdb.com/title/tt\d+/$")
+        for i in range(1, 3):
+            if google and self.registryValue("google", channel) == i:
+                results = google.decode(google.search(query, irc.network, channel))
+                for r in results:
+                    match = re.search(pattern, r["url"])
+                    if match:
+                        break
+            elif ddg and self.registryValue("ddg", channel) == i:
+                results = ddg.search_core(
+                    query, channel_context=channel, max_results=10, show_snippet=False
+                )
+                for r in results:
+                    match = re.search(pattern, r[2])
+                    if match:
+                        break
+        if match:
+            return match.group(0)
+        else:
+            return

    def imdb(self, irc, msg, args, query):
        """<title>
        Queries the OMDB API about an IMDb title. Search by title name or IMDb ID.
        """
-        channel = msg.channel
-        url = result = None
-        id = stop = False
-        apikey = self.registryValue("omdbAPI")
-        if not apikey:
-            irc.reply("Error: You must set an API key to use this plugin.")
+        url = response = result = None
+        if not self.registryValue("omdbAPI"):
+            irc.error("Error: You must set an API key to use this plugin.")
            return
-        if re.match(r"tt\d+", query.strip()):
-            id = True
-            url = "http://imdb.com/title/{0}".format(query.strip())
-        if not id and self.registryValue("googleSearch", channel):
-            url = self.dosearch(query)
-        if url and "imdb.com/title/" in url:
-            imdb_id = url.split("/title/")[1].rstrip("/")
-            omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s" % (
-                imdb_id,
-                apikey,
+        id = re.match(r"tt\d+", query.strip())
+        if id:
+            url = "http://imdb.com/title/{0}".format(id.group(0))
+        if not id:
+            url = self.dosearch(irc, msg.channel, query)
+        if url:
+            id = url.split("/title/")[1].rstrip("/")
+            url = "http://www.omdbapi.com/?" + utils.web.urlencode(
+                {
+                    "i": id,
+                    "plot": "short",
+                    "r": "json",
+                    "apikey": self.registryValue("omdbAPI"),
+                }
            )
-            log.debug("IMDb: requesting %s" % omdb_url)
+            log.debug("IMDb: requesting %s" % url)
        else:
-            omdb_url = "http://www.omdbapi.com/?t=%s&plot=short&r=json&apikey=%s" % (
-                query,
-                apikey,
+            url = "http://www.omdbapi.com/?" + utils.web.urlencode(
+                {
+                    "t": query,
+                    "plot": "short",
+                    "r": "json",
+                    "apikey": self.registryValue("omdbAPI"),
+                }
            )
-        try:
-            request = requests.get(omdb_url, timeout=10)
-            if request.status_code == requests.codes.ok:
-                response = json.loads(request.content)
-                not_found = "Error" in response
-                unknown_error = response["Response"] != "True"
-                if not_found or unknown_error:
-                    match = re.match(r"(.*) \(*(\d\d\d\d)\)*$", query.strip())
-                    if match:
-                        query = match.group(1).strip()
-                        year = match.group(2).strip()
-                        omdb_url = (
-                            "http://www.omdbapi.com/?t=%s&y=%s&plot=short&r=json&apikey=%s"
-                            % (query, year, apikey)
-                        )
-                        request = requests.get(omdb_url, timeout=10)
-                        if request.status_code == requests.codes.ok:
-                            response = json.loads(request.content)
-                            not_found = "Error" in response
-                            unknown_error = response["Response"] != "True"
-                            if not_found or unknown_error:
-                                log.debug("IMDb: OMDB error for %s" % (omdb_url))
-                        else:
-                            log.error(
-                                "IMDb OMDB API %s - %s"
-                                % (request.status_code, request.content.decode())
-                            )
-                    else:
-                        log.debug("IMDb: OMDB error for %s" % (omdb_url))
-                        query = re.sub(r"\d\d\d\d", "", query)
-                        omdb_url = (
-                            "http://www.omdbapi.com/?s=%s&plot=short&r=json&apikey=%s"
-                            % (query, apikey)
-                        )
-                        request = requests.get(omdb_url, timeout=10)
-                        if request.status_code == requests.codes.ok:
-                            response = json.loads(request.content)
-                            not_found = "Error" in response
-                            unknown_error = response["Response"] != "True"
-                            if not_found or unknown_error:
-                                log.debug("IMDb: OMDB error for %s" % (omdb_url))
-                            elif (
-                                response.get("Search")
-                                and len(response.get("Search")) == 1
-                            ):
-                                imdb_id = response["Search"][0]["imdbID"]
-                                omdb_url = (
-                                    "http://www.omdbapi.com/?i=%s&plot=short&r=json&apikey=%s"
-                                    % (imdb_id, apikey)
-                                )
-                                request = requests.get(omdb_url, timeout=10)
-                                if request.status_code == requests.codes.ok:
-                                    response = json.loads(request.content)
-                                    not_found = "Error" in response
-                                    unknown_error = response["Response"] != "True"
-                                    if not_found or unknown_error:
-                                        log.debug(
-                                            "IMDb: OMDB error for %s" % (omdb_url)
-                                        )
-                                else:
-                                    log.error(
-                                        "IMDb OMDB API %s - %s"
-                                        % (
-                                            request.status_code,
-                                            request.content.decode(),
-                                        )
-                                    )
-                            elif (
-                                response.get("Search")
-                                and len(response.get("Search")) > 1
-                            ):
-                                reply = "No title found. Did you mean:"
-                                for item in response["Search"]:
-                                    reply += " {0} ({1}) [{2}],".format(
-                                        item["Title"], item["Year"], item["imdbID"]
-                                    )
-                                irc.reply(reply.rstrip(","))
-                                not_found = stop = True
-                                return
-                else:
-                    log.error(
-                        "IMDb OMDB API %s - %s"
-                        % (request.status_code, request.content.decode())
+            log.debug("IMDb: requesting %s" % url)
+        request = utils.web.getUrl(url).decode()
+        response = json.loads(request)
+        if response["Response"] != "False":
+            imdb_template = lowercase_template(
+                self.registryValue("template", msg.channel)
+            )
+            response["logo"] = self.registryValue("logo", msg.channel)
+            for rating in response["Ratings"]:
+                if rating["Source"] == "Rotten Tomatoes":
+                    response["tomatometer"] = rating.get("Value")
+                if rating["Source"] == "Metacritic":
+                    response["metascore"] = "{0}%".format(
+                        rating.get("Value").split("/")[0]
                    )
-                if not not_found or not unknown_error:
-                    meta = tomato = None
-                    imdb_template = self.registryValue("template", channel)
-                    imdb_template = imdb_template.replace(
-                        "$title", str(response.get("Title"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$year", str(response.get("Year"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$country", str(response.get("Country"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$director", str(response.get("Director"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$plot", str(response.get("Plot"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$imdbID", str(response.get("imdbID"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$imdbRating", str(response.get("imdbRating"))
-                    )
-                    for rating in response["Ratings"]:
-                        if rating["Source"] == "Rotten Tomatoes":
-                            tomato = rating.get("Value")
-                        if rating["Source"] == "Metacritic":
-                            meta = "{0}%".format(rating.get("Value").split("/")[0])
-                    if meta:
-                        imdb_template = imdb_template.replace("$metascore", meta)
-                    else:
-                        imdb_template = imdb_template.replace("$metascore", "N/A")
-                    if tomato:
-                        imdb_template = imdb_template.replace("$tomatoMeter", tomato)
-                    else:
-                        imdb_template = imdb_template.replace("$tomatoMeter", "N/A")
-                    imdb_template = imdb_template.replace(
-                        "$released", str(response.get("Released"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$genre", str(response.get("Genre"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$released", str(response.get("Released"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$awards", str(response.get("Awards"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$actors", str(response.get("Actors"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$rated", str(response.get("Rated"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$runtime", str(response.get("Runtime"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$writer", str(response.get("Writer"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$votes", str(response.get("imdbVotes"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$boxOffice", str(response.get("BoxOffice"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$production", str(response.get("Production"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$website", str(response.get("Website"))
-                    )
-                    imdb_template = imdb_template.replace(
-                        "$poster", str(response.get("Poster"))
-                    )
-                    result = imdb_template
-            else:
-                log.error(
-                    "IMDb OMDB API %s - %s"
-                    % (request.status_code, request.content.decode())
-                )
-        except requests.exceptions.Timeout as e:
-            log.error("IMDb Timeout: %s" % (str(e)))
-        except requests.exceptions.ConnectionError as e:
-            log.error("IMDb ConnectionError: %s" % (str(e)))
-        except requests.exceptions.HTTPError as e:
-            log.error("IMDb HTTPError: %s" % (str(e)))
-        finally:
-            if result is not None:
-                irc.reply(result, prefixNick=False)
-            elif not stop:
-                irc.error(self.registryValue("noResultsMessage", channel))
+            result = imdb_template.safe_substitute(response)
+        if result:
+            irc.reply(result, prefixNick=False)
+        else:
+            irc.error(self.registryValue("noResultsMessage", msg.channel))

    imdb = wrap(imdb, ["text"])

--- a/IMDb/requirements.txt
+++ b/IMDb/requirements.txt
@ -1,2 +0,0 @@
-requests
-beautifulsoup4
--- a/Lyrics/README.md
+++ b/Lyrics/README.md
@ -1,3 +1,5 @@
 Limnoria plugin to return song lyrics from http://lyrics.wikia.com/

-`config plugins.lyrics.googleSearch True/False` enable/disable google searches
+`config plugins.lyrics.google 0-2` 0 to disable search using the Google plugin. 1 to set first priority. 2 to set second priority.
+
+`config plugins.lyrics.ddg 0-2` 0 to disable search using the DDG plugin. 1 to set first priority. 2 to set second priority.
--- a/Lyrics/init.py
+++ b/Lyrics/init.py
@ -36,7 +36,7 @@ import supybot.world as world

 # Use this for the version of this plugin.  You may wish to put a CVS keyword
 # in here if you're keeping the plugin in CVS or some similar system.
-__version__ = "2020.02.24+git"
+__version__ = "2020.05.19+git"

 # XXX Replace this with an appropriate author or supybot.Author instance.
 __author__ = supybot.Author("oddluck", "oddluck", "oddluck@riseup.net")
--- a/Lyrics/config.py
+++ b/Lyrics/config.py
@ -54,20 +54,24 @@ Lyrics = conf.registerPlugin("Lyrics")

 conf.registerChannelValue(
    Lyrics,
-    "googleSearch",
-    registry.Boolean(True, _("""Use google to perform searches for better results.""")),
-)
-
-conf.registerGlobalValue(
-    Lyrics,
-    "userAgents",
-    registry.CommaSeparatedListOfStrings(
-        [
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
-            "Mozilla/5.0 (Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
-            "Mozilla/5.0 (Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
-        ],
-        _("""Reported user agent when fetching links"""),
+    "google",
+    registry.Integer(
+        1,
+        """
+        Google search priority. Google plugin must be loaded.
+        0 = disabled. 1 = first. 2 = second.
+        """,
+    ),
+)
+
+conf.registerChannelValue(
+    Lyrics,
+    "ddg",
+    registry.Integer(
+        2,
+        """
+        DDG search priority. DDG plugin must be loaded.
+        0 = disabled. 1 = first. 2 = second.
+        """,
    ),
 )
--- a/Lyrics/plugin.py
+++ b/Lyrics/plugin.py
@ -34,11 +34,9 @@ import supybot.ircutils as ircutils
 import supybot.callbacks as callbacks
 import supybot.ircmsgs as ircmsgs
 import supybot.log as log
-from bs4 import BeautifulSoup
-import requests
-import re
+import re, random
 import pylyrics3
-import random
+

 try:
    from supybot.i18n import PluginInternationalization
@ -55,25 +53,37 @@ class Lyrics(callbacks.Plugin):

    threaded = True

-    def dosearch(self, lyric):
-        try:
-            url = None
-            title = None
-            searchurl = "https://www.google.com/search?&q="
-            searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric)
-            agents = self.registryValue("userAgents")
-            ua = random.choice(agents)
-            header = {"User-Agent": ua}
-            data = requests.get(searchurl, headers=header, timeout=10)
-            data.raise_for_status()
-            log.debug(data.content.decode())
-            soup = BeautifulSoup(data.content)
-            elements = soup.select(".r a")
-            title = soup.find("h3").getText().replace(":", " - ").split("|")[0]
-            url = elements[0]["href"]
-        except Exception:
-            pass
-        return title, url
+    def dosearch(self, irc, channel, text):
+        google = ddg = title = None
+        if self.registryValue("google", channel) > 0:
+            google = irc.getCallback("google")
+        if self.registryValue("ddg", channel) > 0:
+            ddg = irc.getCallback("ddg")
+        if not google and not ddg:
+            return
+        query = "site:lyrics.fandom.com/wiki/ %s" % text
+        pattern = re.compile(r"https?://lyrics.fandom.com/wiki/.*")
+        for i in range(1, 3):
+            if google and self.registryValue("google", channel) == i:
+                results = google.decode(google.search(query, irc.network, channel))
+                for r in results:
+                    match = re.search(pattern, r["url"])
+                    if match:
+                        title = r["title"].replace(":", " - ").split("|")[0]
+                        break
+            elif self.registryValue("ddg", channel) == i:
+                results = ddg.search_core(
+                    query, channel_context=channel, max_results=10, show_snippet=False
+                )
+                for r in results:
+                    match = re.search(pattern, r[2])
+                    if match:
+                        title = r[0].replace(":", " - ").split("|")[0]
+                        break
+        if match and title:
+            return title, match.group(0)
+        else:
+            return

    def getlyrics(self, query):
        lyrics = None
@ -98,11 +108,9 @@ class Lyrics(callbacks.Plugin):
        """<query>
        Get song lyrics from Lyrics Wiki.
        """
-        channel = msg.channel
        title = None
        url = None
-        if self.registryValue("googleSearch", channel):
-            title, url = self.dosearch(lyric)
+        title, url = self.dosearch(irc, msg.channel, lyric)
        if url and title and "lyrics.fandom.com/wiki/" in url:
            try:
                lyrics = self.getlyrics(url)
--- a/Lyrics/requirements.txt
+++ b/Lyrics/requirements.txt
@ -1,3 +1 @@
-requests
-beautifulsoup4
 pylyrics3