SpiffyTitles: file type/size, fake-useragent, badLinkText

2025-04-26 04:51:09 -05:00 · 2020-03-04 17:05:38 +00:00 · 2020-03-04 17:05:38 +00:00 · 49df8e850a
commit 49df8e850a
parent accafeb09a
4 changed files with 38 additions and 48 deletions
--- a/SpiffyTitles/README.md
+++ b/SpiffyTitles/README.md
@ -285,12 +285,14 @@ improving performance. Default value: `60`
 `wallClockTimeoutInSeconds` - Timeout for total elapsed time when retrieving a title. If you set this value too 
 high, the bot may time out. Default value: `8` (seconds). You must `!reload SpiffyTitles` for this setting to take effect.
-`channelWhitelist` - a comma separated list of channels in which titles should be displayed. If `""`,
+`channelWhitelist` - A comma-separated list of channels in which titles should be displayed. If empty (`""`),
 titles will be shown in all channels. Default value: `""`
-`channelBlacklist` - a comma separated list of channels in which titles should never be displayed. If `""`,
+`channelBlacklist` - A comma-separated list of channels in which titles should never be displayed. If empty (`""`),
 titles will be shown in all channels. Default value: `""`
 `badLinkText` - The text to return when unable to retrieve a title from a URL. Default value: `Nice link idiot.`
 ### About white/black lists
 - Channel names must be in lowercase
 - If `channelWhitelist` and `channelBlacklist` are empty, then titles will be displayed in every channel
@ -337,8 +339,6 @@ Ignore all links except youtube, imgur, and reddit
    !config supybot.plugins.SpiffyTitles.whitelistDomainPattern /(reddit\.com|youtube\.com|youtu\.be|imgur\.com)/
 `userAgents` - A comma separated list of strings of user agents randomly chosen when requesting. 
 `urlRegularExpression` - A regular expression used to match URLs. You shouldn't need to change this.
 `linkMessageIgnorePattern` - If a message matches this pattern, it will be ignored. This differs from `ignoredDomainPattern` in that it compares against the entire message rather than just the domain.
--- a/SpiffyTitles/config.py
+++ b/SpiffyTitles/config.py
@ -69,8 +69,8 @@ conf.registerChannelValue(SpiffyTitles, 'useBold',
     registry.Boolean(False, _("""Use bold in titles""")))
 # User agents
-conf.registerGlobalValue(SpiffyTitles, 'userAgents',
+conf.registerGlobalValue(SpiffyTitles, 'badLinkText',
-     registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.60 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko"], _("""Reported user agent when fetching links""")))
+     registry.String("Nice link idiot.", _("""Title to return for bad/unsnarfable links.""")))
 # Mime Types
 conf.registerGlobalValue(SpiffyTitles, 'mimeTypes',
--- a/SpiffyTitles/plugin.py
+++ b/SpiffyTitles/plugin.py
@ -48,6 +48,7 @@ import unicodedata
 import supybot.ircdb as ircdb
 import supybot.log as log
 import pendulum
 from fake_useragent import UserAgent
 try:
    from supybot.i18n import PluginInternationalization
@ -117,7 +118,7 @@ class SpiffyTitles(callbacks.Plugin):
        self.handlers["www.twitch.tv"] = self.handler_twitch
        self.handlers["go.twitch.tv"] = self.handler_twitch
        self.handlers["clips.twitch.tv"] = self.handler_twitch
-        
+
    def add_imdb_handlers(self):
        """
        Enables meta info about IMDB links through the OMDB API
@ -158,11 +159,7 @@ class SpiffyTitles(callbacks.Plugin):
            fields = "id,title,owner.screenname,duration,views_total"
            api_url = "https://api.dailymotion.com/video/%s?fields=%s" % (video_id, fields)
            log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url)
-            agent = self.get_user_agent()
+            headers = self.get_headers()
            headers = {
                "User-Agent": agent
            }
            request = requests.get(api_url, headers=headers)
            ok = request.status_code == requests.codes.ok
@ -213,11 +210,7 @@ class SpiffyTitles(callbacks.Plugin):
            if video_id is not None:
                api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id
                log.debug("SpiffyTitles: looking up vimeo info: %s", api_url)
-                agent = self.get_user_agent()
+                headers = self.get_headers()
                headers = {
                    "User-Agent": agent
                }
                request = requests.get(api_url, headers=headers)
                ok = request.status_code == requests.codes.ok
@ -279,11 +272,7 @@ class SpiffyTitles(callbacks.Plugin):
                video_id = video_id.split("?")[0]
            api_url = "http://coub.com/api/v2/coubs/%s" % video_id
-            agent = self.get_user_agent()
+            headers = self.get_headers()
            headers = {
                "User-Agent": agent
            }
            request = requests.get(api_url, headers=headers)
            ok = request.status_code == requests.codes.ok
@ -491,7 +480,7 @@ class SpiffyTitles(callbacks.Plugin):
        except Exception as e:
            pass
-        if title is not None and title:
+        if title:
            irc.reply(title)
        else:
            irc.reply(error_message + " {}".format(err))
@ -663,10 +652,7 @@ class SpiffyTitles(callbacks.Plugin):
            }
            encoded_options = urlencode(options)
            api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options)
-            agent = self.get_user_agent()
+            headers = self.get_headers()
            headers = {
                "User-Agent": agent
            }
            log.debug("SpiffyTitles: requesting %s" % (api_url))
@ -886,10 +872,7 @@ class SpiffyTitles(callbacks.Plugin):
        if not match:
            self.log.debug("SpiffyTitles: twitch - no title found.")
            return self.handler_default(url, channel)
-        agent = self.get_user_agent()
+        headers = self.get_headers()
        headers = {
            "Client-ID": twitch_client_id
        }
        self.log.debug("SpiffyTitles: twitch - requesting %s" % (data_url))
        request = requests.get(data_url, timeout=10, headers=headers)
        ok = request.status_code == requests.codes.ok
@ -1212,11 +1195,7 @@ class SpiffyTitles(callbacks.Plugin):
        api_params.update(title_param)
        param_string = "&".join("%s=%s" % (key, val) for (key, val) in api_params.items())
        api_url = "https://%s/w/api.php?%s" % (info.netloc, param_string)
-
+        headers = self.get_headers()
        agent = self.get_user_agent()
        headers = {
            "User-Agent": agent
        }
        extract = ""
        self.log.debug("SpiffyTitles: requesting %s" % (api_url))
@ -1292,10 +1271,7 @@ class SpiffyTitles(callbacks.Plugin):
            self.log.debug("SpiffyTitles: no title found.")
            return self.handler_default(url, channel)
-        agent = self.get_user_agent()
+        headers = self.get_headers()
        headers = {
            "User-Agent": agent
        }
        self.log.debug("SpiffyTitles: requesting %s" % (data_url))
@ -1621,14 +1597,30 @@ class SpiffyTitles(callbacks.Plugin):
                    else:
                        log.debug("SpiffyTitles: unacceptable mime type %s for url %s" %
                                  (content_type, url))
                        suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
                        def humansize(nbytes):
                            i = 0
                            while nbytes >= 1024 and i < len(suffixes)-1:
                                nbytes /= 1024.
                                i += 1
                            f = ('%.2f' % nbytes).rstrip('0').rstrip('.')
                            return '%s %s' % (f, suffixes[i])
                        size = request.headers.get("content-length")
                        if size:
                            size = humansize(int(size))
                            text = "[{0}] ({1})".format(content_type, size)
                        else:
                            text = "[{0}]".format(content_type)
                        text = "<html><head><title>{0}</title></head><body></body></html>".format(text)
                        return (text, is_redirect)
                else:
                    log.error("SpiffyTitles HTTP response code %s" % (request.status_code,))
-                                                                           #request.content))
+                    text = self.registryValue("badLinkText")
-                    return ('<html><head><title>Nice link idiot.</title></head><body></body></html>', is_redirect)
+                    text = "<html><head><title>{0}</title></head><body></body></html>".format(text)
                    return (text, is_redirect)
        except timeout_decorator.TimeoutError:
            log.error("SpiffyTitles: wall timeout!")
            self.get_source_by_url(url, retries + 1)
        except requests.exceptions.MissingSchema as e:
            url_wschema = "http://%s" % (url)
@ -1640,11 +1632,9 @@ class SpiffyTitles(callbacks.Plugin):
                return self.get_source_by_url(url_wschema)
        except requests.exceptions.Timeout as e:
            log.error("SpiffyTitles Timeout: %s" % (str(e)))
            self.get_source_by_url(url, retries + 1)
        except requests.exceptions.ConnectionError as e:
            log.error("SpiffyTitles ConnectionError: %s" % (str(e)))
            self.get_source_by_url(url, retries + 1)
        except requests.exceptions.HTTPError as e:
            log.error("SpiffyTitles HTTPError: %s" % (str(e)))
@ -1674,9 +1664,8 @@ class SpiffyTitles(callbacks.Plugin):
        """
        Returns a random user agent from the ones available
        """
-        agents = self.registryValue("userAgents")
+        ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
-
+        return str(ua.random)
        return random.choice(agents)
    def message_matches_ignore_pattern(self, input):
        """
--- a/SpiffyTitles/requirements.txt
+++ b/SpiffyTitles/requirements.txt
@ -7,3 +7,4 @@ requests
 timeout-decorator
 certifi
 pendulum
 fake-useragent