SpiffyTitles: file type/size, fake-useragent, badLinkText

This commit is contained in:
oddluck 2020-03-04 17:05:38 +00:00
parent accafeb09a
commit 49df8e850a
4 changed files with 38 additions and 48 deletions

View File

@ -285,12 +285,14 @@ improving performance. Default value: `60`
`wallClockTimeoutInSeconds` - Timeout for total elapsed time when retrieving a title. If you set this value too `wallClockTimeoutInSeconds` - Timeout for total elapsed time when retrieving a title. If you set this value too
high, the bot may time out. Default value: `8` (seconds). You must `!reload SpiffyTitles` for this setting to take effect. high, the bot may time out. Default value: `8` (seconds). You must `!reload SpiffyTitles` for this setting to take effect.
`channelWhitelist` - a comma separated list of channels in which titles should be displayed. If `""`, `channelWhitelist` - A comma separated list of channels in which titles should be displayed. If `""`,
titles will be shown in all channels. Default value: `""` titles will be shown in all channels. Default value: `""`
`channelBlacklist` - a comma separated list of channels in which titles should never be displayed. If `""`, `channelBlacklist` - A comma separated list of channels in which titles should never be displayed. If `""`,
titles will be shown in all channels. Default value: `""` titles will be shown in all channels. Default value: `""`
`badLinkText` - The text to return when unable to retrieve a title from a URL. Default value: `Nice link idiot.`
### About white/black lists ### About white/black lists
- Channel names must be in lowercase - Channel names must be in lowercase
- If `channelWhitelist` and `channelBlacklist` are empty, then titles will be displayed in every channel - If `channelWhitelist` and `channelBlacklist` are empty, then titles will be displayed in every channel
@ -337,8 +339,6 @@ Ignore all links except youtube, imgur, and reddit
!config supybot.plugins.SpiffyTitles.whitelistDomainPattern /(reddit\.com|youtube\.com|youtu\.be|imgur\.com)/ !config supybot.plugins.SpiffyTitles.whitelistDomainPattern /(reddit\.com|youtube\.com|youtu\.be|imgur\.com)/
`userAgents` - A comma separated list of strings of user agents randomly chosen when requesting.
`urlRegularExpression` - A regular expression used to match URLs. You shouldn't need to change this. `urlRegularExpression` - A regular expression used to match URLs. You shouldn't need to change this.
`linkMessageIgnorePattern` - If a message matches this pattern, it will be ignored. This differs from `ignoredDomainPattern` in that it compares against the entire message rather than just the domain. `linkMessageIgnorePattern` - If a message matches this pattern, it will be ignored. This differs from `ignoredDomainPattern` in that it compares against the entire message rather than just the domain.

View File

@ -69,8 +69,8 @@ conf.registerChannelValue(SpiffyTitles, 'useBold',
registry.Boolean(False, _("""Use bold in titles"""))) registry.Boolean(False, _("""Use bold in titles""")))
# User agents # User agents
conf.registerGlobalValue(SpiffyTitles, 'userAgents', conf.registerGlobalValue(SpiffyTitles, 'badLinkText',
registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.60 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko"], _("""Reported user agent when fetching links"""))) registry.String("Nice link idiot.", _("""Title to return for bad/unsnarfable links.""")))
# Mime Types # Mime Types
conf.registerGlobalValue(SpiffyTitles, 'mimeTypes', conf.registerGlobalValue(SpiffyTitles, 'mimeTypes',

View File

@ -48,6 +48,7 @@ import unicodedata
import supybot.ircdb as ircdb import supybot.ircdb as ircdb
import supybot.log as log import supybot.log as log
import pendulum import pendulum
from fake_useragent import UserAgent
try: try:
from supybot.i18n import PluginInternationalization from supybot.i18n import PluginInternationalization
@ -117,7 +118,7 @@ class SpiffyTitles(callbacks.Plugin):
self.handlers["www.twitch.tv"] = self.handler_twitch self.handlers["www.twitch.tv"] = self.handler_twitch
self.handlers["go.twitch.tv"] = self.handler_twitch self.handlers["go.twitch.tv"] = self.handler_twitch
self.handlers["clips.twitch.tv"] = self.handler_twitch self.handlers["clips.twitch.tv"] = self.handler_twitch
def add_imdb_handlers(self): def add_imdb_handlers(self):
""" """
Enables meta info about IMDB links through the OMDB API Enables meta info about IMDB links through the OMDB API
@ -158,11 +159,7 @@ class SpiffyTitles(callbacks.Plugin):
fields = "id,title,owner.screenname,duration,views_total" fields = "id,title,owner.screenname,duration,views_total"
api_url = "https://api.dailymotion.com/video/%s?fields=%s" % (video_id, fields) api_url = "https://api.dailymotion.com/video/%s?fields=%s" % (video_id, fields)
log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url) log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url)
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"User-Agent": agent
}
request = requests.get(api_url, headers=headers) request = requests.get(api_url, headers=headers)
ok = request.status_code == requests.codes.ok ok = request.status_code == requests.codes.ok
@ -213,11 +210,7 @@ class SpiffyTitles(callbacks.Plugin):
if video_id is not None: if video_id is not None:
api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id
log.debug("SpiffyTitles: looking up vimeo info: %s", api_url) log.debug("SpiffyTitles: looking up vimeo info: %s", api_url)
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"User-Agent": agent
}
request = requests.get(api_url, headers=headers) request = requests.get(api_url, headers=headers)
ok = request.status_code == requests.codes.ok ok = request.status_code == requests.codes.ok
@ -279,11 +272,7 @@ class SpiffyTitles(callbacks.Plugin):
video_id = video_id.split("?")[0] video_id = video_id.split("?")[0]
api_url = "http://coub.com/api/v2/coubs/%s" % video_id api_url = "http://coub.com/api/v2/coubs/%s" % video_id
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"User-Agent": agent
}
request = requests.get(api_url, headers=headers) request = requests.get(api_url, headers=headers)
ok = request.status_code == requests.codes.ok ok = request.status_code == requests.codes.ok
@ -491,7 +480,7 @@ class SpiffyTitles(callbacks.Plugin):
except Exception as e: except Exception as e:
pass pass
if title is not None and title: if title:
irc.reply(title) irc.reply(title)
else: else:
irc.reply(error_message + " {}".format(err)) irc.reply(error_message + " {}".format(err))
@ -663,10 +652,7 @@ class SpiffyTitles(callbacks.Plugin):
} }
encoded_options = urlencode(options) encoded_options = urlencode(options)
api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options) api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options)
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"User-Agent": agent
}
log.debug("SpiffyTitles: requesting %s" % (api_url)) log.debug("SpiffyTitles: requesting %s" % (api_url))
@ -886,10 +872,7 @@ class SpiffyTitles(callbacks.Plugin):
if not match: if not match:
self.log.debug("SpiffyTitles: twitch - no title found.") self.log.debug("SpiffyTitles: twitch - no title found.")
return self.handler_default(url, channel) return self.handler_default(url, channel)
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"Client-ID": twitch_client_id
}
self.log.debug("SpiffyTitles: twitch - requesting %s" % (data_url)) self.log.debug("SpiffyTitles: twitch - requesting %s" % (data_url))
request = requests.get(data_url, timeout=10, headers=headers) request = requests.get(data_url, timeout=10, headers=headers)
ok = request.status_code == requests.codes.ok ok = request.status_code == requests.codes.ok
@ -1212,11 +1195,7 @@ class SpiffyTitles(callbacks.Plugin):
api_params.update(title_param) api_params.update(title_param)
param_string = "&".join("%s=%s" % (key, val) for (key, val) in api_params.items()) param_string = "&".join("%s=%s" % (key, val) for (key, val) in api_params.items())
api_url = "https://%s/w/api.php?%s" % (info.netloc, param_string) api_url = "https://%s/w/api.php?%s" % (info.netloc, param_string)
headers = self.get_headers()
agent = self.get_user_agent()
headers = {
"User-Agent": agent
}
extract = "" extract = ""
self.log.debug("SpiffyTitles: requesting %s" % (api_url)) self.log.debug("SpiffyTitles: requesting %s" % (api_url))
@ -1292,10 +1271,7 @@ class SpiffyTitles(callbacks.Plugin):
self.log.debug("SpiffyTitles: no title found.") self.log.debug("SpiffyTitles: no title found.")
return self.handler_default(url, channel) return self.handler_default(url, channel)
agent = self.get_user_agent() headers = self.get_headers()
headers = {
"User-Agent": agent
}
self.log.debug("SpiffyTitles: requesting %s" % (data_url)) self.log.debug("SpiffyTitles: requesting %s" % (data_url))
@ -1621,14 +1597,30 @@ class SpiffyTitles(callbacks.Plugin):
else: else:
log.debug("SpiffyTitles: unacceptable mime type %s for url %s" % log.debug("SpiffyTitles: unacceptable mime type %s for url %s" %
(content_type, url)) (content_type, url))
# Binary (IEC) unit labels, smallest to largest; humansize() indexes into this.
suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
def humansize(nbytes):
    """
    Format a byte count as a short human-readable string, e.g. "1.5 KiB".

    The value is divided by 1024 until it drops below 1024 or the largest
    suffix is reached, then printed with at most two decimals (trailing
    zeros and a dangling decimal point are stripped).

    nbytes: size in bytes (int or float).
    Returns: "<number> <suffix>" using the entries of `suffixes`.
    """
    last = len(suffixes) - 1
    i = 0
    while nbytes >= 1024 and i < last:
        nbytes /= 1024.
        i += 1
    # Two-decimal rounding can turn e.g. 1023.999 into "1024"; move up one
    # unit in that case so the result reads "1 MiB" instead of "1024 KiB".
    if i < last and round(nbytes, 2) >= 1024:
        nbytes /= 1024.
        i += 1
    f = ('%.2f' % nbytes).rstrip('0').rstrip('.')
    return '%s %s' % (f, suffixes[i])
size = request.headers.get("content-length")
if size:
size = humansize(int(size))
text = "[{0}] ({1})".format(content_type, size)
else:
text = "[{0}]".format(content_type)
text = "<html><head><title>{0}</title></head><body></body></html>".format(text)
return (text, is_redirect)
else: else:
log.error("SpiffyTitles HTTP response code %s" % (request.status_code,)) log.error("SpiffyTitles HTTP response code %s" % (request.status_code,))
#request.content)) text = self.registryValue("badLinkText")
return ('<html><head><title>Nice link idiot.</title></head><body></body></html>', is_redirect) text = "<html><head><title>{0}</title></head><body></body></html>".format(text)
return (text, is_redirect)
except timeout_decorator.TimeoutError: except timeout_decorator.TimeoutError:
log.error("SpiffyTitles: wall timeout!") log.error("SpiffyTitles: wall timeout!")
self.get_source_by_url(url, retries + 1) self.get_source_by_url(url, retries + 1)
except requests.exceptions.MissingSchema as e: except requests.exceptions.MissingSchema as e:
url_wschema = "http://%s" % (url) url_wschema = "http://%s" % (url)
@ -1640,11 +1632,9 @@ class SpiffyTitles(callbacks.Plugin):
return self.get_source_by_url(url_wschema) return self.get_source_by_url(url_wschema)
except requests.exceptions.Timeout as e: except requests.exceptions.Timeout as e:
log.error("SpiffyTitles Timeout: %s" % (str(e))) log.error("SpiffyTitles Timeout: %s" % (str(e)))
self.get_source_by_url(url, retries + 1) self.get_source_by_url(url, retries + 1)
except requests.exceptions.ConnectionError as e: except requests.exceptions.ConnectionError as e:
log.error("SpiffyTitles ConnectionError: %s" % (str(e))) log.error("SpiffyTitles ConnectionError: %s" % (str(e)))
self.get_source_by_url(url, retries + 1) self.get_source_by_url(url, retries + 1)
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
log.error("SpiffyTitles HTTPError: %s" % (str(e))) log.error("SpiffyTitles HTTPError: %s" % (str(e)))
@ -1674,9 +1664,8 @@ class SpiffyTitles(callbacks.Plugin):
""" """
Returns a random user agent from the ones available Returns a random user agent from the ones available
""" """
agents = self.registryValue("userAgents") ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0")
return str(ua.random)
return random.choice(agents)
def message_matches_ignore_pattern(self, input): def message_matches_ignore_pattern(self, input):
""" """

View File

@ -7,3 +7,4 @@ requests
timeout-decorator timeout-decorator
certifi certifi
pendulum pendulum
fake-useragent