Remove fake-useragent requirement

This commit is contained in:
oddluck 2020-03-04 20:26:35 +00:00
parent 6da4437e7b
commit ede1181df5
10 changed files with 63 additions and 61 deletions

View File

@@ -62,4 +62,7 @@ conf.registerGlobalValue(IMDb, 'omdbAPI',
conf.registerChannelValue(IMDb, 'googleSearch', conf.registerChannelValue(IMDb, 'googleSearch',
registry.Boolean(True, _("""Use google to perform searches for better results."""))) registry.Boolean(True, _("""Use google to perform searches for better results.""")))
conf.registerGlobalValue(IMDb, 'userAgents',
registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0"], _("""Reported user agent when fetching links""")))
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@@ -37,7 +37,6 @@ import supybot.callbacks as callbacks
import supybot.log as log import supybot.log as log
import requests import requests
import json import json
from fake_useragent import UserAgent
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
try: try:
@@ -57,8 +56,9 @@ class IMDb(callbacks.Plugin):
url = None url = None
searchurl = "https://www.google.com/search?&q=" searchurl = "https://www.google.com/search?&q="
searchurl += "{0} site:imdb.com/title/".format(query) searchurl += "{0} site:imdb.com/title/".format(query)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") agents = self.registryValue("userAgents")
header = {'User-Agent':str(ua.random)} ua = random.choice(agents)
header = {'User-Agent': ua}
data = requests.get(searchurl, headers=header, timeout=10) data = requests.get(searchurl, headers=header, timeout=10)
data.raise_for_status() data.raise_for_status()
soup = BeautifulSoup(data.content) soup = BeautifulSoup(data.content)

View File

@@ -1,3 +1,2 @@
requests requests
beautifulsoup4 beautifulsoup4
fake_useragent

View File

@@ -50,3 +50,6 @@ Lyrics = conf.registerPlugin('Lyrics')
conf.registerChannelValue(Lyrics, 'googleSearch', conf.registerChannelValue(Lyrics, 'googleSearch',
registry.Boolean(True, _("""Use google to perform searches for better results."""))) registry.Boolean(True, _("""Use google to perform searches for better results.""")))
conf.registerGlobalValue(Lyrics, 'userAgents',
registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0"], _("""Reported user agent when fetching links""")))

View File

@@ -38,7 +38,6 @@ from bs4 import BeautifulSoup
import requests import requests
import re import re
import pylyrics3 import pylyrics3
from fake_useragent import UserAgent
try: try:
from supybot.i18n import PluginInternationalization from supybot.i18n import PluginInternationalization
@@ -58,8 +57,9 @@ class Lyrics(callbacks.Plugin):
title = None title = None
searchurl = "https://www.google.com/search?&q=" searchurl = "https://www.google.com/search?&q="
searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric) searchurl += "{0} site:lyrics.fandom.com/wiki/".format(lyric)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") agents = self.registryValue("userAgents")
header = {'User-Agent':str(ua.random)} ua = random.choice(agents)
header = {'User-Agent': ua}
data = requests.get(searchurl, headers=header, timeout=10) data = requests.get(searchurl, headers=header, timeout=10)
data.raise_for_status() data.raise_for_status()
soup = BeautifulSoup(data.content) soup = BeautifulSoup(data.content)

View File

@@ -1,4 +1,3 @@
requests requests
beautifulsoup4 beautifulsoup4
fake_useragent
pylyrics3 pylyrics3

View File

@@ -89,3 +89,6 @@ registry.Integer(99, _("""Set the default foreground color for ascii art images.
conf.registerChannelValue(TextArt, 'bg', conf.registerChannelValue(TextArt, 'bg',
registry.Integer(99, _("""Set the default background color for ascii art images. 0-98. 99 is disabled (default)"""))) registry.Integer(99, _("""Set the default background color for ascii art images. 0-98. 99 is disabled (default)""")))
conf.registerGlobalValue(TextArt, 'userAgents',
registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0"], _("""Reported user agent when fetching links""")))

View File

@@ -40,7 +40,6 @@ import requests
from PIL import Image, ImageOps, ImageFont, ImageDraw, ImageEnhance from PIL import Image, ImageOps, ImageFont, ImageDraw, ImageEnhance
import numpy as np import numpy as np
import sys, math import sys, math
from fake_useragent import UserAgent
import re import re
import pexpect import pexpect
import time import time
@@ -68,6 +67,7 @@ class TextArt(callbacks.Plugin):
self.stopped = {} self.stopped = {}
self.old_color = None self.old_color = None
self.source_colors = 0 self.source_colors = 0
self.agents = self.registryValue("userAgents")
self.rgbColors = [ self.rgbColors = [
(255,255,255), (255,255,255),
(0,0,0), (0,0,0),
@@ -714,18 +714,17 @@ class TextArt(callbacks.Plugin):
fg = 0 fg = 0
if url.startswith("https://paste.ee/p/"): if url.startswith("https://paste.ee/p/"):
url = re.sub("https://paste.ee/p/", "https://paste.ee/r/", url) url = re.sub("https://paste.ee/p/", "https://paste.ee/r/", url)
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
r = requests.head(url, headers=header) r = requests.get(url, stream=True, headers=header, timeout=10)
if "text/plain" in r.headers["content-type"] or url.startswith('https://paste.ee/r/'): if "text/plain" in r.headers["content-type"] or url.startswith('https://paste.ee/r/'):
file = requests.get(url, headers=header, timeout=10) try:
file = r.content.decode()
except:
file = r.content.decode('cp437')
else: else:
irc.reply("Invalid file type.", private=False, notice=False) irc.reply("Invalid file type.", private=False, notice=False)
return return
try:
file = file.content.decode()
except:
file = file.content.decode('cp437')
file = re.sub('(\x03(\d+).*)\x03,', '\g<1>\x03\g<2>,', file).replace('\r\n','\n') file = re.sub('(\x03(\d+).*)\x03,', '\g<1>\x03\g<2>,', file).replace('\r\n','\n')
im, x, y = self.renderImage(file, 18, bg, fg) im, x, y = self.renderImage(file, 18, bg, fg)
path = os.path.dirname(os.path.abspath(__file__)) path = os.path.dirname(os.path.abspath(__file__))
@@ -898,20 +897,16 @@ class TextArt(callbacks.Plugin):
cols = self.registryValue('blockWidth', msg.args[0]) cols = self.registryValue('blockWidth', msg.args[0])
if 's' in optlist: if 's' in optlist:
s = float(optlist.get('s')) s = float(optlist.get('s'))
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
image_formats = ("image/png", "image/jpeg", "image/jpg", "image/gif") image_formats = ("image/png", "image/jpeg", "image/jpg", "image/gif")
r = requests.head(url, headers=header) r = requests.get(url, stream=True, headers=header, timeout=10)
if r.headers["content-type"] in image_formats: if r.headers["content-type"] in image_formats and r.status_code == 200:
response = requests.get(url, stream=True, timeout=10, headers=header) r.raw.decode_content = True
image = Image.open(r.raw)
else: else:
irc.reply("Error: Invalid file type.", private=False, notice=False) irc.reply("Error: Invalid file type.", private=False, notice=False)
return return
if response.status_code == 200:
response.raw.decode_content = True
image = Image.open(response.raw)
else:
irc.reply("Error: Unable to open image.", private=False, notice=False)
# open image and convert to grayscale # open image and convert to grayscale
start_time = time.time() start_time = time.time()
self.source_colors = 0 self.source_colors = 0
@@ -1160,15 +1155,14 @@ class TextArt(callbacks.Plugin):
url = url.replace("https://paste.ee/p/", "https://paste.ee/r/") url = url.replace("https://paste.ee/p/", "https://paste.ee/r/")
elif url.startswith("https://pastebin.com/") and '/raw/' not in url: elif url.startswith("https://pastebin.com/") and '/raw/' not in url:
url = url.replace("https://pastebin.com/", "https://pastebin.com/raw/") url = url.replace("https://pastebin.com/", "https://pastebin.com/raw/")
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
r = requests.head(url, headers=header) r = requests.get(url, headers=header, stream=True, timeout=10)
if "text/plain" in r.headers["content-type"]: if "text/plain" in r.headers["content-type"]:
file = requests.get(url, timeout=10, headers=header) file = r.content.decode().replace('\r\n','\n')
else: else:
irc.reply("Invalid file type.", private=False, notice=False) irc.reply("Invalid file type.", private=False, notice=False)
return return
file = file.content.decode().replace('\r\n','\n')
for line in file.split('\n'): for line in file.split('\n'):
if line.strip() and not self.stopped[msg.args[0]]: if line.strip() and not self.stopped[msg.args[0]]:
time.sleep(delay) time.sleep(delay)
@@ -1208,15 +1202,14 @@ class TextArt(callbacks.Plugin):
delay = optlist.get('delay') delay = optlist.get('delay')
else: else:
delay = self.registryValue('delay', msg.args[0]) delay = self.registryValue('delay', msg.args[0])
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
r = requests.head(url, headers=header) r = requests.get(url, stream=True, headers=header, timeout=10)
try: try:
if "text/plain" in r.headers["content-type"] or "application/octet-stream" in r.headers["content-type"] and int(r.headers["content-length"]) < 1000000: if "text/plain" in r.headers["content-type"] or "application/octet-stream" in r.headers["content-type"] and int(r.headers["content-length"]) < 1000000:
path = os.path.dirname(os.path.abspath(__file__)) path = os.path.dirname(os.path.abspath(__file__))
filepath = "{0}/tmp".format(path) filepath = "{0}/tmp".format(path)
filename = "{0}/{1}".format(filepath, url.split('/')[-1]) filename = "{0}/{1}".format(filepath, url.split('/')[-1])
r = requests.get(url, timeout=10, headers=header)
open(filename, 'wb').write(r.content.replace(b';5;', b';')) open(filename, 'wb').write(r.content.replace(b';5;', b';'))
try: try:
output = pexpect.run('a2m {0} {1}'.format(opts.strip(), str(filename))) output = pexpect.run('a2m {0} {1}'.format(opts.strip(), str(filename)))
@@ -1294,18 +1287,13 @@ class TextArt(callbacks.Plugin):
path = os.path.dirname(os.path.abspath(__file__)) path = os.path.dirname(os.path.abspath(__file__))
filepath = "{0}/tmp".format(path) filepath = "{0}/tmp".format(path)
filename = "{0}/{1}".format(filepath, url.split('/')[-1]) filename = "{0}/{1}".format(filepath, url.split('/')[-1])
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
image_formats = ("image/png", "image/jpeg", "image/jpg", "image/gif") image_formats = ("image/png", "image/jpeg", "image/jpg", "image/gif")
r = requests.head(url, headers=header) r = requests.get(url, stream=True, headers=header, timeout=10)
if r.headers["content-type"] in image_formats: if r.headers["content-type"] in image_formats and r.status_code == 200:
response = requests.get(url, timeout=10, headers=header)
else:
irc.reply("Invalid file type.", private=False, notice=False)
return
if response.status_code == 200:
with open("{0}".format(filename), 'wb') as f: with open("{0}".format(filename), 'wb') as f:
f.write(response.content) f.write(r.content)
try: try:
output = pexpect.run('p2u -f m {0} {1}'.format(opts.strip(), str(filename))) output = pexpect.run('p2u -f m {0} {1}'.format(opts.strip(), str(filename)))
try: try:
@@ -1315,18 +1303,21 @@ class TextArt(callbacks.Plugin):
except: except:
irc.reply("Error. Have you installed p2u? https://git.trollforge.org/p2u", private=False, notice=False) irc.reply("Error. Have you installed p2u? https://git.trollforge.org/p2u", private=False, notice=False)
return return
paste = "" else:
self.stopped[msg.args[0]] = False irc.reply("Invalid file type.", private=False, notice=False)
for line in output.splitlines(): return
line = line.decode() paste = ""
line = re.sub('^\x03 ', ' ', line) self.stopped[msg.args[0]] = False
if self.registryValue('pasteEnable', msg.args[0]): for line in output.splitlines():
paste += line + "\n" line = line.decode()
if line.strip() and not self.stopped[msg.args[0]]: line = re.sub('^\x03 ', ' ', line)
time.sleep(delay)
irc.reply(line, prefixNick = False, noLengthCheck=True, private=False, notice=False, to=channel)
if self.registryValue('pasteEnable', msg.args[0]): if self.registryValue('pasteEnable', msg.args[0]):
irc.reply(self.doPaste(url, paste), private=False, notice=False, to=channel) paste += line + "\n"
if line.strip() and not self.stopped[msg.args[0]]:
time.sleep(delay)
irc.reply(line, prefixNick = False, noLengthCheck=True, private=False, notice=False, to=channel)
if self.registryValue('pasteEnable', msg.args[0]):
irc.reply(self.doPaste(url, paste), private=False, notice=False, to=channel)
else: else:
irc.reply("Unexpected file type or link format", private=False, notice=False) irc.reply("Unexpected file type or link format", private=False, notice=False)
p2u = wrap(p2u, [optional('channel'), getopts({'b':'int', 'f':'text', 'p':'text', 's':'int', 't':'int', 'w':'int', 'delay':'float'}), ('text')]) p2u = wrap(p2u, [optional('channel'), getopts({'b':'int', 'f':'text', 'p':'text', 's':'int', 't':'int', 'w':'int', 'delay':'float'}), ('text')])
@@ -1665,11 +1656,17 @@ class TextArt(callbacks.Plugin):
else: else:
delay = self.registryValue('delay', msg.args[0]) delay = self.registryValue('delay', msg.args[0])
self.stopped[msg.args[0]] = False self.stopped[msg.args[0]] = False
ua = UserAgent(fallback="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0") ua = random.choice(self.agents)
header = {'User-Agent':str(ua.random)} header = {'User-Agent': ua}
data = requests.get("https://mircart.org/?s={0}".format(search), headers=header, timeout=10) data = requests.get("https://mircart.org/?s={0}".format(search), headers=header, timeout=10)
if not data:
irc.reply("Error: No results found for {0}".format(search))
return
soup = BeautifulSoup(data.content) soup = BeautifulSoup(data.content)
url = soup.find(href=re.compile(".txt")) url = soup.find(href=re.compile(".txt"))
if not url:
irc.reply("Error: No results found for {0}".format(search))
return
data = requests.get(url.get('href'), headers=header, timeout=10) data = requests.get(url.get('href'), headers=header, timeout=10)
output = data.content.decode() output = data.content.decode()
for line in output.splitlines(): for line in output.splitlines():

View File

@@ -1,7 +1,6 @@
requests requests
numpy numpy
pillow pillow
fake-useragent
pexpect pexpect
pyimgur pyimgur
beautifulsoup4 beautifulsoup4

View File

@@ -1,6 +1,5 @@
beautifulsoup4 beautifulsoup4
certifi certifi
fake-useragent
ftfy ftfy
geoip2 geoip2
git+https://github.com/oddluck/cobe.git git+https://github.com/oddluck/cobe.git