WTE: Strip unicode on Python 2 because the Unicode implementation is absolutely terrible and stupidly refuses to work.

This commit is contained in:
James Lu 2014-12-03 16:56:02 -08:00
parent 37a55db8bf
commit 4d2b5d2103

View File

@ -28,7 +28,6 @@
### ###
from __future__ import unicode_literals
import supybot.utils as utils import supybot.utils as utils
from supybot.commands import * from supybot.commands import *
import supybot.plugins as plugins import supybot.plugins as plugins
@ -41,6 +40,7 @@ try: # Python 3
from urllib.parse import urlencode from urllib.parse import urlencode
except ImportError: # Python 2 except ImportError: # Python 2
from urllib import urlencode from urllib import urlencode
from string import printable
from sys import version_info from sys import version_info
try: try:
@ -59,6 +59,11 @@ class WTE(callbacks.Plugin):
def __init__(self, irc): def __init__(self, irc):
self.__parent = super(WTE, self) self.__parent = super(WTE, self)
self.__parent.__init__(irc) self.__parent.__init__(irc)
if version_info[0] < 3:
self.log.warning("WTE: Due to Unicode handling issues, "
"Unicode characters will be stripped from this plugin's "
"input/output. For optimal results, please upgrade the "
"bot to Python 3.")
self.langs = ('sw', 'sv', 'is', 'et', 'te', 'tr', 'mr', 'nl', 'sl', self.langs = ('sw', 'sv', 'is', 'et', 'te', 'tr', 'mr', 'nl', 'sl',
'id', 'gu', 'hi', 'az', 'hmn', 'ko', 'da', 'bg', 'lo', 'so', 'tl', 'id', 'gu', 'hi', 'az', 'hmn', 'ko', 'da', 'bg', 'lo', 'so', 'tl',
'hu', 'ca', 'cy', 'bs', 'ka', 'vi', 'eu', 'ms', 'fr', 'no', 'hy', 'hu', 'ca', 'cy', 'bs', 'ka', 'vi', 'eu', 'ms', 'fr', 'no', 'hy',
@ -69,12 +74,18 @@ class WTE(callbacks.Plugin):
'ur', 'pl', 'eo', 'yo', 'en', 'yi') 'ur', 'pl', 'eo', 'yo', 'en', 'yi')
def getTranslation(self, irc, sourceLang, targetLang, text): def getTranslation(self, irc, sourceLang, targetLang, text):
args = {"client": "p", "sl":sourceLang, "tl":targetLang}
if version_info[0] < 3: if version_info[0] < 3:
text = text.encode("utf-8") # Python 2's Unicode handling is just horrible. I've tried a
# dozen different combinations of encoding and decoding and they
# all fail with a stupid, useless UnicodeDecodeError. We're
# just going to strip all non-ASCII characters until
# this stupid issue gets fixed. -GLolol
args['q'] = filter(lambda x: x in printable, text)
else:
args['q'] = text
url = "http://translate.google.com/translate_a/t?"+ \ url = "http://translate.google.com/translate_a/t?"+ \
urlencode({"client": "p", urlencode(args)
"sl":sourceLang, "tl":targetLang,
"q":text})
try: try:
data = json.loads(utils.web.getUrl(url).decode("utf-8")) data = json.loads(utils.web.getUrl(url).decode("utf-8"))
except utils.web.Error as e: except utils.web.Error as e:
@ -98,6 +109,14 @@ class WTE(callbacks.Plugin):
for targetlang in ll: for targetlang in ll:
text = self.getTranslation(irc, "auto", targetlang, text) text = self.getTranslation(irc, "auto", targetlang, text)
text = self.getTranslation(irc, "auto", outlang, text) text = self.getTranslation(irc, "auto", outlang, text)
text = text.strip()
if not text:
s = ("Error encoding/decoding response. If you are using "
"Python 2, it is recommended to upgrade to "
"Python 3 to suppress these kinds of errors, as there "
"are some lingering issues handling Unicode on "
"versions of Python 2.")
irc.error(s, Raise=True)
irc.reply(text) irc.reply(text)
wte = wrap(wte, ['text']) wte = wrap(wte, ['text'])