mirror of
https://github.com/oddluck/limnoria-plugins.git
synced 2025-04-26 04:51:09 -05:00
keep untested plugins in testing branch
This commit is contained in:
parent
7150f6c384
commit
4addb0a540
@ -1 +0,0 @@
|
||||
python3 -m spacy download en_core_web_sm
|
@ -1,44 +0,0 @@
|
||||
###
|
||||
# Copyright (c) 2019 oddluck
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
###
|
||||
|
||||
"""
|
||||
Markovify: Miscellaneous "Markovify" Commands
|
||||
"""
|
||||
|
||||
import supybot
|
||||
import supybot.world as world
|
||||
|
||||
# Use this for the version of this plugin. You may wish to put a CVS keyword
|
||||
# in here if you're keeping the plugin in CVS or some similar system.
|
||||
__version__ = ""
|
||||
|
||||
# XXX Replace this with an appropriate author or supybot.Author instance.
|
||||
__author__ = supybot.Author('oddluck', 'oddluck',
|
||||
'oddluck@riseup.net')
|
||||
|
||||
# This is a dictionary mapping supybot.Author instances to lists of
|
||||
# contributions.
|
||||
__contributors__ = {}
|
||||
|
||||
# This is a url where the most recent plugin package can be downloaded.
|
||||
__url__ = 'https://github.com/oddluck/limnoria-plugins/'
|
||||
|
||||
from . import config
from . import plugin
# The `imp` module was removed in Python 3.12; importlib.reload is the
# supported equivalent and behaves the same for this use.
from importlib import reload

# In case we're being reloaded.
reload(config)
reload(plugin)
# Add more reloads here if you add third-party modules and want them to be
# reloaded when this plugin is reloaded. Don't forget to import them as well!

if world.testing:
    from . import test

# Names the bot looks up on the plugin package.
Class = plugin.Class
configure = config.configure
|
||||
|
@ -1,48 +0,0 @@
|
||||
###
|
||||
# Copyright (c) 2019, oddluck
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
###
|
||||
|
||||
import supybot.conf as conf
|
||||
import supybot.registry as registry
|
||||
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('Markovify')
except ImportError:
    # Placeholder that allows to run the plugin on a bot
    # without the i18n module. Only a missing module is tolerated here, so
    # any other failure surfaces instead of being hidden.
    _ = lambda x: x
|
||||
|
||||
|
||||
def configure(advanced):
    """Register the Markovify plugin when supybot runs its configuration step.

    ``advanced`` is a bool that tells whether the user identified themself
    as an advanced user; it is not consulted here. Configuration is effected
    by manipulating the registry.
    """
    from supybot.questions import anything, expect, something, yn
    conf.registerPlugin('Markovify', True)
|
||||
|
||||
# Registry group for this plugin; every value below is per-channel.
Markovify = conf.registerPlugin('Markovify')

conf.registerChannelValue(Markovify, 'enable',
    registry.Boolean(False, _("""Determines whether the plugin is enabled on a channel. This defaults to False to avoid needless resource consumption.""")))
conf.registerChannelValue(Markovify, 'stripRelayedNick',
    registry.Boolean(True, _("""Determines whether the bot will strip strings like <XXX> at the beginning of messages.""")))
conf.registerChannelValue(Markovify, 'stripURL',
    registry.Boolean(True, _("""Determines whether the bot will strip URLs from messages.""")))
conf.registerChannelValue(Markovify, 'ignoreNicks',
    registry.SpaceSeparatedListOfStrings([], _("""A list of nicks to be ignored by the bot""")))
conf.registerChannelValue(Markovify, 'ignorePattern',
    registry.Regexp("", _("""Messages matching this pattern will be ignored.""")))
conf.registerChannelValue(Markovify, 'stripPattern',
    registry.Regexp("", _("""Text matching this pattern will be stripped.""")))
# NOTE(review): 'stripNicks' and 'responseDelay' are registered here but are
# not read anywhere in the plugin module shown alongside this file - confirm
# whether they are still wanted.
conf.registerChannelValue(Markovify, 'stripNicks',
    registry.Boolean(False, _("""Strip all nicks, including the bot's, when learning? This replaces a nick with the keyword MAGIC_NICK to use for random highlighting.""")))
conf.registerChannelValue(Markovify, 'probability',
    registry.Probability(0, _("""Determines the fraction of messages the bot will answer. 0.0 - 1.0""")))
conf.registerChannelValue(Markovify, 'probabilityWhenAddressed',
    registry.Probability(0, _("""Determines the fraction of messages addressed to the bot the bot will answer, 0.0 - 1.0""")))
conf.registerChannelValue(Markovify, 'responseDelay',
    registry.Boolean(False, _("""Delay responding for 2 to 4 seconds in order to seem more human?""")))
|
@ -1,365 +0,0 @@
|
||||
###
|
||||
# Copyright (c) 2019 oddluck
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
###
|
||||
|
||||
import supybot.utils as utils
|
||||
from supybot.commands import *
|
||||
import supybot.plugins as plugins
|
||||
import supybot.ircutils as ircutils
|
||||
import supybot.callbacks as callbacks
|
||||
import supybot.ircmsgs as ircmsgs
|
||||
import supybot.log as log
|
||||
import supybot.conf as conf
|
||||
import os
|
||||
import requests
|
||||
import random
|
||||
import re
|
||||
import json
|
||||
import gc
|
||||
from itertools import chain
|
||||
import markovify
|
||||
import spacy
|
||||
from ftfy import fix_text
|
||||
from nltk.tokenize import sent_tokenize
|
||||
|
||||
try:
|
||||
from supybot.i18n import PluginInternationalization
|
||||
_ = PluginInternationalization('Markovify')
|
||||
except ImportError:
|
||||
# Placeholder that allows to run the plugin on a bot
|
||||
# without the i18n module
|
||||
_ = lambda x: x
|
||||
|
||||
# spaCy pipeline used by POSifiedText to tag words with their part of speech.
nlp = spacy.load('en_core_web_sm')

# This used to be `api = PushshiftAPI()`, but PushshiftAPI is never imported
# in this module, so the line raised NameError at import time and kept the
# plugin from loading. Nothing in this module uses `api`; the name is kept
# defined without constructing a client.
api = None
|
||||
|
||||
# Maps English contractions to their expanded form. Used by
# Markovify.expand_contractions, which looks a match up verbatim first and
# then lower-cased, which is why "I..." and "i..." variants are both listed.
CONTRACTION_MAP = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}
|
||||
|
||||
class POSifiedText(markovify.Text):
    """markovify.Text whose chain states are ``word::POS`` pairs.

    Each token produced by the module-level spaCy pipeline ``nlp`` is stored
    as its text joined to its part-of-speech tag with ``::``.
    """

    def word_split(self, sentence):
        """Return the tokens of ``sentence`` as ``"<text>::<pos>"`` strings."""
        return ["::".join((word.orth_, word.pos_)) for word in nlp(sentence)]

    def word_join(self, words):
        """Rebuild a sentence from tagged words by dropping the POS suffix."""
        # Split from the right: the tag is always the last "::" field, so a
        # token whose own text contains "::" is kept whole instead of being
        # cut at its first "::".
        return " ".join(word.rsplit("::", 1)[0] for word in words)
|
||||
|
||||
class Markovify(callbacks.Plugin):
|
||||
"""Generates chat replies with markov"""
|
||||
threaded = True
|
||||
|
||||
def __init__(self, irc):
    """Initialise the plugin and its per-channel model cache."""
    parent = super(Markovify, self)
    self.__parent = parent
    parent.__init__(irc)
    # channel name -> model held in memory
    self.model = {}
    # bot data directory; brain files are resolved through it
    self.directory = conf.supybot.directories.data
    # optional leading "<nick> " prefix, remainder captured as "message"
    self.MATCH_MESSAGE_STRIPNICK = re.compile('^(<[^ ]+> )?(?P<message>.*)$')
|
||||
|
||||
def save_corpus(self, channel):
    """Write the in-memory model for ``channel`` to ``<channel>/markov.json``.

    The path is resolved through ``self.directory.dirize`` and the channel
    directory is created when missing. Does nothing when no model is held
    in memory for ``channel``.
    """
    model = self.model.get(channel)
    if model is None:
        # Reachable from `respond` on a channel that has no brain yet;
        # indexing self.model here used to raise KeyError.
        return
    os.makedirs(self.directory.dirize(channel), exist_ok=True)
    path = self.directory.dirize(channel + "/markov.json")
    with open(path, 'w') as outfile:
        # to_json() output is dumped as-is; the loaders in this plugin
        # json.load() the file and hand the result to from_json().
        json.dump(model.to_json(), outfile)
|
||||
|
||||
def add_text(self, channel, text):
    """Clean ``text`` and merge it into the model for ``channel``.

    Cleaning: ftfy ``fix_text``, optional URL removal (``stripURL`` registry
    value), contraction expansion, sentence capitalisation, removal of
    ``<...>`` tags and of quotes/brackets/punctuation noise, and whitespace
    collapsing. The cleaned text becomes a ``POSifiedText`` that is combined
    with the channel's existing model; when none is in memory the saved
    ``<channel>/markov.json`` is tried, and if that is unavailable the new
    text alone becomes the model.
    """
    text = fix_text(text)
    if self.registryValue('stripURL', channel):
        text = re.sub(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', '', text)
    text = self.expand_contractions(text)
    text = self.capsents(text)
    # Raw strings: "\s" in a plain literal is an invalid escape sequence.
    text = re.sub(r'<[^<]+?>', '', text)
    text = re.sub(r"^'|'$|\s'|'\s|[\"()[\]*`:;<>]", "", text)
    text = re.sub(r"\s+", " ", text)
    if not text.strip():
        # Cleaning removed everything (e.g. the message was only a URL);
        # there is nothing to learn from.
        return
    # Every model is built without retaining the original sentences so the
    # combined model is the same whichever path created it.
    addition = POSifiedText(text, retain_original=False)
    current = self.model.get(channel)
    if current is None:
        path = self.directory.dirize(channel.lower() + "/markov.json")
        try:
            with open(path) as infile:
                current = POSifiedText.from_json(json.load(infile))
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Missing or unreadable brain file: start a fresh model.
            log.debug("Markovify: no usable brain file for %s (%s)" % (channel, e))
            current = None
    if current is None:
        self.model[channel] = addition
    else:
        self.model[channel] = markovify.combine(models=[current, addition])
|
||||
|
||||
def get_response(self, channel):
    """Return a generated sentence (at most 450 characters) for ``channel``.

    Uses the model held in memory, or loads and caches the one saved at
    ``<channel>/markov.json``. Spacing around punctuation in the generated
    sentence is tidied before it is returned.

    Returns None when no model is available, when generation fails or
    yields nothing, or when the result is a single character or whitespace.
    """
    model = self.model.get(channel)
    if model is None:
        path = self.directory.dirize(channel.lower() + "/markov.json")
        try:
            with open(path) as infile:
                model = POSifiedText.from_json(json.load(infile))
        except (OSError, ValueError, KeyError, TypeError):
            # No saved brain for this channel (or an unreadable one).
            return None
        self.model[channel] = model
    try:
        response = model.make_short_sentence(450)
    except Exception:
        # Best effort: a failure to generate must not break the caller.
        log.debug("Markovify: could not generate a response for %s" % channel)
        return None
    if not response or len(response) <= 1 or response.isspace():
        return None
    # Tokens are joined with single spaces, so pull punctuation back onto
    # the preceding word and make sure a space follows it.
    response = re.sub(r' ([.!?,;:]) ', r'\g<1> ', response)
    response = re.sub(r" ([.!?,'%])$", r"\g<1>", response)
    response = re.sub(r'([.?!,])(?=[^\s])', r'\g<1> ', response)
    return response.replace(' - ', '-').replace(' . . .', '...')
|
||||
|
||||
def capsents(self, user_sentences):
    """Split the text into sentences, capitalize() each, and re-join with spaces."""
    return ' '.join(
        sentence.capitalize() for sentence in sent_tokenize(user_sentences)
    )
|
||||
|
||||
def expand_contractions(self, text, contraction_mapping=None):
    """Expand contractions in ``text`` and remove remaining apostrophes.

    ``contraction_mapping`` maps contraction -> expansion and defaults to
    the module-level ``CONTRACTION_MAP``. Matching is case-insensitive; a
    match is looked up verbatim first and then lower-cased, and the first
    character of the matched text is kept so its capitalisation survives.
    Returns the expanded text with every ``'`` removed.
    """
    if contraction_mapping is None:
        contraction_mapping = CONTRACTION_MAP
    if contraction_mapping:
        lowered = {key.lower(): value
                   for key, value in contraction_mapping.items()}
        # Longest keys first: regex alternation takes the first alternative
        # that matches, so "can't" listed before "can't've" would expand
        # only the prefix and leave "ve" glued to the result.
        alternatives = sorted(contraction_mapping, key=len, reverse=True)
        contractions_pattern = re.compile(
            '({})'.format('|'.join(map(re.escape, alternatives))),
            flags=re.IGNORECASE | re.DOTALL)

        def expand_match(contraction):
            match = contraction.group(0)
            expansion = (contraction_mapping.get(match)
                         or lowered.get(match.lower()))
            if not expansion:
                # No usable expansion; leave the matched text untouched.
                return match
            return match[0] + expansion[1:]

        text = contractions_pattern.sub(expand_match, text)
    return text.replace("'", "")
|
||||
|
||||
def doPrivmsg(self, irc, msg):
    """Learn from a channel message and occasionally answer it.

    Messages are skipped when they address the bot as a command, are CTCP,
    are not sent to a channel, come from a nick in ``ignoreNicks``, or when
    the plugin is not enabled for the channel. A message that mentions the
    bot's nick uses ``probabilityWhenAddressed`` (with the nick removed from
    the text); others use ``probability``. When a reply is sent, the reply
    and then the message are added to the model, which is saved afterwards.
    """
    (channel, message) = msg.args
    channel = channel.lower()
    if (callbacks.addressed(irc.nick, msg) or ircmsgs.isCtcp(msg)
            or not irc.isChannel(channel)
            or not self.registryValue('enable', channel)):
        return
    # Compare case-insensitively on both sides; the configured nicks are
    # not necessarily lower-case.
    ignored = {nick.lower() for nick in self.registryValue('ignoreNicks', channel)}
    if msg.nick.lower() in ignored:
        log.debug("Markovify: nick %s in ignoreNicks for %s" % (msg.nick, channel))
        return
    if irc.nick.lower() in message.lower():
        # flags must be passed by keyword: the fourth positional argument
        # of re.sub is `count`, so re.IGNORECASE used to limit the number
        # of replacements instead of ignoring case.
        message = re.sub(re.escape(irc.nick), '', message, flags=re.IGNORECASE)
        probability = self.registryValue('probabilityWhenAddressed', channel)
    else:
        probability = self.registryValue('probability', channel)
    message = self.processText(channel, message)
    if not message or message.isspace():
        # processText returns None for ignored or too-short messages.
        return
    if random.random() < probability:
        response = self.get_response(channel)
        if response and len(response) > 1 and not response.isspace():
            irc.reply(response, prefixNick=False)
            self.add_text(channel, response)
    self.add_text(channel, message)
    self.save_corpus(channel)
|
||||
|
||||
def processText(self, channel, text):
    """Normalise one line of chat before it is learned.

    Strips IRC formatting, optionally drops a leading ``<nick> `` relay
    prefix (``stripRelayedNick``), rejects text matching ``ignorePattern``,
    removes every match of ``stripPattern``, trims whitespace and appends a
    full stop when the text does not end in ``.``, ``?`` or ``!``.

    Returns the processed text, or None when the text is ignored or is too
    short to be useful (two characters or fewer after processing).
    """
    ignore = self.registryValue("ignorePattern", channel)
    strip = self.registryValue("stripPattern", channel)
    text = ircutils.stripFormatting(text)
    if self.registryValue('stripRelayedNick', channel):
        relayed = self.MATCH_MESSAGE_STRIPNICK.match(text)
        if relayed:
            text = relayed.group('message')
    if ignore and re.search(ignore, text):
        log.debug("Markovify: %s matches ignorePattern for %s" % (text, channel))
        return None
    if strip:
        # Use the whole match (group 0): re.findall returns only the groups
        # when the pattern has any, which removed the wrong text for one
        # group and raised TypeError (tuple passed to replace) for several.
        for found in [m.group(0) for m in re.finditer(strip, text)]:
            if not found:
                continue
            text = text.replace(found, '')
            log.debug("Markovify: %s matches stripPattern for %s. New text text: %s" % (found, channel, text))
    text = text.strip()
    if len(text) <= 1:
        return None
    if not text.endswith((".", "?", "!")):
        text = text + "."
    return text if len(text) > 2 else None
|
||||
|
||||
def text(self, irc, msg, args, channel, optlist, url):
    """[channel] [--process] <url>

    Load text file into channel corpus. use --process to clean text like chat logs.
    """
    # The docstring above is kept as written; notes for maintainers go in
    # comments instead.
    if not channel:
        channel = msg.args[0]
    channel = channel.lower()
    process = 'process' in dict(optlist)
    try:
        # HEAD first so a non-text resource is rejected without downloading
        # it. Timeouts keep a stalled server from blocking this thread.
        head = requests.head(url, timeout=10)
        # .get(): a response without a Content-Type header used to raise
        # KeyError here.
        if "text/plain" not in head.headers.get("content-type", ""):
            irc.reply("Invalid file type.", private=False, notice=False)
            return
        download = requests.get(url, timeout=30)
    except requests.RequestException as e:
        irc.reply("Unable to fetch {0}: {1}".format(url, e))
        return
    data = download.content.decode()
    kept = []
    for line in data.split('\n'):
        if not line.strip():
            continue
        if process:
            line = self.processText(channel, line)
        if not line or not line.strip():
            continue
        kept.append(line)
    if not kept:
        # Nothing usable in the file; do not hand empty text to add_text.
        irc.reply("{0} lines added to brain file for channel {1}.".format(0, channel))
        return
    # Same layout as before (each line preceded by one space), built in a
    # single pass instead of repeated string concatenation.
    text = "".join(" {}".format(line) for line in kept)
    self.add_text(channel, text)
    irc.reply("{0} lines added to brain file for channel {1}.".format(len(kept), channel))
    self.save_corpus(channel)
    # Release the (potentially large) downloaded text promptly.
    del data, text
    gc.collect()
text = wrap(text, [additional('channel'), getopts({'process':''}), 'text'])
|
||||
|
||||
def respond(self, irc, msg, args, channel):
    """[channel]

    Generate a response from channel corpus.
    """
    if not channel:
        channel = msg.args[0]
    channel = channel.lower()
    response = self.get_response(channel)
    if response:
        irc.reply(response, prefixNick=False)
    # get_response leaves self.model without an entry when the channel has
    # no brain; saving unconditionally then raised KeyError in save_corpus.
    if channel in self.model:
        self.save_corpus(channel)
respond = wrap(respond, [optional('channel')])
|
||||
|
||||
Class = Markovify
|
@ -1,6 +0,0 @@
|
||||
requests
|
||||
markovify
|
||||
spacy
|
||||
ftfy
|
||||
nltk
|
||||
psaw
|
@ -1,16 +0,0 @@
|
||||
###
|
||||
# Copyright (c) 2019, oddluck
|
||||
# All rights reserved.
|
||||
#
|
||||
#
|
||||
###
|
||||
|
||||
from supybot.test import *
|
||||
|
||||
|
||||
class AdviceTestCase(PluginTestCase):
    """Plugin test case that loads the Markovify plugin; defines no tests."""
    # NOTE(review): the class name says "Advice" although the plugin under
    # test is Markovify - presumably copied from another plugin; confirm
    # before renaming.
    plugins = ("Markovify",)
|
||||
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
|
||||
|
Loading…
x
Reference in New Issue
Block a user