mirror of
https://github.com/oddluck/limnoria-plugins.git
synced 2025-04-26 13:01:09 -05:00
256 lines
10 KiB
Python
256 lines
10 KiB
Python
# coding: utf8
|
|
###
|
|
# Copyright (c) 2014, Valentin Lorentz
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
# this list of conditions, and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions, and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# * Neither the name of the author of this software nor the name of
|
|
# contributors to this software may be used to endorse or promote products
|
|
# derived from this software without specific prior written consent.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import glob
|
|
import random
|
|
import functools
|
|
|
|
import supybot.conf as conf
|
|
import supybot.world as world
|
|
import supybot.utils as utils
|
|
from supybot.commands import *
|
|
import supybot.plugins as plugins
|
|
import supybot.ircutils as ircutils
|
|
import supybot.callbacks as callbacks
|
|
import supybot.log as log
|
|
# Bind `_` to the plugin's translation function when the i18n module is
# available; otherwise fall back to an identity function so that the
# plugin still runs on a bot without the i18n module.
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('Markovgen')
except ImportError:
    def _(x):
        """Return the string unchanged (no translation available)."""
        return x
|
|
|
|
# The markovgen library is a hard requirement: fail with an explicit
# message telling the user how to install it for the running interpreter.
try:
    import markovgen
except ImportError:
    raise callbacks.Error('Cannot load markovgen library. Make sure you '
                          'installed it (%s -m pip install markovgen).'
                          % sys.executable)
# The `imp` module was removed in Python 3.12; importlib.reload is its
# replacement. The alias `r` is kept as the module-level name.
from importlib import reload as r
# Re-import markovgen so that reloading this plugin also picks up changes
# made to the library.
r(markovgen)
|
|
|
|
# Matches a live message, skipping an optional leading "<nick> " prefix;
# the remaining text is captured in the `message` group.
MATCH_MESSAGE_STRIPNICK = re.compile(
    r'^(<[^ ]+> )?(?P<message>.*)$')

# Matches a log line: a first space-free field, then an optional
# "<nick> " prefix; the remaining text is captured in `message`.
CHANNELLOGER_REGEXP_BASE = re.compile(
    r'^[^ ]* (<[^ ]+> )?(?P<message>.*)$')
# Same as above, but skips up to two consecutive "<nick> " prefixes.
CHANNELLOGER_REGEXP_STRIPNICK = re.compile(
    r'^[^ ]* (<[^ ]+> )?(<[^ ]+> )?(?P<message>.*)$')
|
|
|
|
def get_channelloger_extracter(stripRelayedNick):
    """Build the extracter used on ChannelLogger log lines.

    When `stripRelayedNick` is true, the extracter uses
    CHANNELLOGER_REGEXP_STRIPNICK; otherwise it uses
    CHANNELLOGER_REGEXP_BASE. The returned callable is wrapped with
    markovgen.mixed_encoding_extracting and yields the `message` group of
    the match, or None when the line does not match.
    """
    if stripRelayedNick:
        line_regexp = CHANNELLOGER_REGEXP_STRIPNICK
    else:
        line_regexp = CHANNELLOGER_REGEXP_BASE

    @markovgen.mixed_encoding_extracting
    def channelloger_extracter(x):
        found = line_regexp.match(x)
        if not found:
            return None
        return found.group('message')

    return channelloger_extracter
|
|
|
|
def get_extracter(name):
    """Build an extracter from the markovgen.REGEXPS entry called `name`.

    The pattern is compiled once, when the extracter is created. The
    returned callable is wrapped with markovgen.mixed_encoding_extracting
    and yields the `message` group of the match, or None when the line
    does not match.
    """
    line_regexp = re.compile(markovgen.REGEXPS[name])

    @markovgen.mixed_encoding_extracting
    def extracter(x):
        found = line_regexp.match(x)
        if not found:
            return None
        return found.group('message')

    return extracter
|
|
|
|
def rec_list_files(path):
    """Lazily yield the path of every file found below `path`.

    Directories are traversed with os.walk; each yielded value is the
    containing directory joined with the file name.
    """
    for directory, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            yield os.path.join(directory, filename)
|
|
|
|
class Markovgen(callbacks.Plugin):
    """Feeds channel messages into a per-channel Markov chain and replies
    with generated text, either randomly while listening to the channel
    or on request through the "gen" and "doge" commands."""
    threaded = True

    # Pattern used to remove URLs from messages when 'stripURL' is enabled.
    _URL_REGEXP = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))')
    # Runs of whitespace, collapsed to a single space after stripping.
    _WHITESPACE_REGEXP = re.compile(r'\s+')
    # Words eligible for the "doge" command: at least five letters.
    _DOGE_WORD_REGEXP = re.compile('^[a-zA-Zéèàù]{5,}$')

    def __init__(self, irc):
        super(Markovgen, self).__init__(irc)
        # Maps a channel name to its markovgen.Markov model; models are
        # created lazily by _get_markov().
        self._markovs = {}

    def _load_from_channellogger(self, irc, channel, m):
        """Feed `m` with the ChannelLogger '*.log' files of `channel`.

        Does nothing when the ChannelLogger plugin is not loaded. The log
        directory is looked up for every object in world.ircs.
        """
        cb = irc.getCallback('ChannelLogger')
        if not cb:
            return
        extracter = get_channelloger_extracter(
                self.registryValue('stripRelayedNick', channel))
        # Use a distinct loop name so the `irc` parameter is not shadowed.
        for network_irc in world.ircs:
            # The directory is a literal path: escape it so that glob
            # metacharacters in it (e.g. '[' in a channel name) are not
            # interpreted as part of the pattern.
            log_dir = glob.escape(cb.getLogDir(network_irc, channel))
            for filename in glob.glob(os.path.join(log_dir, '*.log')):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _load_from_data(self, irc, channel, m):
        """Feed `m` with the files stored in the plugin's data directory.

        Files are read from <data dir>/Markovgen/<channel>/<extracter>/,
        recursively, where <extracter> is the name of an entry of
        markovgen.REGEXPS used to parse the files below it.
        """
        base_path = os.path.join(conf.supybot.directories.data(),
                                 'Markovgen', channel)
        if not os.path.isdir(base_path):
            return
        for extracter_name in os.listdir(base_path):
            extracter = get_extracter(extracter_name)
            # os.walk() takes a literal path, not a glob pattern, so the
            # path must not be glob-escaped here.
            path = os.path.join(base_path, extracter_name)
            for filename in rec_list_files(path):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _get_markov(self, irc, channel):
        """Return the Markov model of `channel`, building it on first use.

        A new model is registered in self._markovs and then fed from the
        ChannelLogger logs and from the plugin's data directory.
        """
        if channel in self._markovs:
            return self._markovs[channel]
        m = markovgen.Markov()
        self._markovs[channel] = m
        self._load_from_channellogger(irc, channel, m)
        self._load_from_data(irc, channel, m)
        return m

    def _strip_message(self, channel, message):
        """Return `message` cleaned up according to the channel's
        'stripFormatting', 'stripPattern', 'stripURL' and
        'stripRelayedNick' settings, applied in that order."""
        if self.registryValue('stripFormatting', channel):
            message = ircutils.stripFormatting(message)
        strip = self.registryValue("stripPattern", channel)
        if strip:
            for x in re.findall(strip, message):
                message = message.replace(x, '')
                message = self._WHITESPACE_REGEXP.sub(' ', message)
                log.debug("Markovgen: %s matches stripPattern for %s. New message text: %s" % (x, channel, message))
        if self.registryValue('stripURL', channel):
            new_message = self._URL_REGEXP.sub('', message)
            new_message = self._WHITESPACE_REGEXP.sub(' ', new_message)
            if new_message != message:
                log.debug("Markovgen: url(s) stripped from message for %s. New message text: %s" % (channel, new_message))
                message = new_message
        if self.registryValue('stripRelayedNick', channel):
            stripped = MATCH_MESSAGE_STRIPNICK.match(message)
            # Keep the message untouched if the pattern does not match.
            if stripped:
                message = stripped.group('message')
        return message

    def doPrivmsg(self, irc, msg):
        """Learn from a channel message and possibly answer it.

        The message is skipped when the target is not a channel, when the
        plugin is disabled for the channel, when it is a command and
        'ignoreCommands' is set, when it matches 'ignorePattern', or when
        its author is listed in 'ignoreNicks'. Otherwise the cleaned-up
        message is fed to the channel's model, and an answer is generated
        with probability 'onNick.probability' if the message contains the
        bot's nick, or 'probability' if it does not.
        """
        (channel, message) = msg.args
        if not irc.isChannel(channel):
            return
        if not self.registryValue('enable', channel):
            return
        if self.registryValue('ignoreCommands', channel) \
                and callbacks.addressed(irc.nick, msg):
            return
        ignore = self.registryValue("ignorePattern", channel)
        if ignore and re.search(ignore, message):
            log.debug("Markovgen: %s matches ignorePattern for %s" % (message, channel))
            return
        if msg.nick.lower() in self.registryValue('ignoreNicks', channel):
            log.debug("Markovgen: nick %s in ignoreNicks for %s" % (msg.nick, channel))
            return
        m = self._get_markov(irc, channel)
        message = self._strip_message(channel, message)
        m.feed(message)
        tokenized_message = (w.strip(':;,.!?')
                             for w in message.lower().split())
        if irc.nick.lower() in tokenized_message:
            if random.random() < self.registryValue('onNick.probability', channel):
                def replace_nick(s):
                    # `flags` must be passed by keyword: the fourth
                    # positional argument of re.sub() is `count`. A
                    # callable replacement keeps backslashes in the
                    # author's nick from being read as regex escapes.
                    return re.sub(re.escape(irc.nick),
                                  lambda match: msg.nick, s,
                                  flags=re.IGNORECASE)
                self._answer(irc, message, m, False,
                             postprocessing=replace_nick)
        else:
            if random.random() < self.registryValue('probability', channel):
                self._answer(irc, message, m, False)

    @wrap(['channel', optional('text')])
    def gen(self, irc, msg, args, channel, message):
        """[<channel>] [<seed>]

        Generates a random message based on the logs of a channel
        and a seed"""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        if message:
            m.feed(message)
        self._answer(irc, message or '', m, True)

    def _answer(self, irc, message, m, allow_duplicate,
                postprocessing=lambda x: x):
        """Generate a text seeded from `message` and send it as a reply.

        Candidate seeds are all the seeds of `m` when `message` is empty,
        the seeds containing the word when it has a single word, and the
        seeds equal to a pair of consecutive words otherwise. Nothing is
        sent when no seed qualifies, or when the generated text equals
        `message` and `allow_duplicate` is false. `postprocessing` is
        applied to the text right before it is sent.
        """
        words = message.split()
        seeds = list(m.available_seeds())
        if not words:
            possibilities = seeds
        elif len(words) == 1:
            word = words[0]
            possibilities = [x for x in seeds if word in x]
        else:
            message_tuples = set(zip(words, words[1:]))
            possibilities = [x for x in seeds if x in message_tuples]
        if not possibilities:
            # random.choice() raises IndexError on an empty sequence.
            log.debug("Markovgen: no seed available to answer %r" % (message,))
            return
        seed = list(random.choice(possibilities))
        backward_seed = list(reversed(seed))
        forward = m.generate_markov_text(seed=seed, backward=False)
        backward = m.generate_markov_text(seed=backward_seed,
                                          backward=True)
        try:
            # Drop the first two words of the forward text before joining
            # it to the backward text.
            answer = '%s %s' % (backward, forward.split(' ', 2)[2])
        except IndexError:
            answer = backward
        if allow_duplicate or message != answer:
            irc.reply(postprocessing(answer), prefixNick=False)

    @wrap(['channel'])
    def doge(self, irc, msg, args, channel):
        """[<channel>]

        Generates a doge."""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        words = [x for x in m.words if self._DOGE_WORD_REGEXP.match(x)]
        if not words:
            # random.choice() raises IndexError on an empty sequence.
            irc.error(_('No suitable word found to generate a doge.'),
                      Raise=True)
        w2 = random.choice(words)
        w1 = random.choice(['such', 'many', 'very'])
        irc.reply('%s %s' % (w1, w2))


Class = Markovgen
|
|
|
|
|
|
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|