2019-12-01 15:19:36 -05:00

256 lines
10 KiB
Python

# coding: utf8
###
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import os
import re
import sys
import glob
import random
import functools
import supybot.conf as conf
import supybot.world as world
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.log as log
try:
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('Markovgen')
except ImportError:
# Placeholder that allows to run the plugin on a bot
# without the i18n module
_ = lambda x:x
# The markovgen library is a hard requirement: turn a missing install into a
# plugin-load error that tells the owner exactly how to install it.
try:
    import markovgen
except ImportError:
    raise callbacks.Error('Cannot load markovgen library. Make sure you '
                          'installed it (%s -m pip install markovgen).'
                          % sys.executable)
# Re-import the library each time this module is (re)loaded.
# NOTE(review): presumably so that reloading the plugin also picks up an
# upgraded markovgen -- confirm.
# importlib.reload replaces imp.reload: the imp module was removed in
# Python 3.12, which made this module fail at import time.
from importlib import reload as r
r(markovgen)
# A live message: an optional leading "<nick> " prefix followed by the text
# captured in the "message" group.
MATCH_MESSAGE_STRIPNICK = re.compile(
    '^(<[^ ]+> )?(?P<message>.*)$'
)
# A log line: a first space-free field (presumably the timestamp -- confirm
# against the ChannelLogger format), an optional "<nick> " prefix, then the
# text captured in the "message" group.
CHANNELLOGER_REGEXP_BASE = re.compile(
    '^[^ ]* (<[^ ]+> )?(?P<message>.*)$'
)
# Same as above, but a second "<nick> " prefix is also skipped so that it
# does not end up in the "message" group.
CHANNELLOGER_REGEXP_STRIPNICK = re.compile(
    '^[^ ]* (<[^ ]+> )?(<[^ ]+> )?(?P<message>.*)$'
)
def get_channelloger_extracter(stripRelayedNick):
    """Build the line extracter used to read ChannelLogger log files.

    With a true *stripRelayedNick* the extracter matches lines against
    CHANNELLOGER_REGEXP_STRIPNICK, otherwise against
    CHANNELLOGER_REGEXP_BASE.  The returned callable yields the "message"
    group of a matching line and None for a line that does not match.
    """
    # The flag cannot change once the closure exists, so pick the pattern
    # a single time instead of on every line.
    if stripRelayedNick:
        pattern = CHANNELLOGER_REGEXP_STRIPNICK
    else:
        pattern = CHANNELLOGER_REGEXP_BASE

    # NOTE(review): the decorator presumably deals with the encoding of the
    # raw lines read from the log files -- see the markovgen library.
    @markovgen.mixed_encoding_extracting
    def channelloger_extracter(x):
        found = pattern.match(x)
        if found is None:
            return None
        return found.group('message')

    return channelloger_extracter
def get_extracter(name):
    """Build a line extracter from the markovgen pattern called *name*.

    The pattern is looked up in markovgen.REGEXPS and compiled once.  The
    returned callable yields the "message" group of a matching line and
    None for a line that does not match.
    """
    pattern = re.compile(markovgen.REGEXPS[name])

    # NOTE(review): the decorator presumably deals with the encoding of the
    # raw lines read from the data files -- see the markovgen library.
    @markovgen.mixed_encoding_extracting
    def extracter(x):
        found = pattern.match(x)
        if found is None:
            return None
        return found.group('message')

    return extracter
def rec_list_files(path):
    """Lazily yield the path of every file found below *path*.

    The tree is walked with os.walk; each yielded value is the directory
    being visited joined with one of its file names.
    """
    for directory, _subdirs, names in os.walk(path):
        for name in names:
            yield os.path.join(directory, name)
class Markovgen(callbacks.Plugin):
    """Learns from the messages sent to a channel and answers with random
    sentences generated from them by a Markov chain.
    Use the "gen" command to generate a message on demand (optionally
    from a seed) and the "doge" command to generate a doge-style phrase."""
    threaded = True

    # Runs of whitespace, collapsed to one space after text was removed.
    _WHITESPACE_RE = re.compile(r'\s+')
    # Pattern removed from messages when the stripURL setting is on.
    _URL_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))')
    # Words eligible for the "doge" command: 5+ letters and nothing else.
    _DOGE_WORD_RE = re.compile('^[a-zA-Zéèàù]{5,}$')

    def __init__(self, irc):
        super(Markovgen, self).__init__(irc)
        # Cache of markovgen.Markov models, keyed by channel name.
        self._markovs = {}

    def _load_from_channellogger(self, irc, channel, m):
        """Feed *m* with the ChannelLogger plugin's logs of *channel*.

        Does nothing when the ChannelLogger plugin is not loaded.  The
        log directory of every connected network is read.
        """
        cb = irc.getCallback('ChannelLogger')
        if not cb:
            return
        extracter = get_channelloger_extracter(
            self.registryValue('stripRelayedNick', channel))
        for network in world.ircs:
            # Only the '*.log' part is a wildcard: escape the directory
            # so that glob characters in it (IRC channel names may
            # contain '[') are matched literally.
            log_dir = glob.escape(cb.getLogDir(network, channel))
            for filename in glob.glob(log_dir + '/*.log'):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _load_from_data(self, irc, channel, m):
        """Feed *m* with the files in <data dir>/Markovgen/<channel>/.

        Each sub-entry of that directory is named after a key of
        markovgen.REGEXPS; every file found below it is parsed with the
        matching extracter.  Does nothing if the directory is missing.
        """
        base_path = os.path.join(conf.supybot.directories.data(),
                                 'Markovgen', channel)
        if not os.path.isdir(base_path):
            return
        for extracter_name in os.listdir(base_path):
            extracter = get_extracter(extracter_name)
            # The path goes to os.walk, which takes it literally: it must
            # not be glob-escaped, or names containing '[', '*' or '?'
            # would point to a directory that does not exist.
            path = os.path.join(base_path, extracter_name)
            for filename in rec_list_files(path):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _get_markov(self, irc, channel):
        """Return the model of *channel*, building it on first use."""
        try:
            return self._markovs[channel]
        except KeyError:
            pass
        m = markovgen.Markov()
        # The model is registered before being fed, as it always was.
        self._markovs[channel] = m
        self._load_from_channellogger(irc, channel, m)
        self._load_from_data(irc, channel, m)
        return m

    def _clean_message(self, channel, message):
        """Return *message* after applying the strip* settings of *channel*.

        In order: stripFormatting, stripPattern, stripURL and
        stripRelayedNick.
        """
        if self.registryValue('stripFormatting', channel):
            message = ircutils.stripFormatting(message)
        strip = self.registryValue("stripPattern", channel)
        if strip:
            # NOTE(review): with more than one capture group in the
            # pattern, findall returns tuples and str.replace would
            # raise TypeError -- confirm what patterns are expected.
            for x in re.findall(strip, message):
                message = message.replace(x, '')
                message = self._WHITESPACE_RE.sub(' ', message)
                log.debug("Markovgen: %s matches stripPattern for %s. New message text: %s" % (x, channel, message))
        if self.registryValue('stripURL', channel):
            new_message = self._URL_RE.sub('', message)
            new_message = self._WHITESPACE_RE.sub(' ', new_message)
            if new_message != message:
                log.debug("Markovgen: url(s) stripped from message for %s. New message text: %s" % (channel, new_message))
            message = new_message
        if self.registryValue('stripRelayedNick', channel):
            message = MATCH_MESSAGE_STRIPNICK.match(message).group('message')
        return message

    def doPrivmsg(self, irc, msg):
        """Learn from a channel message and, with some probability, answer.

        The message is skipped when the plugin is disabled for the
        channel, when it is a command and ignoreCommands is set, when it
        matches ignorePattern or when its author is in ignoreNicks.
        """
        (channel, message) = msg.args
        if not irc.isChannel(channel):
            return
        if not self.registryValue('enable', channel):
            return
        if self.registryValue('ignoreCommands', channel) \
                and callbacks.addressed(irc.nick, msg):
            return
        ignore = self.registryValue("ignorePattern", channel)
        if ignore and re.search(ignore, message):
            log.debug("Markovgen: %s matches ignorePattern for %s" % (message, channel))
            return
        if msg.nick.lower() in self.registryValue('ignoreNicks', channel):
            log.debug("Markovgen: nick %s in ignoreNicks for %s" % (msg.nick, channel))
            return
        m = self._get_markov(irc, channel)
        message = self._clean_message(channel, message)
        m.feed(message)
        bot_nick = irc.nick.lower()
        mentioned = any(w.strip(':;,.!?') == bot_nick
                        for w in message.lower().split())
        if mentioned:
            if random.random() < self.registryValue('onNick.probability',
                                                    channel):
                def replace_nick(s):
                    # flags must be given by keyword: the fourth
                    # positional argument of re.sub is `count`.  The nick
                    # is substituted through a function so that it is
                    # inserted literally (a backslash in it would
                    # otherwise be read as a template escape).
                    return re.sub(re.escape(irc.nick),
                                  lambda _match: msg.nick,
                                  s, flags=re.IGNORECASE)
                self._answer(irc, message, m, False,
                             postprocessing=replace_nick)
        else:
            if random.random() < self.registryValue('probability', channel):
                self._answer(irc, message, m, False)

    # NOTE: the docstrings of the commands below follow the layout of the
    # original ones (argument syntax on the first line, then the
    # description); their wording is left as it was.
    @wrap(['channel', optional('text')])
    def gen(self, irc, msg, args, channel, message):
        """[<channel>] [<seed>]

        Generates a random message based on the logs of a channel
        and a seed"""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        if message:
            m.feed(message)
        if not self._answer(irc, message or '', m, True):
            irc.error(_('Not enough data to generate a message.'))

    def _answer(self, irc, message, m, allow_duplicate,
                postprocessing=lambda x: x):
        """Reply with a sentence generated by *m* around *message*.

        A seed is picked at random among the seeds of *m* that relate to
        *message*: all of them for an empty message, those containing the
        word for a one-word message, those equal to a pair of consecutive
        words otherwise.  The sentence is passed through *postprocessing*
        before being sent.  Unless *allow_duplicate* is true, a sentence
        identical to *message* is not sent.

        Returns True if a reply was sent, False otherwise (including when
        no seed is available, which used to raise IndexError).
        """
        words = message.split()
        seeds = list(m.available_seeds())
        if not words:
            possibilities = seeds
        elif len(words) == 1:
            word = words[0]
            possibilities = [x for x in seeds if word in x]
        else:
            message_tuples = set(zip(words, words[1:]))
            possibilities = [x for x in seeds if x in message_tuples]
        if not possibilities:
            # Empty model, or nothing learned matches the message.
            return False
        seed = list(random.choice(possibilities))
        backward_seed = list(reversed(seed))
        forward = m.generate_markov_text(seed=seed, backward=False)
        backward = m.generate_markov_text(seed=backward_seed,
                                          backward=True)
        try:
            # Drop the first two words of the forward half.
            # NOTE(review): presumably they are the seed, which already
            # ends the backward half -- confirm against markovgen.
            answer = '%s %s' % (backward, forward.split(' ', 2)[2])
        except IndexError:
            answer = backward
        if allow_duplicate or message != answer:
            irc.reply(postprocessing(answer), prefixNick=False)
            return True
        return False

    @wrap(['channel'])
    def doge(self, irc, msg, args, channel):
        """takes no arguments

        Generates a doge."""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        # NOTE(review): the strip() is a no-op on words that already
        # consist of letters only; it may have been meant to run before
        # the match -- confirm.
        words = [x.strip(',?;.:/!') for x in m.words
                 if self._DOGE_WORD_RE.match(x)]
        if not words:
            # random.choice would raise IndexError on an empty list.
            irc.error(_('No suitable word found.'), Raise=True)
        w2 = random.choice(words)
        w1 = random.choice(['such', 'many', 'very'])
        irc.reply('%s %s' % (w1, w2))
# Module-level alias for the plugin class.
# NOTE(review): presumably the name the bot's plugin loader looks up --
# confirm against the Limnoria plugin documentation.
Class = Markovgen
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: