# coding: utf8
###
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

import os
import re
import sys
import glob
import random
import functools

import supybot.conf as conf
import supybot.world as world
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.log as log
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('Markovgen')
except ImportError:
    # Placeholder that allows to run the plugin on a bot
    # without the i18n module
    _ = lambda x:x

try:
    import markovgen
except ImportError:
    raise callbacks.Error('Cannot load markovgen library. Make sure you '
                          'installed it (%s -m pip install markovgen).'
                          % sys.executable)
# The `imp` module was removed in Python 3.12; prefer importlib.reload and
# only fall back to imp on interpreters that lack it.
try:
    from importlib import reload as r
except ImportError:
    from imp import reload as r
# Reload the library so that reloading the plugin also picks up library
# changes.
r(markovgen)

# Strips an optional leading "<nick> " (as added by relay bots) from a message.
MATCH_MESSAGE_STRIPNICK = re.compile('^(<[^ ]+> )?(?P<message>.*)$')

# ChannelLogger lines: a first space-free field, an optional "<nick> ", then
# the message. The STRIPNICK variant also drops a second, relayed "<nick> ".
CHANNELLOGER_REGEXP_BASE = re.compile('^[^ ]* (<[^ ]+> )?(?P<message>.*)$')
CHANNELLOGER_REGEXP_STRIPNICK = re.compile('^[^ ]* (<[^ ]+> )?(<[^ ]+> )?(?P<message>.*)$')

# Collapses runs of whitespace left behind after stripping parts of a message.
_WHITESPACE_REGEXP = re.compile(r'\s+')

# Matches URLs so they can be removed before feeding the Markov chain.
_URL_REGEXP = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))')

# Words eligible for the `doge` command: at least five letters, nothing else.
_DOGE_WORD_REGEXP = re.compile('^[a-zA-Zéèàù]{5,}$')


def get_channelloger_extracter(stripRelayedNick):
    """Return a function extracting the message from a ChannelLogger line.

    If stripRelayedNick is true, a second "<nick> " prefix (relayed nick)
    is removed from the message as well. The returned function yields None
    for lines that do not match.
    """
    regexp = CHANNELLOGER_REGEXP_STRIPNICK if stripRelayedNick else \
            CHANNELLOGER_REGEXP_BASE

    @markovgen.mixed_encoding_extracting
    def channelloger_extracter(x):
        m = regexp.match(x)
        if m:
            return m.group('message')
    return channelloger_extracter


def get_extracter(name):
    """Return a message extracter built from markovgen.REGEXPS[name].

    The returned function yields the 'message' group of the regexp, or None
    when the line does not match. Raises KeyError if name is not a key of
    markovgen.REGEXPS.
    """
    regexp = re.compile(markovgen.REGEXPS[name])

    @markovgen.mixed_encoding_extracting
    def extracter(x):
        msg = regexp.match(x)
        if msg:
            return msg.group('message')
    return extracter


def rec_list_files(path):
    """Lazily yield the path of every file found under `path`, recursively."""
    return (os.path.join(dp, f)
            for dp, dn, filenames in os.walk(path)
            for f in filenames)


class Markovgen(callbacks.Plugin):
    """Feeds channel messages to a per-channel Markov chain and uses it to
    generate random messages (see the 'gen' and 'doge' commands)."""
    threaded = True

    def __init__(self, irc):
        super(Markovgen, self).__init__(irc)
        # Maps a channel name to its markovgen.Markov instance; filled
        # lazily by _get_markov().
        self._markovs = {}

    def _load_from_channellogger(self, irc, channel, m):
        """Feed `m` with the ChannelLogger logs of `channel`, if available.

        Does nothing when the ChannelLogger plugin is not loaded. Logs of
        every network in world.ircs are read.
        """
        cb = irc.getCallback('ChannelLogger')
        if not cb:
            return
        extracter = get_channelloger_extracter(
                self.registryValue('stripRelayedNick', channel))
        for network in world.ircs:
            # Channel names may contain glob metacharacters such as '[';
            # escape the directory so only the '*.log' part is a pattern.
            log_dir = glob.escape(cb.getLogDir(network, channel))
            for filename in glob.glob(os.path.join(log_dir, '*.log')):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _load_from_data(self, irc, channel, m):
        """Feed `m` with the files stored in <data dir>/Markovgen/<channel>/.

        Each entry of that directory is named after a key of
        markovgen.REGEXPS, which selects the extracter used for all files
        found (recursively) below it.
        """
        base_path = os.path.join(conf.supybot.directories.data(),
                                 'Markovgen', channel)
        if not os.path.isdir(base_path):
            return
        for extracter_name in os.listdir(base_path):
            extracter = get_extracter(extracter_name)
            # The path goes to os.walk(), which does no glob expansion, so
            # it must NOT be glob-escaped (escaping would alter the path).
            path = os.path.join(base_path, extracter_name)
            for filename in rec_list_files(path):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _get_markov(self, irc, channel):
        """Return the Markov instance of `channel`, creating and loading it
        from the logs and the data directory on first use."""
        if channel not in self._markovs:
            m = markovgen.Markov()
            self._markovs[channel] = m
            self._load_from_channellogger(irc, channel, m)
            self._load_from_data(irc, channel, m)
        else:
            m = self._markovs[channel]
        return m

    def doPrivmsg(self, irc, msg):
        """Learn from a channel message and possibly answer it.

        The message is skipped or cleaned up according to the plugin's
        configuration, fed to the channel's Markov chain, then answered
        with the configured probability (a separate probability applies
        when the bot's nick appears in the message).
        """
        (channel, message) = msg.args
        if not irc.isChannel(channel):
            return
        if not self.registryValue('enable', channel):
            return
        if self.registryValue('ignoreCommands', channel) and \
                callbacks.addressed(irc.nick, msg):
            return
        ignore = self.registryValue("ignorePattern", channel)
        strip = self.registryValue("stripPattern", channel)
        if ignore and re.search(ignore, message):
            log.debug("Markovgen: %s matches ignorePattern for %s" % (message, channel))
            return
        if msg.nick.lower() in self.registryValue('ignoreNicks', channel):
            log.debug("Markovgen: nick %s in ignoreNicks for %s" % (msg.nick, channel))
            return
        m = self._get_markov(irc, channel)
        if self.registryValue('stripFormatting', channel):
            message = ircutils.stripFormatting(message)
        if strip:
            for x in re.findall(strip, message):
                message = message.replace(x, '')
                message = _WHITESPACE_REGEXP.sub(' ', message)
                log.debug("Markovgen: %s matches stripPattern for %s. New message text: %s" % (x, channel, message))
        if self.registryValue('stripURL', channel):
            new_message = _URL_REGEXP.sub('', message)
            new_message = _WHITESPACE_REGEXP.sub(' ', new_message)
            if new_message != message:
                log.debug("Markovgen: url(s) stripped from message for %s. New message text: %s" % (channel, new_message))
                message = new_message
        if self.registryValue('stripRelayedNick', channel):
            message = MATCH_MESSAGE_STRIPNICK.match(message).group('message')
        m.feed(message)
        tokenized_message = (w.strip(':;,.!?')
                             for w in message.lower().split())
        if irc.nick.lower() in tokenized_message:
            if random.random() < self.registryValue('onNick.probability',
                                                    channel):
                def replace_nick(s):
                    # Answer with the sender's nick in place of the bot's.
                    # `flags` must be passed by keyword (the 4th positional
                    # argument of re.sub is `count`), and a callable
                    # replacement keeps backslashes in nicks literal.
                    return re.sub(re.escape(irc.nick),
                                  lambda match: msg.nick, s,
                                  flags=re.IGNORECASE)
                self._answer(irc, message, m, False,
                             postprocessing=replace_nick)
        else:
            if random.random() < self.registryValue('probability', channel):
                self._answer(irc, message, m, False)

    @wrap(['channel', optional('text')])
    def gen(self, irc, msg, args, channel, message):
        """[<channel>] [<seed>]

        Generates a random message based on the logs of a channel
        and a seed"""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        if message:
            m.feed(message)
        if not self._answer(irc, message or '', m, True):
            # Nothing was sent, so tell the user why instead of staying
            # silent.
            irc.error(_('Not enough data to generate a message.'))

    def _answer(self, irc, message, m, allow_duplicate,
                postprocessing=lambda x: x):
        """Generate a message seeded by `message` and reply with it.

        A seed is picked among the seeds of `m` related to `message`: any
        seed when the message is empty, seeds containing the word when it
        has a single word, otherwise seeds equal to a pair of consecutive
        words of the message. Unless `allow_duplicate` is true, nothing is
        sent when the generated text equals `message`. `postprocessing` is
        applied to the text right before it is sent.

        Returns True if a reply was sent, False otherwise.
        """
        words = message.split()
        if len(words) == 0:
            possibilities = list(m.available_seeds())
        elif len(words) == 1:
            word = words[0]
            seeds = list(m.available_seeds())
            possibilities = [x for x in seeds if word in x]
        else:
            message_tuples = set(zip(words, words[1:]))
            if not message_tuples:
                return False
            seeds = list(m.available_seeds())
            possibilities = [x for x in seeds if x in message_tuples]
        if not possibilities:
            # random.choice() raises IndexError on an empty sequence.
            return False
        seed = list(random.choice(possibilities))
        backward_seed = list(reversed(seed))
        forward = m.generate_markov_text(seed=seed, backward=False)
        backward = m.generate_markov_text(seed=backward_seed, backward=True)
        try:
            # Both texts contain the seed; drop the first two words of the
            # forward text so the seed is not repeated.
            answer = '%s %s' % (backward, forward.split(' ', 2)[2])
        except IndexError:
            # The forward text is nothing more than the seed.
            answer = backward
        if allow_duplicate or message != answer:
            irc.reply(postprocessing(answer), prefixNick=False)
            return True
        return False

    @wrap(['channel'])
    def doge(self, irc, msg, args, channel):
        """takes no arguments

        Generates a doge."""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                      Raise=True)
        m = self._get_markov(irc, channel)
        words = [x.strip(',?;.:/!') for x in m.words
                 if _DOGE_WORD_REGEXP.match(x)]
        if not words:
            # random.choice() raises IndexError on an empty sequence.
            irc.error(_('Not enough data to generate a doge.'), Raise=True)
        w2 = random.choice(words)
        w1 = random.choice(['such', 'many', 'very'])
        irc.reply('%s %s' % (w1, w2))


Class = Markovgen


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: