oddluck-limnoria-plugins/Markovgen/plugin.py

# coding: utf8
###
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

import os
import re
import sys
import glob
import random
import functools

import supybot.conf as conf
import supybot.world as world
import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.log as log
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('Markovgen')
except ImportError:
    # Placeholder that allows the plugin to run on a bot
    # that does not provide the i18n module
    _ = lambda x:x

try:
    import markovgen
except ImportError:
    raise callbacks.Error('Cannot load markovgen library. Make sure you '
                          'installed it (%s -m pip install markovgen).'
                          % sys.executable)
# The `imp` module was removed in Python 3.12; `importlib.reload` is its
# replacement. Keep `imp` only as a fallback for interpreters that do not
# provide `importlib.reload`.
try:
    from importlib import reload as r
except ImportError:
    from imp import reload as r
# Re-import the library each time this module is (re)loaded -- presumably so
# that an upgraded markovgen is picked up when the plugin is reloaded.
r(markovgen)

# Live message: an optional leading "<nick> " prefix is dropped and the rest
# of the line is captured as 'message'.
MATCH_MESSAGE_STRIPNICK = re.compile(
    '^(<[^ ]+> )?(?P<message>.*)$'
)

# Log line: a first space-free field followed by two spaces, then an optional
# "<nick> " prefix, then the captured 'message'.
CHANNELLOGER_REGEXP_BASE = re.compile(
    '^[^ ]*  (<[^ ]+> )?(?P<message>.*)$'
)
# Same as above, but a second optional "<nick> " prefix is dropped as well.
CHANNELLOGER_REGEXP_STRIPNICK = re.compile(
    '^[^ ]*  (<[^ ]+> )?(<[^ ]+> )?(?P<message>.*)$'
)

def get_channelloger_extracter(stripRelayedNick):
    """Build the line extracter used for ChannelLogger log files.

    The inner function matches a line against
    CHANNELLOGER_REGEXP_STRIPNICK when `stripRelayedNick` is true and against
    CHANNELLOGER_REGEXP_BASE otherwise; it returns the 'message' group of the
    match, or None when the line does not match. It is wrapped with
    markovgen.mixed_encoding_extracting before being returned.
    """
    @markovgen.mixed_encoding_extracting
    def channelloger_extracter(x):
        if stripRelayedNick:
            pattern = CHANNELLOGER_REGEXP_STRIPNICK
        else:
            pattern = CHANNELLOGER_REGEXP_BASE
        found = pattern.match(x)
        return found.group('message') if found else None
    return channelloger_extracter

def get_extracter(name):
    """Build a line extracter from the markovgen.REGEXPS entry called `name`.

    The entry is compiled once, when the extracter is created. The inner
    function returns the 'message' group of a matching line, or None when the
    line does not match; it is wrapped with
    markovgen.mixed_encoding_extracting before being returned.
    """
    pattern = re.compile(markovgen.REGEXPS[name])

    @markovgen.mixed_encoding_extracting
    def extracter(x):
        found = pattern.match(x)
        return found.group('message') if found else None
    return extracter

def rec_list_files(path):
    """Return a generator over the files found by os.walk(path).

    Each item is the directory path reported by os.walk joined with one of
    the file names of that directory. Directories themselves are not yielded.
    """
    def _iter_files(walker):
        for dirpath, _subdirs, names in walker:
            for name in names:
                yield os.path.join(dirpath, name)
    return _iter_files(os.walk(path))

class Markovgen(callbacks.Plugin):
    """Learns from channel messages and answers with text generated by
    Markov chains. Use the 'gen' command to generate a message on demand
    and the 'doge' command to generate a doge phrase. The behaviour is
    configured per channel (enable, probability, onNick.probability,
    ignoreCommands, ignorePattern, ignoreNicks, stripPattern,
    stripFormatting, stripURL, stripRelayedNick)."""
    threaded = True

    def __init__(self, irc):
        super(Markovgen, self).__init__(irc)
        # Per-channel cache of markovgen.Markov models, filled lazily by
        # _get_markov().
        self._markovs = {}

    def _load_from_channellogger(self, irc, channel, m):
        """Feed `m` with the '*.log' files ChannelLogger keeps for `channel`.

        Does nothing when the ChannelLogger callback is not available on
        `irc`. The log directory of every entry of world.ircs is read, not
        only the one of `irc`.
        """
        cb = irc.getCallback('ChannelLogger')
        if not cb:
            return
        extracter = get_channelloger_extracter(
                self.registryValue('stripRelayedNick', channel))
        for network_irc in world.ircs:
            # getLogDir() is given the channel, so its result may contain
            # glob metacharacters such as '[' or ']'; escape it so that only
            # the trailing '*.log' is interpreted as a pattern.
            log_dir = glob.escape(cb.getLogDir(network_irc, channel))
            for filename in glob.glob(log_dir + '/*.log'):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _load_from_data(self, irc, channel, m):
        """Feed `m` with the files stored in <data dir>/Markovgen/<channel>/.

        Each entry of that directory is named after the markovgen.REGEXPS
        key used to parse the files found (recursively) below it. Entries
        whose name is not a known key are skipped with a warning.
        """
        base_path = os.path.join(conf.supybot.directories.data(), 'Markovgen', channel)
        if not os.path.isdir(base_path):
            return
        for extracter_name in os.listdir(base_path):
            try:
                extracter = get_extracter(extracter_name)
            except KeyError:
                # A stray entry must not prevent the other ones from being
                # loaded.
                log.warning("Markovgen: unknown log format %s in %s, skipping."
                            % (extracter_name, base_path))
                continue
            # os.walk() takes a literal path: it must not be glob-escaped,
            # otherwise directories containing '[', '*' or '?' are not found.
            path = os.path.join(base_path, extracter_name)
            for filename in rec_list_files(path):
                with open(filename, 'rb') as fd:
                    m.feed_from_file(fd, extracter)

    def _get_markov(self, irc, channel):
        """Return the Markov model of `channel`, creating it on first use.

        A new model is stored in the cache and then fed from the
        ChannelLogger logs and from the plugin's data directory.
        """
        if channel not in self._markovs:
            m = markovgen.Markov()
            self._markovs[channel] = m
            self._load_from_channellogger(irc, channel, m)
            self._load_from_data(irc, channel, m)
        else:
            m = self._markovs[channel]
        return m

    def doPrivmsg(self, irc, msg):
        """Learn from a channel message and possibly answer it.

        The message is dropped when the target is not a channel, when the
        plugin is disabled for the channel, when it is a command addressed
        to the bot (and 'ignoreCommands' is set), when it matches
        'ignorePattern' or when its sender is listed in 'ignoreNicks'.
        Otherwise it is cleaned up according to the strip* settings, fed to
        the channel's model, and answered with probability
        'onNick.probability' if it contains the bot's nick, or 'probability'
        if it does not.
        """
        (channel, message) = msg.args
        if not irc.isChannel(channel):
            return
        if not self.registryValue('enable', channel):
            return
        if self.registryValue('ignoreCommands', channel) and callbacks.addressed(irc.nick, msg):
            return
        ignore = self.registryValue("ignorePattern", channel)
        strip = self.registryValue("stripPattern", channel)
        if ignore and re.search(ignore, message):
            log.debug("Markovgen: %s matches ignorePattern for %s" % (message, channel))
            return
        if msg.nick.lower() in self.registryValue('ignoreNicks', channel):
            log.debug("Markovgen: nick %s in ignoreNicks for %s" % (msg.nick, channel))
            return
        m = self._get_markov(irc, channel)
        if self.registryValue('stripFormatting', channel):
            message = ircutils.stripFormatting(message)
        if strip:
            # Remove every piece of text reported by findall(), collapsing
            # the whitespace left behind after each removal.
            for x in re.findall(strip, message):
                message = message.replace(x, '')
                message = re.sub(r'\s+', ' ', message)
                log.debug("Markovgen: %s matches stripPattern for %s. New message text: %s" % (x, channel, message))
        if self.registryValue('stripURL', channel):
            new_message = re.sub(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))', '', message)
            new_message = re.sub(r'\s+', ' ', new_message)
            if new_message != message:
                log.debug("Markovgen: url(s) stripped from message for %s. New message text: %s" % (channel, new_message))
                message = new_message
        if self.registryValue('stripRelayedNick', channel):
            message = MATCH_MESSAGE_STRIPNICK.match(message).group('message')
        m.feed(message)
        # Lower-cased words of the message, without surrounding punctuation,
        # used to detect whether the bot's nick was mentioned.
        tokenized_message = [w.strip(':;,.!?')
                for w in message.lower().split()]
        if irc.nick.lower() in tokenized_message:
            if random.random() < self.registryValue('onNick.probability', channel):
                def replace_nick(s):
                    # `flags` must be passed by keyword: the fourth
                    # positional argument of re.sub() is `count`. A callable
                    # replacement inserts the sender's nick literally, even
                    # if it contains backslashes.
                    return re.sub(re.escape(irc.nick),
                                  lambda _match: msg.nick,
                                  s, flags=re.IGNORECASE)
                self._answer(irc, message, m, False,
                        postprocessing=replace_nick)
        else:
            if random.random() < self.registryValue('probability', channel):
                self._answer(irc, message, m, False)

    @wrap(['channel', optional('text')])
    def gen(self, irc, msg, args, channel, message):
        """[<channel>] [<seed>]

        Generates a random message based on the logs of a channel
        and a seed"""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                    Raise=True)
        m = self._get_markov(irc, channel)
        if message:
            m.feed(message)
        if not self._answer(irc, message or '', m, True):
            # With allow_duplicate=True, _answer() only declines when no
            # seed matches.
            irc.error(_('No matching seed found, cannot generate a message.'))

    def _answer(self, irc, message, m, allow_duplicate,
            postprocessing=lambda x: x):
        """Generate a sentence related to `message` and send it to `irc`.

        The seed is picked at random among the seeds of `m`: any seed when
        `message` is empty, the seeds containing the word when `message` is
        a single word, and otherwise the seeds equal to a pair of
        consecutive words of `message`. Text is generated backward and
        forward from that seed and joined.

        `postprocessing` is applied to the answer before it is sent. Unless
        `allow_duplicate` is true, an answer equal to `message` is not sent.

        Returns True if a reply was sent, False otherwise (no usable seed,
        or duplicate answer).
        """
        words = message.split()
        seeds = list(m.available_seeds())
        if len(words) == 0:
            possibilities = seeds
        elif len(words) == 1:
            word = words[0]
            possibilities = [x for x in seeds if word in x]
        else:
            message_tuples = set(zip(words, words[1:]))
            possibilities = [x for x in seeds if x in message_tuples]
        if not possibilities:
            # random.choice() raises IndexError on an empty sequence.
            log.debug("Markovgen: no seed available to answer: %s" % message)
            return False
        seed = list(random.choice(possibilities))
        backward_seed = list(reversed(seed))
        forward = m.generate_markov_text(seed=seed, backward=False)
        backward = m.generate_markov_text(seed=backward_seed,
                backward=True)
        try:
            # Skip the first two words of the forward text -- presumably the
            # seed, which the backward text already contains.
            answer = '%s %s' % (backward, forward.split(' ', 2)[2])
        except IndexError:
            # The forward text has nothing beyond its first two words.
            answer = backward
        if allow_duplicate or message != answer:
            irc.reply(postprocessing(answer), prefixNick=False)
            return True
        return False

    @wrap(['channel'])
    def doge(self, irc, msg, args, channel):
        """[<channel>]

        Generates a doge."""
        if not self.registryValue('enable', channel):
            irc.error(_('Markovgen is disabled for this channel.'),
                    Raise=True)
        # Only words made of at least five of these letters are candidates.
        word_re = re.compile('^[a-zA-Zéèàù]{5,}$')
        m = self._get_markov(irc, channel)
        # NOTE(review): the strip() runs after the filter, so it has nothing
        # left to remove; stripping before matching may have been intended.
        words = [x.strip(',?;.:/!') for x in m.words if word_re.match(x)]
        if not words:
            # random.choice() raises IndexError on an empty sequence.
            irc.error(_('Not enough data to generate a doge.'), Raise=True)
        w2 = random.choice(words)
        w1 = random.choice(['such', 'many', 'very'])
        irc.reply('%s %s' % (w1, w2))


Class = Markovgen


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: