modified massrepeat behaviour, better handling of spam pattern

This commit is contained in:
Nicolas Coevoet 2014-09-01 22:58:12 +02:00
parent 7b30d3b5e8
commit abd832543e
2 changed files with 106 additions and 66 deletions

View File

@ -207,22 +207,24 @@ registry.String('repeat detected',"""comment added on mode changes database, emp
# mass repeat detection # mass repeat detection
conf.registerChannelValue(ChanTracker, 'massRepeatChars', conf.registerChannelValue(ChanTracker, 'massRepeatChars',
registry.PositiveInteger(20,"""number of chars needed to enter massRepeat detection""")) registry.PositiveInteger(100,"""number of chars needed to enter massRepeat detection"""))
conf.registerChannelValue(ChanTracker, 'massRepeatPermit', conf.registerChannelValue(ChanTracker, 'massRepeatPermit',
registry.Integer(-1,"""Number of repeated text allowed, -1 to disable, a bit different to repeat, because it doesn't track user but channel messages, registry.Integer(-1,"""Number of repeated text allowed, -1 to disable, tracks message repetition from various sources on the given channel"""))
if repeat comes from differences sources that helps, it also add a pattern that will match future repeat,
during massRepeatDuration, note, the first two message doesn't count,
so if you want to trigger it after 3 repeat, you must set it to 1"""))
conf.registerChannelValue(ChanTracker, 'massRepeatLife', conf.registerChannelValue(ChanTracker, 'massRepeatLife',
registry.PositiveInteger(60,"""Duration of messages's life in massRepeat counter, in seconds, advice 120""")) registry.PositiveInteger(120,"""Duration of messages's life in massRepeat counter, in seconds"""))
conf.registerChannelValue(ChanTracker, 'massRepeatPercent', conf.registerChannelValue(ChanTracker, 'massRepeatPercent',
registry.Probability(0.95,"""percentage similarity between previous and current message to trigger a repeat count""")) registry.Probability(0.95,"""percentage similarity between previous and current message to trigger a repeat count"""))
conf.registerChannelValue(ChanTracker, 'massRepeatMode', conf.registerChannelValue(ChanTracker, 'massRepeatMode',
registry.String('q',"""mode used by the bot when repeat detection is triggered""")) registry.String('b',"""mode used by the bot when repeat detection is triggered"""))
conf.registerChannelValue(ChanTracker, 'massRepeatDuration', conf.registerChannelValue(ChanTracker, 'massRepeatDuration',
registry.PositiveInteger(180,"""punition in seconds""")) registry.PositiveInteger(1800,"""punition in seconds"""))
conf.registerChannelValue(ChanTracker, 'massRepeatComment', conf.registerChannelValue(ChanTracker, 'massRepeatComment',
registry.String('mass repeat detected',"""comment added on mode changes database, empty for no comment""")) registry.String('mass repeat detected',"""comment added on mode changes database, empty for no comment"""))
# Lifetime of the per-channel queue that stores learned mass-repeat patterns;
# plugin.py passes this value as the TimeoutQueue timeout when it creates that
# queue (presumably seconds, like massRepeatLife -- TODO confirm).
# NOTE(review): the visible queue-reset check in plugin.py compares
# chan.repeatLogs[channel].timeout (the message queue), not the pattern
# queue's own timeout, against this value -- confirm that is intended.
conf.registerChannelValue(ChanTracker, 'massRepeatPatternLife',
registry.PositiveInteger(300,"""duration of pattern life"""))
# Pattern learning is only active when this value is > 0. When two messages
# are judged similar, their longest common substring is stored as a pattern
# only if it is strictly longer than this value.
# NOTE(review): the help text says "regexp pattern", but the visible matching
# code does a plain substring search (message.find) -- confirm the wording.
conf.registerChannelValue(ChanTracker, 'massRepeatPatternLength',
registry.Integer(-1,"""if -1, it uses the default system to compare strings, otherwise, it try to find the longest common message, and use it as a regexp pattern,
if found string < length setted, it uses the default string compare"""))
# YES IT'S ANNOYING # YES IT'S ANNOYING
conf.registerChannelValue(ChanTracker, 'capPermit', conf.registerChannelValue(ChanTracker, 'capPermit',

154
plugin.py
View File

@ -46,6 +46,7 @@ import socket
import re import re
import sqlite3 import sqlite3
import collections import collections
from operator import itemgetter
#due to more kind of pattern checked, increase size #due to more kind of pattern checked, increase size
@ -2456,19 +2457,20 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
self._act(irc,channel,mode,best,duration,comment) self._act(irc,channel,mode,best,duration,comment)
self._isBad(irc,channel,best) self._isBad(irc,channel,best)
self.forceTickle = True self.forceTickle = True
if isNotice: if not isMass and isNotice:
isBad = self._isSomething(irc,channel,best,'bad') isBad = self._isSomething(irc,channel,best,'bad')
if isNotice or isBad: if not isMass:
kind = None if isNotice or isBad:
if isBad: kind = None
kind = 'bad' if isBad:
else: kind = 'bad'
kind = 'notice' else:
mode = self.registryValue('%sMode' % kind,channel=channel) kind = 'notice'
duration = self.registryValue('%sDuration' % kind,channel=channel) mode = self.registryValue('%sMode' % kind,channel=channel)
comment = self.registryValue('%sComment' % kind,channel=channel) duration = self.registryValue('%sDuration' % kind,channel=channel)
self._act(irc,channel,mode,best,duration,comment) comment = self.registryValue('%sComment' % kind,channel=channel)
self.forceTickle = True self._act(irc,channel,mode,best,duration,comment)
self.forceTickle = True
if self.registryValue('announceNotice',channel=channel): if self.registryValue('announceNotice',channel=channel):
if not chan.isWrong(best): if not chan.isWrong(best):
self._logChan(irc,channel,'[%s] %s notice "%s"' % (channel,msg.prefix,text)) self._logChan(irc,channel,'[%s] %s notice "%s"' % (channel,msg.prefix,text))
@ -2547,51 +2549,52 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
self._act(irc,channel,mode,best,duration,comment) self._act(irc,channel,mode,best,duration,comment)
self._isBad(irc,channel,best) self._isBad(irc,channel,best)
self.forceTickle = True self.forceTickle = True
if isFlood or isHilight or isRepeat or isCap or isCtcp or isLowFlood: if not isMass:
isBad = self._isBad(irc,channel,best) if isFlood or isHilight or isRepeat or isCap or isCtcp or isLowFlood:
kind = None isBad = self._isBad(irc,channel,best)
duration = 0 kind = None
if isBad: duration = 0
kind = 'bad' if isBad:
duration = self.registryValue('badDuration',channel=channel) kind = 'bad'
else: duration = self.registryValue('badDuration',channel=channel)
if isFlood: else:
d = self.registryValue('floodDuration',channel=channel) if isFlood:
if d > duration: d = self.registryValue('floodDuration',channel=channel)
kind = 'flood' if d > duration:
duration = d kind = 'flood'
if isLowFlood: duration = d
d = self.registryValue('lowFloodDuration',channel=channel) if isLowFlood:
if d > duration: d = self.registryValue('lowFloodDuration',channel=channel)
kind = 'lowFlood' if d > duration:
duration = d kind = 'lowFlood'
if isRepeat: duration = d
d = self.registryValue('repeatDuration',channel=channel) if isRepeat:
if d > duration: d = self.registryValue('repeatDuration',channel=channel)
kind = 'repeat' if d > duration:
duration = d kind = 'repeat'
if isHilight: duration = d
d = self.registryValue('hilightDuration',channel=channel) if isHilight:
if d > duration: d = self.registryValue('hilightDuration',channel=channel)
kind = 'hilight' if d > duration:
duration = d kind = 'hilight'
if isCap: duration = d
d = self.registryValue('capDuration',channel=channel) if isCap:
if d > duration: d = self.registryValue('capDuration',channel=channel)
kind = 'cap' if d > duration:
duration = d kind = 'cap'
if isCtcp: duration = d
d = self.registryValue('ctcpDuration',channel=channel) if isCtcp:
if d > duration: d = self.registryValue('ctcpDuration',channel=channel)
kind = 'ctcp' if d > duration:
duration = d kind = 'ctcp'
mode = self.registryValue('%sMode' % kind,channel=channel) duration = d
if len(mode) > 1: mode = self.registryValue('%sMode' % kind,channel=channel)
mode = mode[0] if len(mode) > 1:
duration = self.registryValue('%sDuration' % kind,channel=channel) mode = mode[0]
comment = self.registryValue('%sComment' % kind,channel=channel) duration = self.registryValue('%sDuration' % kind,channel=channel)
self._act(irc,channel,mode,best,duration,comment) comment = self.registryValue('%sComment' % kind,channel=channel)
self.forceTickle = True self._act(irc,channel,mode,best,duration,comment)
self.forceTickle = True
if not chan.isWrong(best): if not chan.isWrong(best):
# prevent the bot to flood logChannel with bad user craps # prevent the bot to flood logChannel with bad user craps
if self.registryValue('announceCtcp',channel=channel) and isCtcpMsg and not isAction: if self.registryValue('announceCtcp',channel=channel) and isCtcpMsg and not isAction:
@ -3057,12 +3060,31 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
life = self.registryValue('massRepeatLife',channel=channel) life = self.registryValue('massRepeatLife',channel=channel)
if not channel in chan.repeatLogs or chan.repeatLogs[channel].timeout != life: if not channel in chan.repeatLogs or chan.repeatLogs[channel].timeout != life:
chan.repeatLogs[channel] = utils.structures.TimeoutQueue(life) chan.repeatLogs[channel] = utils.structures.TimeoutQueue(life)
patchan = 'pattern%s' % channel
# specific case where bot will try to find the largest pattern to use
if self.registryValue('massRepeatPatternLength',channel=channel) > 0:
if not patchan in chan.repeatLogs or chan.repeatLogs[channel].timeout != self.registryValue('massRepeatPatternLife',channel=channel):
chan.repeatLogs[patchan] = utils.structures.TimeoutQueue(self.registryValue('massRepeatPatternLife',channel=channel))
logs = chan.repeatLogs[patchan]
for msg in logs:
# if we find the string in the message, then
if message.find(msg) != -1:
# increment massrepeat trigger
self._isSomething(irc,channel,channel,'massRepeat')
return True
logs = chan.repeatLogs[channel] logs = chan.repeatLogs[channel]
trigger = self.registryValue('massRepeatPercent',channel=channel) trigger = self.registryValue('massRepeatPercent',channel=channel)
result = False result = False
flag = False flag = False
for msg in logs: for msg in logs:
if self._strcompare(message,msg) >= trigger: if self._strcompare(message,msg) >= trigger:
if self.registryValue('massRepeatPatternLength',channel=channel) > 0:
if not patchan in chan.repeatLogs or chan.repeatLogs[channel].timeout != self.registryValue('massRepeatPatternLife',channel=channel):
chan.repeatLogs[patchan] = utils.structures.TimeoutQueue(self.registryValue('massRepeatPatternLife',channel=channel))
pattern = self._largestpattern(message,msg)
if pattern and len(pattern) > self.registryValue('massRepeatPatternLength',channel=channel):
self.log.debug('mass repeat pattern added %s' % pattern)
chan.repeatLogs[patchan].enqueue(pattern)
flag = True flag = True
break break
if flag: if flag:
@ -3085,10 +3107,26 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
def _strcompare (self,a,b): def _strcompare (self,a,b):
# return [0 - 1] ratio between two string # return [0 - 1] ratio between two string
# jaccard algo # jaccard algo
sa, sb = set(a), set(b) sa, sb = set(a.lower()), set(b.lower())
n = len(sa.intersection(sb)) n = len(sa.intersection(sb))
jacc = n / float(len(sa) + len(sb) - n) jacc = n / float(len(sa) + len(sb) - n)
return jacc return jacc
def _largestpattern (self,s1,s2):
s1 = s1.lower()
s2 = s2.lower()
m = [[0] * (1 + len(s2)) for i in xrange(1 + len(s1))]
longest, x_longest = 0, 0
for x in xrange(1, 1 + len(s1)):
for y in xrange(1, 1 + len(s2)):
if s1[x - 1] == s2[y - 1]:
m[x][y] = m[x - 1][y - 1] + 1
if m[x][y] > longest:
longest = m[x][y]
x_longest = x
else:
m[x][y] = 0
return s1[x_longest - longest: x_longest]
def reset(self): def reset(self):
self._ircs = ircutils.IrcDict() self._ircs = ircutils.IrcDict()