mirror of
https://github.com/ncoevoet/ChanTracker.git
synced 2025-04-26 04:51:07 -05:00
modified massrepeat behaviour, better handling of spam pattern
This commit is contained in:
parent
7b30d3b5e8
commit
abd832543e
18
config.py
18
config.py
@ -207,22 +207,24 @@ registry.String('repeat detected',"""comment added on mode changes database, emp
|
|||||||
|
|
||||||
# mass repeat detection
|
# mass repeat detection
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatChars',
|
conf.registerChannelValue(ChanTracker, 'massRepeatChars',
|
||||||
registry.PositiveInteger(20,"""number of chars needed to enter massRepeat detection"""))
|
registry.PositiveInteger(100,"""number of chars needed to enter massRepeat detection"""))
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatPermit',
|
conf.registerChannelValue(ChanTracker, 'massRepeatPermit',
|
||||||
registry.Integer(-1,"""Number of repeated text allowed, -1 to disable, a bit different to repeat, because it doesn't track user but channel messages,
|
registry.Integer(-1,"""Number of repeated text allowed, -1 to disable, tracks message repetition from various sources on the given channel"""))
|
||||||
if repeat comes from differences sources that helps, it also add a pattern that will match future repeat,
|
|
||||||
during massRepeatDuration, note, the first two message doesn't count,
|
|
||||||
so if you want to trigger it after 3 repeat, you must set it to 1"""))
|
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatLife',
|
conf.registerChannelValue(ChanTracker, 'massRepeatLife',
|
||||||
registry.PositiveInteger(60,"""Duration of messages's life in massRepeat counter, in seconds, advice 120"""))
|
registry.PositiveInteger(120,"""Duration of messages's life in massRepeat counter, in seconds"""))
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatPercent',
|
conf.registerChannelValue(ChanTracker, 'massRepeatPercent',
|
||||||
registry.Probability(0.95,"""percentage similarity between previous and current message to trigger a repeat count"""))
|
registry.Probability(0.95,"""percentage similarity between previous and current message to trigger a repeat count"""))
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatMode',
|
conf.registerChannelValue(ChanTracker, 'massRepeatMode',
|
||||||
registry.String('q',"""mode used by the bot when repeat detection is triggered"""))
|
registry.String('b',"""mode used by the bot when repeat detection is triggered"""))
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatDuration',
|
conf.registerChannelValue(ChanTracker, 'massRepeatDuration',
|
||||||
registry.PositiveInteger(180,"""punition in seconds"""))
|
registry.PositiveInteger(1800,"""punition in seconds"""))
|
||||||
conf.registerChannelValue(ChanTracker, 'massRepeatComment',
|
conf.registerChannelValue(ChanTracker, 'massRepeatComment',
|
||||||
registry.String('mass repeat detected',"""comment added on mode changes database, empty for no comment"""))
|
registry.String('mass repeat detected',"""comment added on mode changes database, empty for no comment"""))
|
||||||
|
conf.registerChannelValue(ChanTracker, 'massRepeatPatternLife',
|
||||||
|
registry.PositiveInteger(300,"""duration of pattern life"""))
|
||||||
|
conf.registerChannelValue(ChanTracker, 'massRepeatPatternLength',
|
||||||
|
registry.Integer(-1,"""if -1, it uses the default system to compare strings, otherwise, it try to find the longest common message, and use it as a regexp pattern,
|
||||||
|
if found string < length setted, it uses the default string compare"""))
|
||||||
|
|
||||||
# YES IT'S ANNOYING
|
# YES IT'S ANNOYING
|
||||||
conf.registerChannelValue(ChanTracker, 'capPermit',
|
conf.registerChannelValue(ChanTracker, 'capPermit',
|
||||||
|
154
plugin.py
154
plugin.py
@ -46,6 +46,7 @@ import socket
|
|||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import collections
|
import collections
|
||||||
|
from operator import itemgetter
|
||||||
|
|
||||||
#due to more kind of pattern checked, increase size
|
#due to more kind of pattern checked, increase size
|
||||||
|
|
||||||
@ -2456,19 +2457,20 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
|
|||||||
self._act(irc,channel,mode,best,duration,comment)
|
self._act(irc,channel,mode,best,duration,comment)
|
||||||
self._isBad(irc,channel,best)
|
self._isBad(irc,channel,best)
|
||||||
self.forceTickle = True
|
self.forceTickle = True
|
||||||
if isNotice:
|
if not isMass and isNotice:
|
||||||
isBad = self._isSomething(irc,channel,best,'bad')
|
isBad = self._isSomething(irc,channel,best,'bad')
|
||||||
if isNotice or isBad:
|
if not isMass:
|
||||||
kind = None
|
if isNotice or isBad:
|
||||||
if isBad:
|
kind = None
|
||||||
kind = 'bad'
|
if isBad:
|
||||||
else:
|
kind = 'bad'
|
||||||
kind = 'notice'
|
else:
|
||||||
mode = self.registryValue('%sMode' % kind,channel=channel)
|
kind = 'notice'
|
||||||
duration = self.registryValue('%sDuration' % kind,channel=channel)
|
mode = self.registryValue('%sMode' % kind,channel=channel)
|
||||||
comment = self.registryValue('%sComment' % kind,channel=channel)
|
duration = self.registryValue('%sDuration' % kind,channel=channel)
|
||||||
self._act(irc,channel,mode,best,duration,comment)
|
comment = self.registryValue('%sComment' % kind,channel=channel)
|
||||||
self.forceTickle = True
|
self._act(irc,channel,mode,best,duration,comment)
|
||||||
|
self.forceTickle = True
|
||||||
if self.registryValue('announceNotice',channel=channel):
|
if self.registryValue('announceNotice',channel=channel):
|
||||||
if not chan.isWrong(best):
|
if not chan.isWrong(best):
|
||||||
self._logChan(irc,channel,'[%s] %s notice "%s"' % (channel,msg.prefix,text))
|
self._logChan(irc,channel,'[%s] %s notice "%s"' % (channel,msg.prefix,text))
|
||||||
@ -2547,51 +2549,52 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
|
|||||||
self._act(irc,channel,mode,best,duration,comment)
|
self._act(irc,channel,mode,best,duration,comment)
|
||||||
self._isBad(irc,channel,best)
|
self._isBad(irc,channel,best)
|
||||||
self.forceTickle = True
|
self.forceTickle = True
|
||||||
if isFlood or isHilight or isRepeat or isCap or isCtcp or isLowFlood:
|
if not isMass:
|
||||||
isBad = self._isBad(irc,channel,best)
|
if isFlood or isHilight or isRepeat or isCap or isCtcp or isLowFlood:
|
||||||
kind = None
|
isBad = self._isBad(irc,channel,best)
|
||||||
duration = 0
|
kind = None
|
||||||
if isBad:
|
duration = 0
|
||||||
kind = 'bad'
|
if isBad:
|
||||||
duration = self.registryValue('badDuration',channel=channel)
|
kind = 'bad'
|
||||||
else:
|
duration = self.registryValue('badDuration',channel=channel)
|
||||||
if isFlood:
|
else:
|
||||||
d = self.registryValue('floodDuration',channel=channel)
|
if isFlood:
|
||||||
if d > duration:
|
d = self.registryValue('floodDuration',channel=channel)
|
||||||
kind = 'flood'
|
if d > duration:
|
||||||
duration = d
|
kind = 'flood'
|
||||||
if isLowFlood:
|
duration = d
|
||||||
d = self.registryValue('lowFloodDuration',channel=channel)
|
if isLowFlood:
|
||||||
if d > duration:
|
d = self.registryValue('lowFloodDuration',channel=channel)
|
||||||
kind = 'lowFlood'
|
if d > duration:
|
||||||
duration = d
|
kind = 'lowFlood'
|
||||||
if isRepeat:
|
duration = d
|
||||||
d = self.registryValue('repeatDuration',channel=channel)
|
if isRepeat:
|
||||||
if d > duration:
|
d = self.registryValue('repeatDuration',channel=channel)
|
||||||
kind = 'repeat'
|
if d > duration:
|
||||||
duration = d
|
kind = 'repeat'
|
||||||
if isHilight:
|
duration = d
|
||||||
d = self.registryValue('hilightDuration',channel=channel)
|
if isHilight:
|
||||||
if d > duration:
|
d = self.registryValue('hilightDuration',channel=channel)
|
||||||
kind = 'hilight'
|
if d > duration:
|
||||||
duration = d
|
kind = 'hilight'
|
||||||
if isCap:
|
duration = d
|
||||||
d = self.registryValue('capDuration',channel=channel)
|
if isCap:
|
||||||
if d > duration:
|
d = self.registryValue('capDuration',channel=channel)
|
||||||
kind = 'cap'
|
if d > duration:
|
||||||
duration = d
|
kind = 'cap'
|
||||||
if isCtcp:
|
duration = d
|
||||||
d = self.registryValue('ctcpDuration',channel=channel)
|
if isCtcp:
|
||||||
if d > duration:
|
d = self.registryValue('ctcpDuration',channel=channel)
|
||||||
kind = 'ctcp'
|
if d > duration:
|
||||||
duration = d
|
kind = 'ctcp'
|
||||||
mode = self.registryValue('%sMode' % kind,channel=channel)
|
duration = d
|
||||||
if len(mode) > 1:
|
mode = self.registryValue('%sMode' % kind,channel=channel)
|
||||||
mode = mode[0]
|
if len(mode) > 1:
|
||||||
duration = self.registryValue('%sDuration' % kind,channel=channel)
|
mode = mode[0]
|
||||||
comment = self.registryValue('%sComment' % kind,channel=channel)
|
duration = self.registryValue('%sDuration' % kind,channel=channel)
|
||||||
self._act(irc,channel,mode,best,duration,comment)
|
comment = self.registryValue('%sComment' % kind,channel=channel)
|
||||||
self.forceTickle = True
|
self._act(irc,channel,mode,best,duration,comment)
|
||||||
|
self.forceTickle = True
|
||||||
if not chan.isWrong(best):
|
if not chan.isWrong(best):
|
||||||
# prevent the bot to flood logChannel with bad user craps
|
# prevent the bot to flood logChannel with bad user craps
|
||||||
if self.registryValue('announceCtcp',channel=channel) and isCtcpMsg and not isAction:
|
if self.registryValue('announceCtcp',channel=channel) and isCtcpMsg and not isAction:
|
||||||
@ -3057,12 +3060,31 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
|
|||||||
life = self.registryValue('massRepeatLife',channel=channel)
|
life = self.registryValue('massRepeatLife',channel=channel)
|
||||||
if not channel in chan.repeatLogs or chan.repeatLogs[channel].timeout != life:
|
if not channel in chan.repeatLogs or chan.repeatLogs[channel].timeout != life:
|
||||||
chan.repeatLogs[channel] = utils.structures.TimeoutQueue(life)
|
chan.repeatLogs[channel] = utils.structures.TimeoutQueue(life)
|
||||||
|
patchan = 'pattern%s' % channel
|
||||||
|
# specific case where bot will try to find the largest pattern to use
|
||||||
|
if self.registryValue('massRepeatPatternLength',channel=channel) > 0:
|
||||||
|
if not patchan in chan.repeatLogs or chan.repeatLogs[channel].timeout != self.registryValue('massRepeatPatternLife',channel=channel):
|
||||||
|
chan.repeatLogs[patchan] = utils.structures.TimeoutQueue(self.registryValue('massRepeatPatternLife',channel=channel))
|
||||||
|
logs = chan.repeatLogs[patchan]
|
||||||
|
for msg in logs:
|
||||||
|
# if we find the string in the message, then
|
||||||
|
if message.find(msg) != -1:
|
||||||
|
# increment massrepeat trigger
|
||||||
|
self._isSomething(irc,channel,channel,'massRepeat')
|
||||||
|
return True
|
||||||
logs = chan.repeatLogs[channel]
|
logs = chan.repeatLogs[channel]
|
||||||
trigger = self.registryValue('massRepeatPercent',channel=channel)
|
trigger = self.registryValue('massRepeatPercent',channel=channel)
|
||||||
result = False
|
result = False
|
||||||
flag = False
|
flag = False
|
||||||
for msg in logs:
|
for msg in logs:
|
||||||
if self._strcompare(message,msg) >= trigger:
|
if self._strcompare(message,msg) >= trigger:
|
||||||
|
if self.registryValue('massRepeatPatternLength',channel=channel) > 0:
|
||||||
|
if not patchan in chan.repeatLogs or chan.repeatLogs[channel].timeout != self.registryValue('massRepeatPatternLife',channel=channel):
|
||||||
|
chan.repeatLogs[patchan] = utils.structures.TimeoutQueue(self.registryValue('massRepeatPatternLife',channel=channel))
|
||||||
|
pattern = self._largestpattern(message,msg)
|
||||||
|
if pattern and len(pattern) > self.registryValue('massRepeatPatternLength',channel=channel):
|
||||||
|
self.log.debug('mass repeat pattern added %s' % pattern)
|
||||||
|
chan.repeatLogs[patchan].enqueue(pattern)
|
||||||
flag = True
|
flag = True
|
||||||
break
|
break
|
||||||
if flag:
|
if flag:
|
||||||
@ -3085,10 +3107,26 @@ class ChanTracker(callbacks.Plugin,plugins.ChannelDBHandler):
|
|||||||
def _strcompare (self,a,b):
|
def _strcompare (self,a,b):
|
||||||
# return [0 - 1] ratio between two string
|
# return [0 - 1] ratio between two string
|
||||||
# jaccard algo
|
# jaccard algo
|
||||||
sa, sb = set(a), set(b)
|
sa, sb = set(a.lower()), set(b.lower())
|
||||||
n = len(sa.intersection(sb))
|
n = len(sa.intersection(sb))
|
||||||
jacc = n / float(len(sa) + len(sb) - n)
|
jacc = n / float(len(sa) + len(sb) - n)
|
||||||
return jacc
|
return jacc
|
||||||
|
|
||||||
|
def _largestpattern (self,s1,s2):
|
||||||
|
s1 = s1.lower()
|
||||||
|
s2 = s2.lower()
|
||||||
|
m = [[0] * (1 + len(s2)) for i in xrange(1 + len(s1))]
|
||||||
|
longest, x_longest = 0, 0
|
||||||
|
for x in xrange(1, 1 + len(s1)):
|
||||||
|
for y in xrange(1, 1 + len(s2)):
|
||||||
|
if s1[x - 1] == s2[y - 1]:
|
||||||
|
m[x][y] = m[x - 1][y - 1] + 1
|
||||||
|
if m[x][y] > longest:
|
||||||
|
longest = m[x][y]
|
||||||
|
x_longest = x
|
||||||
|
else:
|
||||||
|
m[x][y] = 0
|
||||||
|
return s1[x_longest - longest: x_longest]
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._ircs = ircutils.IrcDict()
|
self._ircs = ircutils.IrcDict()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user