#!/usr/bin/env python ### # Copyright (c) 2002, Jeremiah Fincher # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions, and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the author of this software nor the name of # contributors to this software may be used to endorse or promote products # derived from this software without specific prior written consent. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ### """ Simple utility functions. """ ## from __future__ import generators __revision__ = "$Id$" import fix import os import re import md5 import sha import string import sgmllib import compiler import textwrap import htmlentitydefs from itertools import imap, ifilter from structures import TwoWayDictionary def normalizeWhitespace(s): """Normalizes the whitespace in a string; \s+ becomes one space.""" return ' '.join(s.split()) class HtmlToText(sgmllib.SGMLParser): """Taken from some eff-bot code on c.l.p.""" entitydefs = htmlentitydefs.entitydefs def __init__(self, tagReplace=' '): self.data = [] self.tagReplace = tagReplace sgmllib.SGMLParser.__init__(self) def unknown_starttag(self, tag, attr): self.data.append(self.tagReplace) def unknown_endtag(self, tag): self.data.append(self.tagReplace) def handle_data(self, data): self.data.append(data) def getText(self): text = ''.join(self.data).strip() return normalizeWhitespace(text) def htmlToText(s, tagReplace=' '): """Turns HTML into text. tagReplace is a string to replace HTML tags with. """ x = HtmlToText(tagReplace) x.feed(s) return x.getText() def eachSubstring(s): """Returns every substring starting at the first index until the last.""" for i in xrange(1, len(s)+1): yield s[:i] def abbrev(strings, d=None): """Returns a dictionary mapping unambiguous abbreviations to full forms.""" if d is None: d = {} for s in strings: for abbreviation in eachSubstring(s): if abbreviation not in d: d[abbreviation] = s else: if abbreviation not in strings: d[abbreviation] = None removals = [] for key in d: if d[key] is None: removals.append(key) for key in removals: del d[key] return d def timeElapsed(elapsed, leadingZeroes=False, years=True, weeks=True, days=True, hours=True, minutes=True, seconds=True): """Given seconds, returns a string with an English description of how much time as passed. leadingZeroes determines whether 0 days, 0 hours, etc. will be printed; the others determine what larger time periods should be used. """ elapsed = int(elapsed) assert years or weeks or days or \ hours or minutes or seconds, 'One flag must be True' ret = [] if years: yrs, elapsed = elapsed // 31536000, elapsed % 31536000 if leadingZeroes or yrs: if yrs: leadingZeroes = True ret.append(nItems('year', yrs)) if weeks: wks, elapsed = elapsed // 604800, elapsed % 604800 if leadingZeroes or wks: if wks: leadingZeroes = True ret.append(nItems('week', wks)) if days: ds, elapsed = elapsed // 86400, elapsed % 86400 if leadingZeroes or ds: if ds: leadingZeroes = True ret.append(nItems('day', ds)) if hours: hrs, elapsed = elapsed // 3600, elapsed % 3600 if leadingZeroes or hrs: if hrs: leadingZeroes = True ret.append(nItems('hour', hrs)) if minutes or seconds: mins, secs = elapsed // 60, elapsed % 60 if leadingZeroes or mins: ret.append(nItems('minute', mins)) if seconds: ret.append(nItems('second', secs)) if len(ret) == 0: raise ValueError, 'Time difference not great enough to be noted.' if len(ret) == 1: return ret[0] else: return commaAndify(ret) def distance(s, t): """Returns the levenshtein edit distance between two strings.""" n = len(s) m = len(t) if n == 0: return m elif m == 0: return n d = [] for i in range(n+1): d.append([]) for j in range(m+1): d[i].append(0) d[0][j] = j d[i][0] = i for i in range(1, n+1): cs = s[i-1] for j in range(1, m+1): ct = t[j-1] cost = int(cs != ct) d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+cost) return d[n][m] _soundextrans = string.maketrans(string.ascii_uppercase, '01230120022455012623010202') _notUpper = string.ascii.translate(string.ascii, string.ascii_uppercase) def soundex(s, length=4): """Returns the soundex hash of a given string.""" s = s.upper() # Make everything uppercase. s = s.translate(string.ascii, _notUpper) # Delete non-letters. if not s: raise ValueError, 'Invalid string for soundex: %s' firstChar = s[0] # Save the first character. s = s.translate(_soundextrans) # Convert to soundex numbers. s = s.lstrip(s[0]) # Remove all repeated first characters. L = [firstChar] for c in s: if c != L[-1]: L.append(c) L = [c for c in L if c != '0'] + (['0']*(length-1)) s = ''.join(L) return length and s[:length] or s.rstrip('0') def dqrepr(s): """Returns a repr() of s guaranteed to be in double quotes.""" # The wankers-that-be decided not to use double-quotes anymore in 2.3. # return '"' + repr("'\x00" + s)[6:] return '"%s"' % s.encode('string_escape').replace('"', '\\"') nonEscapedSlashes = re.compile(r'(?>> nItems('clock', 1) '1 clock' >>> nItems('clock', 10) '10 clocks' >>> nItems('clock', 10, between='grandfather') '10 grandfather clocks' """ if between is None: return '%s %s' % (n, pluralize(item, n)) else: return '%s %s %s' % (n, between, pluralize(item, n)) def be(i): """Returns the form of the verb 'to be' based on the number i.""" if i == 1: return 'is' else: return 'are' def sortBy(f, L, cmp=cmp): """Uses the decorate-sort-undecorate pattern to sort L by function f.""" for (i, elt) in enumerate(L): L[i] = (f(elt), i, elt) L.sort(cmp) for (i, elt) in enumerate(L): L[i] = L[i][2] def sorted(iterable, *args, **kwargs): """Returns a sorted list made from iterable. All other args are given to list.sort unchanged.""" L = list(iterable) L.sort(*args, **kwargs) return L def mktemp(suffix=''): """Gives a decent random string, suitable for a filename.""" import sha import md5 import time import random r = random.Random() m = md5.md5(suffix) r.seed(time.time()) s = str(r.getstate()) for x in xrange(0, random.randrange(400), random.randrange(1, 5)): m.update(str(x)) m.update(s) m.update(str(time.time())) s = m.hexdigest() return sha.sha(s + str(time.time())).hexdigest() + suffix def itersplit(isSeparator, iterable, maxsplit=-1, yieldEmpty=False): """Splits an iterator based on a predicate isSeparator.""" acc = [] for element in iterable: if maxsplit == 0 or not isSeparator(element): acc.append(element) else: maxsplit -= 1 if acc or yieldEmpty: yield acc acc = [] if acc or yieldEmpty: yield acc def flatten(seq, strings=False): """Flattens a list of lists into a single list. See the test for examples. """ for elt in seq: if not strings and type(elt) == str or type(elt) == unicode: yield elt else: try: for x in flatten(elt): yield x except TypeError: yield elt def saltHash(password, salt=None, hash='sha'): if salt is None: salt = mktemp()[:8] if hash == 'sha': hasher = sha.sha elif hash == 'md5': hasher = md5.md5 return '|'.join([salt, hasher(salt + password).hexdigest()]) def safeEval(s, namespace={'True': True, 'False': False, 'None': None}): """Evaluates s, safely. Useful for turning strings into tuples/lists/etc. without unsafely using eval().""" node = compiler.parse(s) nodes = compiler.parse(s).node.nodes if not nodes: if node.__class__ is compiler.ast.Module: return node.doc else: raise ValueError, 'Unsafe string.' node = nodes[0] if node.__class__ is not compiler.ast.Discard: raise ValueError, 'Invalid expression: %s' node = node.getChildNodes()[0] def checkNode(node): if node.__class__ is compiler.ast.Const: return True if node.__class__ in (compiler.ast.List, compiler.ast.Tuple, compiler.ast.Dict): return all(checkNode, node.getChildNodes()) if node.__class__ is compiler.ast.Name: if node.name in namespace: return True else: return False else: return False if checkNode(node): return eval(s, namespace, namespace) else: raise ValueError, 'Unsafe string.' def exnToString(e): """Turns a simple exception instance into a string (better than str(e))""" return '%s: %s' % (e.__class__.__name__, e) class IterableMap(object): """Define .iteritems() in a class and subclass this to get the other iters. """ def iteritems(self): raise NotImplementedError def iterkeys(self): for (key, _) in self.iteritems(): yield key __iter__ = iterkeys def itervalues(self): for (_, value) in self.iteritems(): yield value def items(self): return list(self.iteritems()) def keys(self): return list(self.iterkeys()) def values(self): return list(self.itervalues()) def __len__(self): ret = 0 for _ in self.iteritems(): ret += 1 return ret def __nonzero__(self): for _ in self.iteritems(): return True return False if __name__ == '__main__': import sys, doctest doctest.testmod(sys.modules['__main__']) # vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: