mirror of
https://github.com/progval/Limnoria.git
synced 2025-04-26 04:51:06 -05:00
Fix our RE parsing to handle multiple backslashes before the separator
This also adds support for using brace pairs ({}, [], (), <>) as the separators for m//.
This commit is contained in:
parent
0d4ff7f3dc
commit
0c42ea111a
@ -1,6 +1,6 @@
|
|||||||
###
|
###
|
||||||
# Copyright (c) 2002-2005, Jeremiah Fincher
|
# Copyright (c) 2002-2005, Jeremiah Fincher
|
||||||
# Copyright (c) 2008, James Vega
|
# Copyright (c) 2008-2009, James Vega
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
@ -112,36 +112,42 @@ def quoted(s):
|
|||||||
"""Returns a quoted s."""
|
"""Returns a quoted s."""
|
||||||
return '"%s"' % s
|
return '"%s"' % s
|
||||||
|
|
||||||
def _getSep(s):
|
_openers = '{[(<'
|
||||||
|
_closers = '}])>'
|
||||||
|
def _getSep(s, allowBraces=False):
|
||||||
if len(s) < 2:
|
if len(s) < 2:
|
||||||
raise ValueError, 'string given to _getSep is too short: %r' % s
|
raise ValueError, 'string given to _getSep is too short: %r' % s
|
||||||
|
if allowBraces:
|
||||||
|
braces = _closers
|
||||||
|
else:
|
||||||
|
braces = _openers + _closers
|
||||||
if s.startswith('m') or s.startswith('s'):
|
if s.startswith('m') or s.startswith('s'):
|
||||||
separator = s[1]
|
separator = s[1]
|
||||||
else:
|
else:
|
||||||
separator = s[0]
|
separator = s[0]
|
||||||
if separator.isalnum() or separator in '{}[]()<>':
|
if separator.isalnum() or separator in braces:
|
||||||
raise ValueError, \
|
raise ValueError, \
|
||||||
'Invalid separator: separator must not be alphanumeric or in ' \
|
'Invalid separator: separator must not be alphanumeric or in ' \
|
||||||
'"{}[]()<>"'
|
'"%s"' % braces
|
||||||
return separator
|
return separator
|
||||||
|
|
||||||
def _getSplitterRe(s):
|
|
||||||
separator = _getSep(s)
|
|
||||||
return re.compile(r'(?<!\\)%s' % re.escape(separator))
|
|
||||||
|
|
||||||
def perlReToPythonRe(s):
|
def perlReToPythonRe(s):
|
||||||
"""Converts a string representation of a Perl regular expression (i.e.,
|
"""Converts a string representation of a Perl regular expression (i.e.,
|
||||||
m/^foo$/i or /foo|bar/) to a Python regular expression.
|
m/^foo$/i or /foo|bar/) to a Python regular expression.
|
||||||
"""
|
"""
|
||||||
sep = _getSep(s)
|
opener = closer = _getSep(s, True)
|
||||||
splitter = _getSplitterRe(s)
|
if opener in '{[(<':
|
||||||
|
closer = _closers[_openers.index(opener)]
|
||||||
|
opener = re.escape(opener)
|
||||||
|
closer = re.escape(closer)
|
||||||
|
matcher = re.compile(r'm?%s((?:\\.|[^\\])*)%s(.*)' % (opener, closer))
|
||||||
try:
|
try:
|
||||||
(kind, regexp, flags) = splitter.split(s)
|
(regexp, flags) = matcher.match(s).groups()
|
||||||
except ValueError: # Unpack list of wrong size.
|
except AttributeError: # Unpack list of wrong size.
|
||||||
raise ValueError, 'Must be of the form m/.../ or /.../'
|
raise ValueError, 'Must be of the form m/.../ or /.../'
|
||||||
regexp = regexp.replace('\\'+sep, sep)
|
regexp = regexp.replace('\\'+opener, opener)
|
||||||
if kind not in ('', 'm'):
|
if opener != closer:
|
||||||
raise ValueError, 'Invalid kind: must be in ("", "m")'
|
regexp = regexp.replace('\\'+closer, closer)
|
||||||
flag = 0
|
flag = 0
|
||||||
try:
|
try:
|
||||||
for c in flags.upper():
|
for c in flags.upper():
|
||||||
@ -159,17 +165,17 @@ def perlReToReplacer(s):
|
|||||||
replacement.
|
replacement.
|
||||||
"""
|
"""
|
||||||
sep = _getSep(s)
|
sep = _getSep(s)
|
||||||
splitter = _getSplitterRe(s)
|
escaped = re.escape(sep)
|
||||||
|
matcher = re.compile(r's%s((?:\\.|[^\\])*)%s((?:\\%s|[^\\])*)%s(.*)'
|
||||||
|
% (escaped, escaped, escaped, escaped))
|
||||||
try:
|
try:
|
||||||
(kind, regexp, replace, flags) = splitter.split(s)
|
(regexp, replace, flags) = matcher.match(s).groups()
|
||||||
except ValueError: # Unpack list of wrong size.
|
except AttributeError: # Unpack list of wrong size.
|
||||||
raise ValueError, 'Must be of the form s/.../.../'
|
raise ValueError, 'Must be of the form s/.../.../'
|
||||||
regexp = regexp.replace('\x08', r'\b')
|
regexp = regexp.replace('\x08', r'\b')
|
||||||
replace = replace.replace('\\'+sep, sep)
|
replace = replace.replace('\\'+sep, sep)
|
||||||
for i in xrange(10):
|
for i in xrange(10):
|
||||||
replace = replace.replace(chr(i), r'\%s' % i)
|
replace = replace.replace(chr(i), r'\%s' % i)
|
||||||
if kind != 's':
|
|
||||||
raise ValueError, 'Invalid kind: must be "s"'
|
|
||||||
g = False
|
g = False
|
||||||
if 'g' in flags:
|
if 'g' in flags:
|
||||||
g = True
|
g = True
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
###
|
###
|
||||||
# Copyright (c) 2002-2005, Jeremiah Fincher
|
# Copyright (c) 2002-2005, Jeremiah Fincher
|
||||||
|
# Copyright (c) 2009, James Vega
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
@ -276,6 +277,8 @@ class StrTest(SupyTestCase):
|
|||||||
def testP2PReDifferentSeparator(self):
|
def testP2PReDifferentSeparator(self):
|
||||||
r = utils.str.perlReToPythonRe('m!foo!')
|
r = utils.str.perlReToPythonRe('m!foo!')
|
||||||
self.failUnless(r.search('foo'))
|
self.failUnless(r.search('foo'))
|
||||||
|
r = utils.str.perlReToPythonRe('m{cat}')
|
||||||
|
self.failUnless(r.search('cat'))
|
||||||
|
|
||||||
def testPerlReToReplacer(self):
|
def testPerlReToReplacer(self):
|
||||||
PRTR = utils.str.perlReToReplacer
|
PRTR = utils.str.perlReToReplacer
|
||||||
@ -291,6 +294,8 @@ class StrTest(SupyTestCase):
|
|||||||
self.assertEqual(f('foobarbaz'), 'foorz')
|
self.assertEqual(f('foobarbaz'), 'foorz')
|
||||||
f = PRTR('s/ba\\///g')
|
f = PRTR('s/ba\\///g')
|
||||||
self.assertEqual(f('fooba/rba/z'), 'foorz')
|
self.assertEqual(f('fooba/rba/z'), 'foorz')
|
||||||
|
f = PRTR('s/ba\\\\//g')
|
||||||
|
self.assertEqual(f('fooba\\rba\\z'), 'foorz')
|
||||||
f = PRTR('s/cat/dog/i')
|
f = PRTR('s/cat/dog/i')
|
||||||
self.assertEqual(f('CATFISH'), 'dogFISH')
|
self.assertEqual(f('CATFISH'), 'dogFISH')
|
||||||
f = PRTR('s/foo/foo\/bar/')
|
f = PRTR('s/foo/foo\/bar/')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user