# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Tools for converting globs to regular expressions.

This module provides functions for converting shell-like globs to regular
expressions.
"""
import re

from bzrlib.trace import (
    warning
    )
class Replacer(object):
    """Do a multiple-pattern substitution.

    The patterns and substitutions are combined into one, so the result of
    one replacement is never substituted again. Add the patterns and
    replacements via the add method and then call the object. The patterns
    must not contain capturing groups.
    """

    # Matches the two-character placeholder (backslash, ampersand) inside a
    # string replacement template.  Same pattern text as ur'\\&', spelled
    # with explicit escapes so the literal is valid on Python 2 and 3 alike.
    _expand = re.compile(u'\\\\&')

    def __init__(self, source=None):
        """Create a replacer, optionally seeded from another one.

        :param source: A Replacer whose patterns and replacements are copied
            (the lists are not shared), or None to start with no rules.
        """
        # Combined regex; built lazily by __call__ and reset whenever the
        # rule list changes.
        self._pat = None
        if source is not None:
            self._pats = list(source._pats)
            self._funs = list(source._funs)
        else:
            self._pats = []
            self._funs = []

    def add(self, pat, fun):
        r"""Add a pattern and replacement.

        The pattern must not contain capturing groups.
        The replacement might be either a string template in which \& will be
        replaced with the match, or a function that will get the matching text
        as argument. It does not get match object, because capturing is
        forbidden anyway.
        """
        self._pat = None
        self._pats.append(pat)
        self._funs.append(fun)

    def add_replacer(self, replacer):
        r"""Add all patterns from another replacer.

        All patterns and replacements from replacer are appended to the ones
        already defined.
        """
        self._pat = None
        self._pats.extend(replacer._pats)
        self._funs.extend(replacer._funs)

    def __call__(self, text):
        """Apply every registered replacement to text in a single pass.

        :param text: The string to transform.
        :return: The transformed string; text itself when no rules exist.
        """
        if not self._pats:
            # With no rules the combined pattern would be empty and match
            # everywhere without setting lastindex; there is nothing to do.
            return text
        if self._pat is None:
            # Each rule becomes one capturing group, so the index of the
            # group that matched identifies the rule (see _do_sub).
            self._pat = re.compile(
                u'|'.join([u'(%s)' % p for p in self._pats]),
                re.UNICODE)
        return self._pat.sub(self._do_sub, text)

    def _do_sub(self, m):
        """Return the replacement for match object m.

        The rule is selected by m.lastindex, which is why user patterns must
        not contain capturing groups of their own.
        """
        fun = self._funs[m.lastindex - 1]
        matched = m.group(0)
        if callable(fun):
            return fun(matched)
        # Substitute through a function so the matched text is inserted
        # verbatim.  Passing it as a replacement *string* would make re
        # interpret backslash sequences contained in the match itself.
        return self._expand.sub(lambda _placeholder: matched, fun)
88
_sub_named = Replacer()
89
_sub_named.add(ur'\[:digit:\]', ur'\d')
90
_sub_named.add(ur'\[:space:\]', ur'\s')
91
_sub_named.add(ur'\[:alnum:\]', ur'\w')
92
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')
93
_sub_named.add(ur'\[:blank:\]', ur' \t')
94
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')
def _sub_group(m):
    """Translate one glob bracket expression into a regex character class.

    :param m: The matched text of the whole expression, including the
        enclosing square brackets.
    :return: The equivalent regex character class, with any named classes
        expanded by _sub_named.
    """
    # A leading '!' or '^' negates the class; regex only understands '^'.
    if m[1] in (u'!', u'^'):
        return u'[^' + _sub_named(m[2:-1]) + u']'
    return u'[' + _sub_named(m[1:-1]) + u']'
def _invalid_regex(repl):
    """Build a replacement function for a disallowed regex construct.

    :param repl: The text to substitute for the offending construct.
    :return: A function taking the matched text that emits a warning naming
        the construct and the substitution, then returns repl.
    """
    def _(m):
        warning(u"'%s' not allowed within a regular expression. "
                "Replacing with '%s'" % (m, repl))
        return repl
    return _
# Rewrites applied to patterns given as explicit regular expressions
# ("RE:..." patterns).  Capturing constructs are neutralised because the
# callers identify the matching pattern through the match's lastindex.
_sub_re = Replacer()
# Drop the RE: marker itself.
_sub_re.add(u'^RE:', u'')
# Turn plain capturing groups into non-capturing ones.
_sub_re.add(u'\((?!\?)', u'(?:')
# Named groups and back-references to them are not allowed; warn and
# replace them.
# NOTE(review): '.*' below is greedy, so with several named groups in one
# pattern the match runs to the last '>' - confirm whether '[^>]*' was
# intended before changing it.
_sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:'))
_sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u''))
118
_sub_fullpath = Replacer()
119
_sub_fullpath.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex
120
_sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
121
_sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path
122
_sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed
123
_sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
124
_sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or /
125
_sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere
126
_sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere
129
_sub_basename = Replacer()
130
_sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
131
_sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed
132
_sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
133
_sub_basename.add(ur'\*+', ur'.*') # * everywhere
134
_sub_basename.add(ur'\?', ur'.') # ? everywhere
def _sub_extension(pattern):
    """Translate an extension glob into a regex fragment.

    The first two characters of pattern are dropped and the remainder is
    translated with the basename rules.  Globster only routes patterns that
    start with '*.' here and supplies the leading part through its prefix.

    :param pattern: The extension glob to translate.
    :return: The regex text for the part after the first two characters.
    """
    return _sub_basename(pattern[2:])
class Globster(object):
    """A simple wrapper for a set of glob patterns.

    Provides the capability to search the patterns to find a match for
    a given filename (including the full path).

    Patterns are translated to regular expressions to expedite matching.

    The regular expressions for multiple patterns are aggregated into
    a super-regex containing groups of up to 99 patterns.
    The 99 limitation is due to the grouping limit of the Python re module.
    The resulting super-regex and associated patterns are stored as a list of
    (regex,[patterns]) in _regex_patterns.

    For performance reasons the patterns are categorised as extension patterns
    (those that match against a file extension), basename patterns
    (those that match against the basename of the filename),
    and fullpath patterns (those that match against the full path).
    The translations used for extensions and basenames are relatively simpler
    and therefore faster to perform than the fullpath patterns.

    Also, the extension patterns are more likely to find a match and
    so are matched first, then the basename patterns, then the fullpath
    patterns.
    """

    def __init__(self, patterns):
        """Categorise patterns and compile them into super-regexes.

        :param patterns: An iterable of glob pattern strings.
        """
        self._regex_patterns = []
        path_patterns = []
        base_patterns = []
        ext_patterns = []
        for pat in patterns:
            if pat.startswith(u'RE:') or u'/' in pat:
                path_patterns.append(pat)
            elif pat.startswith(u'*.'):
                ext_patterns.append(pat)
            else:
                base_patterns.append(pat)
        # Order matters: match() walks _regex_patterns front to back, so
        # extension patterns are tried first and full-path patterns last.
        self._add_patterns(ext_patterns, _sub_extension,
            prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
        self._add_patterns(base_patterns, _sub_basename,
            prefix=r'(?:.*/)?(?!.*/)')
        self._add_patterns(path_patterns, _sub_fullpath)

    def _add_patterns(self, patterns, translator, prefix=''):
        """Compile patterns in chunks and append them to _regex_patterns.

        :param patterns: A list of glob patterns of one category.
        :param translator: A callable turning one glob into regex text; the
            result must not contain capturing groups.
        :param prefix: Regex text placed in front of every chunk.
        """
        while patterns:
            # At most 99 patterns per regex; each gets its own capturing
            # group so match() can tell which one matched.
            chunk = patterns[:99]
            grouped_rules = ['(%s)' % translator(pat) for pat in chunk]
            joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
            self._regex_patterns.append(
                (re.compile(joined_rule, re.UNICODE), chunk))
            patterns = patterns[99:]

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
        """
        for regex, patterns in self._regex_patterns:
            found = regex.match(filename)
            if found:
                # Group numbers are 1-based, the pattern list is 0-based.
                return patterns[found.lastindex - 1]
        return None