163
161
Patterns are translated to regular expressions to expedite matching.
165
The regular expressions for multiple patterns are aggregated into
166
a super-regex containing groups of up to 99 patterns.
163
The regular expressions for multiple patterns are aggregated into
164
a super-regex containing groups of up to 99 patterns.
167
165
The 99 limitation is due to the grouping limit of the Python re module.
168
166
The resulting super-regex and associated patterns are stored as a list of
169
167
(regex,[patterns]) in _regex_patterns.
171
169
For performance reasons the patterns are categorised as extension patterns
172
170
(those that match against a file extension), basename patterns
173
171
(those that match against the basename of the filename),
174
172
and fullpath patterns (those that match against the full path).
175
The translations used for extensions and basenames are relatively simpler
173
The translations used for extensions and basenames are relatively simpler
176
174
and therefore faster to perform than the fullpath patterns.
178
Also, the extension patterns are more likely to find a match and
176
Also, the extension patterns are more likely to find a match and
179
177
so are matched first, then the basename patterns, then the fullpath
182
# We want to _add_patterns in a specific order (as per type_list below)
183
# starting with the shortest and going to the longest.
184
# As some Python versions don't support ordered dicts the list below is
185
# used to select inputs for _add_pattern in a specific order.
186
pattern_types = [ "extension", "basename", "fullpath" ]
190
"translator" : _sub_extension,
191
"prefix" : r'(?:.*/)?(?!.*/)(?:.*\.)'
194
"translator" : _sub_basename,
195
"prefix" : r'(?:.*/)?(?!.*/)'
198
"translator" : _sub_fullpath,
203
180
def __init__(self, patterns):
204
181
self._regex_patterns = []
210
185
for pat in patterns:
211
186
pat = normalize_pattern(pat)
212
pattern_lists[Globster.identify(pat)].append(pat)
213
pi = Globster.pattern_info
214
for t in Globster.pattern_types:
215
self._add_patterns(pattern_lists[t], pi[t]["translator"],
187
if pat.startswith(u'RE:') or u'/' in pat:
188
path_patterns.append(pat)
189
elif pat.startswith(u'*.'):
190
ext_patterns.append(pat)
192
base_patterns.append(pat)
193
self._add_patterns(ext_patterns,_sub_extension,
194
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
195
self._add_patterns(base_patterns,_sub_basename,
196
prefix=r'(?:.*/)?(?!.*/)')
197
self._add_patterns(path_patterns,_sub_fullpath)
218
199
def _add_patterns(self, patterns, translator, prefix=''):
220
201
grouped_rules = ['(%s)' % translator(pat) for pat in patterns[:99]]
221
202
joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
222
self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
203
self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
224
205
patterns = patterns[99:]
226
207
def match(self, filename):
227
208
"""Searches for a pattern that matches the given filename.
229
210
:return: A matching pattern or None if there is no matching pattern.
232
for regex, patterns in self._regex_patterns:
233
match = regex.match(filename)
235
return patterns[match.lastindex -1]
236
except errors.InvalidPattern, e:
237
# We can't show the default e.msg to the user as that's for
238
# the combined pattern we sent to regex. Instead we indicate to
239
# the user that an ignore file needs fixing.
240
mutter('Invalid pattern found in regex: %s.', e.msg)
241
e.msg = "File ~/.bazaar/ignore or .bzrignore contains error(s)."
243
for _, patterns in self._regex_patterns:
245
if not Globster.is_pattern_valid(p):
246
bad_patterns += ('\n %s' % p)
247
e.msg += bad_patterns
212
for regex, patterns in self._regex_patterns:
213
match = regex.match(filename)
215
return patterns[match.lastindex -1]
252
def identify(pattern):
253
"""Returns pattern category.
255
:param pattern: normalized pattern.
256
Identify if a pattern is fullpath, basename or extension
257
and returns the appropriate type.
259
if pattern.startswith(u'RE:') or u'/' in pattern:
261
elif pattern.startswith(u'*.'):
267
def is_pattern_valid(pattern):
268
"""Returns True if pattern is valid.
270
:param pattern: Normalized pattern.
271
is_pattern_valid() assumes pattern to be normalized.
272
see: globbing.normalize_pattern
275
translator = Globster.pattern_info[Globster.identify(pattern)]["translator"]
276
tpattern = '(%s)' % translator(pattern)
278
re_obj = re.compile(tpattern, re.UNICODE)
279
re_obj.search("") # force compile
280
except errors.InvalidPattern, e:
285
class ExceptionGlobster(object):
286
"""A Globster that supports exception patterns.
288
Exceptions are ignore patterns prefixed with '!'. Exception
289
patterns take precedence over regular patterns and cause a
290
matching filename to return None from the match() function.
291
Patterns using a '!!' prefix are highest precedence, and act
292
as regular ignores. '!!' patterns are useful to establish ignores
293
that apply under paths specified by '!' exception patterns.
296
def __init__(self,patterns):
297
ignores = [[], [], []]
299
if p.startswith(u'!!'):
300
ignores[2].append(p[2:])
301
elif p.startswith(u'!'):
302
ignores[1].append(p[1:])
305
self._ignores = [Globster(i) for i in ignores]
307
def match(self, filename):
308
"""Searches for a pattern that matches the given filename.
310
:return: A matching pattern or None if there is no matching pattern.
312
double_neg = self._ignores[2].match(filename)
314
return "!!%s" % double_neg
315
elif self._ignores[1].match(filename):
318
return self._ignores[0].match(filename)
320
class _OrderedGlobster(Globster):
321
"""A Globster that keeps pattern order."""
323
def __init__(self, patterns):
326
:param patterns: sequence of glob patterns
328
# Note: This could be smarter by running like sequences together
329
self._regex_patterns = []
331
pat = normalize_pattern(pat)
332
t = Globster.identify(pat)
333
self._add_patterns([pat], Globster.pattern_info[t]["translator"],
334
Globster.pattern_info[t]["prefix"])
337
_slashes = re.compile(r'[\\/]+')
338
219
def normalize_pattern(pattern):
339
220
"""Converts backslashes in path patterns to forward slashes.
341
222
Doesn't normalize regular expressions - they may contain escapes.
343
if not (pattern.startswith('RE:') or pattern.startswith('!RE:')):
344
pattern = _slashes.sub('/', pattern)
346
pattern = pattern.rstrip('/')
224
if not pattern.startswith('RE:'):
225
pattern = pattern.replace('\\','/')
226
return pattern.rstrip('/')