161
168
Patterns are translated to regular expressions to expedite matching.
163
The regular expressions for multiple patterns are aggregated into
164
a super-regex containing groups of up to 99 patterns.
170
The regular expressions for multiple patterns are aggregated into
171
a super-regex containing groups of up to 99 patterns.
165
172
The 99 limitation is due to the grouping limit of the Python re module.
166
173
The resulting super-regex and associated patterns are stored as a list of
167
174
(regex,[patterns]) in _regex_patterns.
169
176
For performance reasons the patterns are categorised as extension patterns
170
177
(those that match against a file extension), basename patterns
171
178
(those that match against the basename of the filename),
172
179
and fullpath patterns (those that match against the full path).
173
The translations used for extensions and basenames are relatively simpler
180
The translations used for extensions and basenames are relatively simpler
174
181
and therefore faster to perform than the fullpath patterns.
176
Also, the extension patterns are more likely to find a match and
183
Also, the extension patterns are more likely to find a match and
177
184
so are matched first, then the basename patterns, then the fullpath
187
# We want to _add_patterns in a specific order (as per pattern_types below)
188
# starting with the shortest and going to the longest.
189
# As some Python versions don't support ordered dicts, the list below is
190
# used to select inputs for _add_pattern in a specific order.
191
pattern_types = [ "extension", "basename", "fullpath" ]
195
"translator" : _sub_extension,
196
"prefix" : r'(?:.*/)?(?!.*/)(?:.*\.)'
199
"translator" : _sub_basename,
200
"prefix" : r'(?:.*/)?(?!.*/)'
203
"translator" : _sub_fullpath,
180
208
def __init__(self, patterns):
181
209
self._regex_patterns = []
185
215
for pat in patterns:
186
216
pat = normalize_pattern(pat)
187
if pat.startswith(u'RE:') or u'/' in pat:
188
path_patterns.append(pat)
189
elif pat.startswith(u'*.'):
190
ext_patterns.append(pat)
192
base_patterns.append(pat)
193
self._add_patterns(ext_patterns,_sub_extension,
194
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
195
self._add_patterns(base_patterns,_sub_basename,
196
prefix=r'(?:.*/)?(?!.*/)')
197
self._add_patterns(path_patterns,_sub_fullpath)
217
pattern_lists[Globster.identify(pat)].append(pat)
218
pi = Globster.pattern_info
219
for t in Globster.pattern_types:
220
self._add_patterns(pattern_lists[t], pi[t]["translator"],
199
223
def _add_patterns(self, patterns, translator, prefix=''):
201
grouped_rules = ['(%s)' % translator(pat) for pat in patterns[:99]]
226
'(%s)' % translator(pat) for pat in patterns[:99]]
202
227
joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
203
self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
228
# Explicitly use lazy_compile here, because we count on its
229
# nicer error reporting.
230
self._regex_patterns.append((
231
lazy_regex.lazy_compile(joined_rule, re.UNICODE),
205
233
patterns = patterns[99:]
207
235
def match(self, filename):
208
236
"""Searches for a pattern that matches the given filename.
210
238
:return: A matching pattern or None if there is no matching pattern.
212
for regex, patterns in self._regex_patterns:
213
match = regex.match(filename)
215
return patterns[match.lastindex -1]
241
for regex, patterns in self._regex_patterns:
242
match = regex.match(filename)
244
return patterns[match.lastindex -1]
245
except errors.InvalidPattern, e:
246
# We can't show the default e.msg to the user as that's for
247
# the combined pattern we sent to regex. Instead we indicate to
248
# the user that an ignore file needs fixing.
249
mutter('Invalid pattern found in regex: %s.', e.msg)
250
e.msg = "File ~/.bazaar/ignore or .bzrignore contains error(s)."
252
for _, patterns in self._regex_patterns:
254
if not Globster.is_pattern_valid(p):
255
bad_patterns += ('\n %s' % p)
256
e.msg += bad_patterns
261
def identify(pattern):
262
"""Returns pattern category.
264
:param pattern: normalized pattern.
265
Identify if a pattern is fullpath, basename or extension
266
and returns the appropriate type.
268
if pattern.startswith(u'RE:') or u'/' in pattern:
270
elif pattern.startswith(u'*.'):
276
def is_pattern_valid(pattern):
277
"""Returns True if pattern is valid.
279
:param pattern: Normalized pattern.
280
is_pattern_valid() assumes pattern to be normalized.
281
see: globbing.normalize_pattern
284
translator = Globster.pattern_info[Globster.identify(pattern)]["translator"]
285
tpattern = '(%s)' % translator(pattern)
287
re_obj = lazy_regex.lazy_compile(tpattern, re.UNICODE)
288
re_obj.search("") # force compile
289
except errors.InvalidPattern, e:
294
class ExceptionGlobster(object):
295
"""A Globster that supports exception patterns.
297
Exceptions are ignore patterns prefixed with '!'. Exception
298
patterns take precedence over regular patterns and cause a
299
matching filename to return None from the match() function.
300
Patterns using a '!!' prefix are highest precedence, and act
301
as regular ignores. '!!' patterns are useful to establish ignores
302
that apply under paths specified by '!' exception patterns.
305
def __init__(self,patterns):
306
ignores = [[], [], []]
308
if p.startswith(u'!!'):
309
ignores[2].append(p[2:])
310
elif p.startswith(u'!'):
311
ignores[1].append(p[1:])
314
self._ignores = [Globster(i) for i in ignores]
316
def match(self, filename):
317
"""Searches for a pattern that matches the given filename.
319
:return: A matching pattern or None if there is no matching pattern.
321
double_neg = self._ignores[2].match(filename)
323
return "!!%s" % double_neg
324
elif self._ignores[1].match(filename):
327
return self._ignores[0].match(filename)
329
class _OrderedGlobster(Globster):
330
"""A Globster that keeps pattern order."""
332
def __init__(self, patterns):
335
:param patterns: sequence of glob patterns
337
# Note: This could be smarter by running like sequences together
338
self._regex_patterns = []
340
pat = normalize_pattern(pat)
341
t = Globster.identify(pat)
342
self._add_patterns([pat], Globster.pattern_info[t]["translator"],
343
Globster.pattern_info[t]["prefix"])
346
_slashes = lazy_regex.lazy_compile(r'[\\/]+')
219
347
def normalize_pattern(pattern):
220
348
"""Converts backslashes in path patterns to forward slashes.
222
350
Doesn't normalize regular expressions - they may contain escapes.
224
if not pattern.startswith('RE:'):
225
pattern = pattern.replace('\\','/')
226
return pattern.rstrip('/')
352
if not (pattern.startswith('RE:') or pattern.startswith('!RE:')):
353
pattern = _slashes.sub('/', pattern)
355
pattern = pattern.rstrip('/')