166
161
Patterns are translated to regular expressions to expedite matching.
168
The regular expressions for multiple patterns are aggregated into
169
a super-regex containing groups of up to 99 patterns.
163
The regular expressions for multiple patterns are aggregated into
164
a super-regex containing groups of up to 99 patterns.
170
165
The 99 limitation is due to the grouping limit of the Python re module.
171
166
The resulting super-regex and associated patterns are stored as a list of
172
167
(regex,[patterns]) in _regex_patterns.
174
169
For performance reasons the patterns are categorised as extension patterns
175
170
(those that match against a file extension), basename patterns
176
171
(those that match against the basename of the filename),
177
172
and fullpath patterns (those that match against the full path).
178
The translations used for extensions and basenames are relatively simpler
173
The translations used for extensions and basenames are relatively simpler
179
174
and therefore faster to perform than the fullpath patterns.
181
Also, the extension patterns are more likely to find a match and
176
Also, the extension patterns are more likely to find a match and
182
177
so are matched first, then the basename patterns, then the fullpath
185
# We want to _add_patterns in a specific order (as per type_list below)
186
# starting with the shortest and going to the longest.
187
# As some Python versions don't support ordered dicts the list below is
188
# used to select inputs for _add_pattern in a specific order.
189
pattern_types = [ "extension", "basename", "fullpath" ]
193
"translator" : _sub_extension,
194
"prefix" : r'(?:.*/)?(?!.*/)(?:.*\.)'
197
"translator" : _sub_basename,
198
"prefix" : r'(?:.*/)?(?!.*/)'
201
"translator" : _sub_fullpath,
206
180
def __init__(self, patterns):
207
181
self._regex_patterns = []
213
185
for pat in patterns:
214
186
pat = normalize_pattern(pat)
215
pattern_lists[Globster.identify(pat)].append(pat)
216
pi = Globster.pattern_info
217
for t in Globster.pattern_types:
218
self._add_patterns(pattern_lists[t], pi[t]["translator"],
187
if pat.startswith(u'RE:') or u'/' in pat:
188
path_patterns.append(pat)
189
elif pat.startswith(u'*.'):
190
ext_patterns.append(pat)
192
base_patterns.append(pat)
193
self._add_patterns(ext_patterns,_sub_extension,
194
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
195
self._add_patterns(base_patterns,_sub_basename,
196
prefix=r'(?:.*/)?(?!.*/)')
197
self._add_patterns(path_patterns,_sub_fullpath)
221
199
def _add_patterns(self, patterns, translator, prefix=''):
224
'(%s)' % translator(pat) for pat in patterns[:99]]
201
grouped_rules = ['(%s)' % translator(pat) for pat in patterns[:99]]
225
202
joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
226
# Explicitly use lazy_compile here, because we count on its
227
# nicer error reporting.
228
self._regex_patterns.append((
229
lazy_regex.lazy_compile(joined_rule, re.UNICODE),
203
self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
231
205
patterns = patterns[99:]
233
207
def match(self, filename):
234
208
"""Searches for a pattern that matches the given filename.
236
210
:return A matching pattern or None if there is no matching pattern.
239
for regex, patterns in self._regex_patterns:
240
match = regex.match(filename)
242
return patterns[match.lastindex -1]
243
except errors.InvalidPattern, e:
244
# We can't show the default e.msg to the user as that's for
245
# the combined pattern we sent to regex. Instead we indicate to
246
# the user that an ignore file needs fixing.
247
mutter('Invalid pattern found in regex: %s.', e.msg)
248
e.msg = "File ~/.bazaar/ignore or .bzrignore contains error(s)."
250
for _, patterns in self._regex_patterns:
252
if not Globster.is_pattern_valid(p):
253
bad_patterns += ('\n %s' % p)
254
e.msg += bad_patterns
212
for regex, patterns in self._regex_patterns:
213
match = regex.match(filename)
215
return patterns[match.lastindex -1]
259
def identify(pattern):
260
"""Returns pattern category.
262
:param pattern: normalized pattern.
263
Identify if a pattern is fullpath, basename or extension
264
and returns the appropriate type.
266
if pattern.startswith(u'RE:') or u'/' in pattern:
268
elif pattern.startswith(u'*.'):
274
def is_pattern_valid(pattern):
275
"""Returns True if pattern is valid.
277
:param pattern: Normalized pattern.
278
is_pattern_valid() assumes pattern to be normalized.
279
see: globbing.normalize_pattern
282
translator = Globster.pattern_info[Globster.identify(pattern)]["translator"]
283
tpattern = '(%s)' % translator(pattern)
285
re_obj = lazy_regex.lazy_compile(tpattern, re.UNICODE)
286
re_obj.search("") # force compile
287
except errors.InvalidPattern, e:
292
class ExceptionGlobster(object):
293
"""A Globster that supports exception patterns.
295
Exceptions are ignore patterns prefixed with '!'. Exception
296
patterns take precedence over regular patterns and cause a
297
matching filename to return None from the match() function.
298
Patterns using a '!!' prefix are highest precedence, and act
299
as regular ignores. '!!' patterns are useful to establish ignores
300
that apply under paths specified by '!' exception patterns.
303
def __init__(self,patterns):
304
ignores = [[], [], []]
306
if p.startswith(u'!!'):
307
ignores[2].append(p[2:])
308
elif p.startswith(u'!'):
309
ignores[1].append(p[1:])
312
self._ignores = [Globster(i) for i in ignores]
314
def match(self, filename):
315
"""Searches for a pattern that matches the given filename.
317
:return A matching pattern or None if there is no matching pattern.
319
double_neg = self._ignores[2].match(filename)
321
return "!!%s" % double_neg
322
elif self._ignores[1].match(filename):
325
return self._ignores[0].match(filename)
327
219
class _OrderedGlobster(Globster):
328
220
"""A Globster that keeps pattern order."""
336
228
self._regex_patterns = []
337
229
for pat in patterns:
338
230
pat = normalize_pattern(pat)
339
t = Globster.identify(pat)
340
self._add_patterns([pat], Globster.pattern_info[t]["translator"],
341
Globster.pattern_info[t]["prefix"])
344
_slashes = lazy_regex.lazy_compile(r'[\\/]+')
231
if pat.startswith(u'RE:') or u'/' in pat:
232
self._add_patterns([pat], _sub_fullpath)
233
elif pat.startswith(u'*.'):
234
self._add_patterns([pat], _sub_extension,
235
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
237
self._add_patterns([pat], _sub_basename,
238
prefix=r'(?:.*/)?(?!.*/)')
345
241
def normalize_pattern(pattern):
346
242
"""Converts backslashes in path patterns to forward slashes.
348
244
Doesn't normalize regular expressions - they may contain escapes.
350
if not (pattern.startswith('RE:') or pattern.startswith('!RE:')):
351
pattern = _slashes.sub('/', pattern)
353
pattern = pattern.rstrip('/')
246
if not pattern.startswith('RE:'):
247
pattern = pattern.replace('\\','/')
248
return pattern.rstrip('/')