~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2006-2010 Canonical Ltd
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
16
17
"""Tools for converting globs to regular expressions.
18
19
This module provides functions for converting shell-like globs to regular
20
expressions.
21
"""
22
23
import re
24
5326.2.9 by Parth Malwankar
fixed import error in globbing.py
25
from bzrlib import errors
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
26
from bzrlib.trace import (
5326.2.7 by Parth Malwankar
Globster now mutters regex failure message before changing message
27
    mutter,
5326.2.1 by Parth Malwankar
added InvalidPattern error.
28
    warning,
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
29
    )
30
31
32
class Replacer(object):
33
    """Do a multiple-pattern substitution.
34
35
    The patterns and substitutions are combined into one, so the result of
36
    one replacement is never substituted again. Add the patterns and
37
    replacements via the add method and then call the object. The patterns
38
    must not contain capturing groups.
39
    """
40
41
    _expand = re.compile(ur'\\&')
42
43
    def __init__(self, source=None):
44
        self._pat = None
45
        if source:
46
            self._pats = list(source._pats)
47
            self._funs = list(source._funs)
48
        else:
49
            self._pats = []
50
            self._funs = []
51
52
    def add(self, pat, fun):
53
        r"""Add a pattern and replacement.
54
55
        The pattern must not contain capturing groups.
56
        The replacement might be either a string template in which \& will be
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
57
        replaced with the match, or a function that will get the matching text
58
        as argument. It does not get match object, because capturing is
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
59
        forbidden anyway.
60
        """
61
        self._pat = None
62
        self._pats.append(pat)
63
        self._funs.append(fun)
64
65
    def add_replacer(self, replacer):
66
        r"""Add all patterns from another replacer.
67
68
        All patterns and replacements from replacer are appended to the ones
69
        already defined.
70
        """
71
        self._pat = None
72
        self._pats.extend(replacer._pats)
73
        self._funs.extend(replacer._funs)
74
75
    def __call__(self, text):
76
        if not self._pat:
77
            self._pat = re.compile(
78
                    u'|'.join([u'(%s)' % p for p in self._pats]),
79
                    re.UNICODE)
80
        return self._pat.sub(self._do_sub, text)
81
82
    def _do_sub(self, m):
83
        fun = self._funs[m.lastindex - 1]
84
        if hasattr(fun, '__call__'):
85
            return fun(m.group(0))
86
        else:
87
            return self._expand.sub(m.group(0), fun)
88
89
90
_sub_named = Replacer()
91
_sub_named.add(ur'\[:digit:\]', ur'\d')
92
_sub_named.add(ur'\[:space:\]', ur'\s')
93
_sub_named.add(ur'\[:alnum:\]', ur'\w')
94
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')
95
_sub_named.add(ur'\[:blank:\]', ur' \t')
96
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')
97
98
99
def _sub_group(m):
100
    if m[1] in (u'!', u'^'):
101
        return u'[^' + _sub_named(m[2:-1]) + u']'
102
    return u'[' + _sub_named(m[1:-1]) + u']'
103
104
105
def _invalid_regex(repl):
106
    def _(m):
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
107
        warning(u"'%s' not allowed within a regular expression. "
108
                "Replacing with '%s'" % (m, repl))
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
109
        return repl
110
    return _
111
112
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
113
def _trailing_backslashes_regex(m):
2298.8.2 by Kent Gibson
Review fixes for lp86451 patch.
114
    """Check trailing backslashes.
115
116
    Does a head count on trailing backslashes to ensure there isn't an odd
117
    one on the end that would escape the brackets we wrap the RE in.
118
    """
119
    if (len(m) % 2) != 0:
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
120
        warning(u"Regular expressions cannot end with an odd number of '\\'. "
121
                "Dropping the final '\\'.")
122
        return m[:-1]
123
    return m
124
125
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
126
_sub_re = Replacer()
127
_sub_re.add(u'^RE:', u'')
128
_sub_re.add(u'\((?!\?)', u'(?:')
129
_sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:'))
130
_sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u''))
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
131
_sub_re.add(ur'\\+$', _trailing_backslashes_regex)
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
132
133
2135.2.2 by Kent Gibson
Ignore pattern matcher (glob.py) patches:
134
_sub_fullpath = Replacer()
135
_sub_fullpath.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex
136
_sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
137
_sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path
138
_sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed
139
_sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
140
_sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or /
141
_sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere
142
_sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere
143
144
145
_sub_basename = Replacer()
146
_sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
147
_sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed
148
_sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
149
_sub_basename.add(ur'\*+', ur'.*') # * everywhere
150
_sub_basename.add(ur'\?', ur'.') # ? everywhere
151
152
153
def _sub_extension(pattern):
154
    return _sub_basename(pattern[2:])
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
155
156
157
class Globster(object):
158
    """A simple wrapper for a set of glob patterns.
159
160
    Provides the capability to search the patterns to find a match for
161
    a given filename (including the full path).
162
163
    Patterns are translated to regular expressions to expidite matching.
164
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
165
    The regular expressions for multiple patterns are aggregated into
166
    a super-regex containing groups of up to 99 patterns.
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
167
    The 99 limitation is due to the grouping limit of the Python re module.
168
    The resulting super-regex and associated patterns are stored as a list of
169
    (regex,[patterns]) in _regex_patterns.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
170
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
171
    For performance reasons the patterns are categorised as extension patterns
172
    (those that match against a file extension), basename patterns
173
    (those that match against the basename of the filename),
174
    and fullpath patterns (those that match against the full path).
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
175
    The translations used for extensions and basenames are relatively simpler
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
176
    and therefore faster to perform than the fullpath patterns.
177
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
178
    Also, the extension patterns are more likely to find a match and
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
179
    so are matched first, then the basename patterns, then the fullpath
180
    patterns.
181
    """
5050.14.2 by Parth Malwankar
_add_patterns is now done in a specific order in Globster
182
    # We want to _add_patterns in a specific order (as per type_list below)
183
    # starting with the shortest and going to the longest.
184
    # As some Python version don't support ordered dicts the list below is
185
    # used to select inputs for _add_pattern in a specific order.
5050.14.3 by Parth Malwankar
use dict for managining pattern information
186
    pattern_types = [ "extension", "basename", "fullpath" ]
187
188
    pattern_info = {
189
        "extension" : {
190
            "translator" : _sub_extension,
191
            "prefix" : r'(?:.*/)?(?!.*/)(?:.*\.)'
192
        },
193
        "basename" : {
194
            "translator" : _sub_basename,
195
            "prefix" : r'(?:.*/)?(?!.*/)'
196
        },
197
        "fullpath" : {
198
            "translator" : _sub_fullpath,
199
            "prefix" : r''
200
        },
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
201
    }
202
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
203
    def __init__(self, patterns):
204
        self._regex_patterns = []
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
205
        pattern_lists = {
5050.14.3 by Parth Malwankar
use dict for managining pattern information
206
            "extension" : [],
207
            "basename" : [],
208
            "fullpath" : [],
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
209
        }
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
210
        for pat in patterns:
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
211
            pat = normalize_pattern(pat)
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
212
            pattern_lists[Globster.identify(pat)].append(pat)
5050.14.3 by Parth Malwankar
use dict for managining pattern information
213
        pi = Globster.pattern_info
214
        for t in Globster.pattern_types:
215
            self._add_patterns(pattern_lists[t], pi[t]["translator"],
216
                pi[t]["prefix"])
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
217
218
    def _add_patterns(self, patterns, translator, prefix=''):
219
        while patterns:
220
            grouped_rules = ['(%s)' % translator(pat) for pat in patterns[:99]]
221
            joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
222
            self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
223
                patterns[:99]))
224
            patterns = patterns[99:]
225
226
    def match(self, filename):
227
        """Searches for a pattern that matches the given filename.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
228
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
229
        :return A matching pattern or None if there is no matching pattern.
230
        """
5326.2.1 by Parth Malwankar
added InvalidPattern error.
231
        try:
232
            for regex, patterns in self._regex_patterns:
233
                match = regex.match(filename)
234
                if match:
235
                    return patterns[match.lastindex -1]
236
        except errors.InvalidPattern, e:
5339.1.1 by Parth Malwankar
fixes errors.InvalidPattern to work on Python2.5
237
            # We can't show the default e.msg to the user as thats for
5326.2.1 by Parth Malwankar
added InvalidPattern error.
238
            # the combined pattern we sent to regex. Instead we indicate to
239
            # the user that an ignore file needs fixing.
5339.1.1 by Parth Malwankar
fixes errors.InvalidPattern to work on Python2.5
240
            mutter('Invalid pattern found in regex: %s.', e.msg)
5050.14.1 by Parth Malwankar
'bzr ignore' now fails on bad patterns. failing patterns are displayed.
241
            e.msg = "File ~/.bazaar/ignore or .bzrignore contains error(s)."
5339.3.2 by Parth Malwankar
Globster now prints specific patterns that are bad.
242
            bad_patterns = ''
243
            for _, patterns in self._regex_patterns:
244
                for p in patterns:
245
                    if not Globster.is_pattern_valid(p):
246
                        bad_patterns += ('\n  %s' % p)
5050.14.1 by Parth Malwankar
'bzr ignore' now fails on bad patterns. failing patterns are displayed.
247
            e.msg += bad_patterns
5326.2.1 by Parth Malwankar
added InvalidPattern error.
248
            raise e
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
249
        return None
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
250
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
251
    @staticmethod
252
    def identify(pattern):
253
        """Returns pattern category.
254
255
        :param pattern: normalized pattern.
256
        Identify if a pattern is fullpath, basename or extension
257
        and returns the appropriate type.
258
        """
259
        if pattern.startswith(u'RE:') or u'/' in pattern:
5050.14.3 by Parth Malwankar
use dict for managining pattern information
260
            return "fullpath"
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
261
        elif pattern.startswith(u'*.'):
5050.14.3 by Parth Malwankar
use dict for managining pattern information
262
            return "extension"
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
263
        else:
5050.14.3 by Parth Malwankar
use dict for managining pattern information
264
            return "basename"
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
265
266
    @staticmethod
267
    def is_pattern_valid(pattern):
268
        """Returns True if pattern is valid.
269
270
        :param pattern: Normalized pattern.
271
        is_pattern_valid() assumes pattern to be normalized.
272
        see: globbing.normalize_pattern
273
        """
274
        result = True
5050.14.3 by Parth Malwankar
use dict for managining pattern information
275
        translator = Globster.pattern_info[Globster.identify(pattern)]["translator"]
5339.3.1 by Parth Malwankar
'bzr ignore' now fails on bad pattern.
276
        tpattern = '(%s)' % translator(pattern)
277
        try:
278
            re_obj = re.compile(tpattern, re.UNICODE)
279
            re_obj.search("") # force compile
280
        except errors.InvalidPattern, e:
281
            result = False
282
        return result
283
284
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
285
class ExceptionGlobster(object):
286
    """A Globster that supports exception patterns.
4948.5.5 by John Whitley
Add descriptive text to ExcludingGlobster.
287
    
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
288
    Exceptions are ignore patterns prefixed with '!'.  Exception
289
    patterns take precedence over regular patterns and cause a 
290
    matching filename to return None from the match() function.  
291
    Patterns using a '!!' prefix are highest precedence, and act 
292
    as regular ignores. '!!' patterns are useful to establish ignores
293
    that apply under paths specified by '!' exception patterns.
4948.5.5 by John Whitley
Add descriptive text to ExcludingGlobster.
294
    """
4948.5.3 by John Whitley
Refactor the exclusion handling functionality out of
295
    
296
    def __init__(self,patterns):
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
297
        ignores = [[], [], []]
4948.5.3 by John Whitley
Refactor the exclusion handling functionality out of
298
        for p in patterns:
4948.5.6 by John Whitley
A trial implementation of '!!' syntax for double-negative ignore exclusions.
299
            if p.startswith(u'!!'):
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
300
                ignores[2].append(p[2:])
4948.5.6 by John Whitley
A trial implementation of '!!' syntax for double-negative ignore exclusions.
301
            elif p.startswith(u'!'):
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
302
                ignores[1].append(p[1:])
4948.5.3 by John Whitley
Refactor the exclusion handling functionality out of
303
            else:
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
304
                ignores[0].append(p)
305
        self._ignores = [Globster(i) for i in ignores]
4948.5.3 by John Whitley
Refactor the exclusion handling functionality out of
306
        
307
    def match(self, filename):
308
        """Searches for a pattern that matches the given filename.
309
310
        :return A matching pattern or None if there is no matching pattern.
311
        """
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
312
        double_neg = self._ignores[2].match(filename)
4948.5.6 by John Whitley
A trial implementation of '!!' syntax for double-negative ignore exclusions.
313
        if double_neg:
314
            return "!!%s" % double_neg
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
315
        elif self._ignores[1].match(filename):
4948.5.3 by John Whitley
Refactor the exclusion handling functionality out of
316
            return None
317
        else:
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
318
            return self._ignores[0].match(filename)
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
319
320
class _OrderedGlobster(Globster):
321
    """A Globster that keeps pattern order."""
322
323
    def __init__(self, patterns):
324
        """Constructor.
325
326
        :param patterns: sequence of glob patterns
327
        """
328
        # Note: This could be smarter by running like sequences together
329
        self._regex_patterns = []
330
        for pat in patterns:
331
            pat = normalize_pattern(pat)
5050.14.3 by Parth Malwankar
use dict for managining pattern information
332
            t = Globster.identify(pat)
333
            self._add_patterns([pat], Globster.pattern_info[t]["translator"],
334
                Globster.pattern_info[t]["prefix"])
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
335
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
336
4792.4.1 by Gordon Tyler
Fixed globbing.normalize_pattern to not strip '/' down to '' and normalize multiple slashes.
337
_slashes = re.compile(r'[\\/]+')
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
338
def normalize_pattern(pattern):
339
    """Converts backslashes in path patterns to forward slashes.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
340
2298.8.2 by Kent Gibson
Review fixes for lp86451 patch.
341
    Doesn't normalize regular expressions - they may contain escapes.
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
342
    """
4948.5.4 by John Whitley
bzrlib.globbing.normalize_pattern needed fix to avoid mangling ignore
343
    if not (pattern.startswith('RE:') or pattern.startswith('!RE:')):
4792.4.1 by Gordon Tyler
Fixed globbing.normalize_pattern to not strip '/' down to '' and normalize multiple slashes.
344
        pattern = _slashes.sub('/', pattern)
345
    if len(pattern) > 1:
346
        pattern = pattern.rstrip('/')
347
    return pattern