~bzr-pqm/bzr/bzr.dev

2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
1
# Copyright (C) 2006 Canonical Ltd
2
# -*- coding: utf-8 -*-
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
18
from bzrlib.globbing import (
19
    Globster,
3398.1.2 by Ian Clatworthy
add tests for _OrderedGlobster
20
    _OrderedGlobster,
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
21
    )
22
from bzrlib.tests import (
23
    TestCase, 
24
    TestCaseInTempDir,
25
    )
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
26
27
28
class TestGlobster(TestCase):
29
30
    def assertMatch(self, matchset, glob_prefix=None):
        """Check globs against names that must and must not match them.

        :param matchset: An iterable of (glob, positive, negative) tuples.
            Each glob is compiled on its own into a single-pattern Globster;
            every name in ``positive`` must match it and every name in
            ``negative`` must not.
        :param glob_prefix: Optional string prepended to every glob before
            it is compiled. Nothing is prepended when it is None or empty.
        """
        for glob, positive, negative in matchset:
            if glob_prefix:
                glob = glob_prefix + glob
            globster = Globster([glob])
            # The failure messages include the compiled regex so a broken
            # glob translation can be diagnosed from the test output alone.
            # repr() is applied to the whole message so that non-ASCII names
            # appear as escapes.
            for name in positive:
                self.assertTrue(globster.match(name), repr(
                    u'name "%s" does not match glob "%s" (re=%s)' %
                    (name, glob, globster._regex_patterns[0][0].pattern)))
            for name in negative:
                self.assertFalse(globster.match(name), repr(
                    u'name "%s" does match glob "%s" (re=%s)' %
                    (name, glob, globster._regex_patterns[0][0].pattern)))
43
2135.2.8 by Kent Gibson
Add helper method to simplify test_char_group cases.
44
    def assertMatchBasenameAndFullpath(self, matchset):
        """Run matchset through assertMatch twice: bare, then './'-prefixed.

        :param matchset: (glob, positive, negative) tuples, as accepted by
            assertMatch. It is iterated once per pass.
        """
        # No prefix exercises the basename matcher; a leading './' turns the
        # same globs into full-path patterns.
        for prefix in (None, './'):
            self.assertMatch(matchset, glob_prefix=prefix)
49
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
50
    def test_char_group_digit(self):
        """[[:digit:]] and its negation, as basename and full-path globs."""
        # The definition of digit this uses includes arabic digits from
        # non-latin scripts (arabic, indic, etc.) and subscript/superscript
        # digits, but neither roman numerals nor vulgar fractions.
        digits = [u'0', u'5', u'\u0663', u'\u06f9', u'\u0f21', u'\xb9']
        non_digits = [u'T', u'q', u' ', u'\u8336', u'.']
        self.assertMatchBasenameAndFullpath([
            (u'[[:digit:]]', digits, non_digits),
            (u'[^[:digit:]]', non_digits, digits),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
62
63
    def test_char_group_space(self):
        """[[:space:]] and its negation, as basename and full-path globs."""
        # The whitespace samples include non-ASCII spaces (U+00A0, U+2000,
        # U+2002) alongside space, tab and newline.
        spaces = [u' ', u'\t', u'\n', u'\xa0', u'\u2000', u'\u2002']
        non_spaces = [u'a', u'-', u'\u8336', u'.']
        self.assertMatchBasenameAndFullpath([
            (u'[[:space:]]', spaces, non_spaces),
            (u'[^[:space:]]', non_spaces, spaces),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
72
73
    def test_char_group_alnum(self):
        """[[:alnum:]] and its negation, as basename and full-path globs.

        The negated class is checked against every alphanumeric sample, not
        just u'a', so the two cases are exact mirrors of each other like the
        other character-group tests in this class.
        """
        alnums = [u'a', u'Z', u'\u017e', u'\u8336']
        non_alnums = [u':', u'-', u'\u25cf', u'.']
        self.assertMatchBasenameAndFullpath([
            (u'[[:alnum:]]', alnums, non_alnums),
            (u'[^[:alnum:]]', non_alnums, alnums),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
82
83
    def test_char_group_ascii(self):
        """[[:ascii:]] and its negation, as basename and full-path globs."""
        ascii_chars = [u'a', u'Q', u'^', u'.']
        non_ascii_chars = [u'\xcc', u'\u8336']
        self.assertMatchBasenameAndFullpath([
            (u'[[:ascii:]]', ascii_chars, non_ascii_chars),
            (u'[^[:ascii:]]', non_ascii_chars, ascii_chars),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
92
93
    def test_char_group_blank(self):
        """[[:blank:]] and its negation, as basename and full-path globs."""
        # Only the tab character is sampled as a blank here.
        blanks = [u'\t']
        non_blanks = [u'x', u'y', u'z', u'.']
        self.assertMatchBasenameAndFullpath([
            (u'[[:blank:]]', blanks, non_blanks),
            (u'[^[:blank:]]', non_blanks, blanks),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
102
103
    def test_char_group_cntrl(self):
        """[[:cntrl:]] and its negation, as basename and full-path globs.

        All sample names are unicode literals, including DEL (u'\\x7f'), so
        the matcher is always fed the same string type as in the other
        character-group tests.
        """
        controls = [u'\b', u'\t', u'\x7f']
        non_controls = [u'a', u'Q', u'\u8336', u'.']
        self.assertMatchBasenameAndFullpath([
            (u'[[:cntrl:]]', controls, non_controls),
            (u'[^[:cntrl:]]', non_controls, controls),
            ])
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
112
2135.2.8 by Kent Gibson
Add helper method to simplify test_char_group cases.
113
    def test_char_group_range(self):
        """Character ranges, with both '^' and '!' spelling the negation."""
        lower = [u'a', u'q', u'f']
        upper = [u'A', u'Q', u'F']
        # Names inside / outside the mixed set "\x20-\x30 plus U+8336".
        in_mixed_set = [u'\040', u'\044', u'\u8336']
        below_mixed_set = [u'\x1f']
        cases = [
            (u'[a-z]', lower, upper),
            (u'[^a-z]', upper, lower),
            # '!' negates as well, at the start of or inside a longer glob.
            (u'[!a-z]foo',
             [u'Afoo', u'.foo'],
             [u'afoo', u'ABfoo']),
            (u'foo[!a-z]bar',
             [u'fooAbar', u'foo.bar'],
             [u'foojbar']),
            (u'[\x20-\x30\u8336]', in_mixed_set, below_mixed_set),
            (u'[^\x20-\x30\u8336]', below_mixed_set, in_mixed_set),
            ]
        self.assertMatchBasenameAndFullpath(cases)
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
134
135
    def test_regex(self):
        """Patterns prefixed with 'RE:' are checked as regular expressions."""
        alternatives_hit = [u'a', u'b', u'ccc']
        alternatives_miss = [u'd', u'aa', u'c+', u'-a']
        cases = [
            # Capturing and non-capturing groups are expected to behave
            # alike.
            (u'RE:(a|b|c+)', alternatives_hit, alternatives_miss),
            (u'RE:(?:a|b|c+)', alternatives_hit, alternatives_miss),
            # NOTE(review): the expectation is that a single character
            # matches and u'aa' does not, so the named group/backreference
            # is presumably neutralised by the glob translator -- confirm
            # against bzrlib.globbing.
            (u'RE:(?P<a>.)(?P=a)',
             [u'a'],
             [u'ab', u'aa', u'aaa']),
            # test we can handle odd numbers of trailing backslashes
            (u'RE:a\\\\\\',
             [u'a\\'],
             [u'a', u'ab', u'aa', u'aaa']),
            ]
        self.assertMatch(cases)
151
152
    def test_question_mark(self):
        """'?' stands for one character: a dot is accepted, '/' is not."""
        cases = [
            # Basename glob: may be found in any directory.
            (u'?foo',
             [u'xfoo', u'bar/xfoo', u'bar/\u8336foo', u'.foo', u'bar/.foo'],
             [u'bar/foo', u'foo']),
            (u'foo?bar',
             [u'fooxbar', u'foo.bar', u'foo\u8336bar', u'qyzzy/foo.bar'],
             [u'foo/bar']),
            # Glob containing '/': not expected to match below another
            # directory.
            (u'foo/?bar',
             [u'foo/xbar', u'foo/\u8336bar', u'foo/.bar'],
             [u'foo/bar', u'bar/foo/xbar']),
            ]
        self.assertMatch(cases)
164
165
    def test_asterisk(self):
        """A single '*' matches within one path component only.

        The sample data expects '*' to span dots and non-ASCII characters
        but never a '/'.
        """
        self.assertMatch([
            (u'x*x',
             [u'xx', u'x.x', u'x\u8336..x', u'\u8336/x.x', u'x.y.x'],
             [u'x/x', u'bar/x/bar/x', u'bax/abaxab']),
            (u'foo/*x',
             [u'foo/x', u'foo/bax', u'foo/a.x', u'foo/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'*/*x',
             [u'\u8336/x', u'foo/x', u'foo/bax', u'x/a.x', u'.foo/x',
              u'\u8336/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'f*',
             [u'foo', u'foo.bar'],
             [u'.foo', u'foo/bar', u'foo/.bar']),
            # u'foo\\nbar' is a literal backslash followed by 'n' (not a
            # newline). It is spelled with an escaped backslash instead of
            # a ur'' literal, which is a syntax error on Python 3.
            (u'*bar',
             [u'bar', u'foobar', u'foo\\nbar', u'foo.bar', u'foo/bar',
              u'foo/foobar', u'foo/f.bar', u'.bar', u'foo/.bar'],
             []),
            ])
185
186
    def test_double_asterisk(self):
        """'**' spans directories only when it is a whole path component."""
        # Expectations shared by the '**' and '***' spellings below.
        under_foo_hit = [u'foo/x', u'foo/bar/x']
        under_foo_miss = [u'foox', u'foo/bax', u'foo/.x', u'foo/bar/bax']
        any_dir_bar_hit = [u'bar', u'foo/bar']
        any_dir_bar_miss = [u'foobar', u'foo.bar', u'foo/foobar',
                            u'foo/f.bar', u'.bar', u'foo/.bar']
        self.assertMatch([
            # expected uses of double asterisk
            (u'foo/**/x', under_foo_hit, under_foo_miss),
            (u'**/bar', any_dir_bar_hit, any_dir_bar_miss),
            # check that we ignore extra *s, so *** is treated like ** not *.
            (u'foo/***/x', under_foo_hit, under_foo_miss),
            (u'***/bar', any_dir_bar_hit, any_dir_bar_miss),
            # the remaining tests check that ** is interpreted as *
            # unless it is a whole path component
            (u'x**/x',
             [u'x\u8336/x', u'x/x'],
             [u'xx', u'x.x', u'bar/x/bar/x', u'x.y.x', u'x/y/x']),
            (u'x**x',
             [u'xx', u'x.x', u'x\u8336..x', u'foo/x.x', u'x.y.x'],
             [u'bar/x/bar/x', u'xfoo/bar/x', u'x/x', u'bax/abaxab']),
            (u'foo/**x',
             [u'foo/x', u'foo/bax', u'foo/a.x', u'foo/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'f**',
             [u'foo', u'foo.bar'],
             [u'.foo', u'foo/bar', u'foo/.bar']),
            # u'foo\\nbar' is a literal backslash followed by 'n' (not a
            # newline). It is spelled with an escaped backslash instead of
            # a ur'' literal, which is a syntax error on Python 3.
            (u'**bar',
             [u'bar', u'foobar', u'foo\\nbar', u'foo.bar', u'foo/bar',
              u'foo/foobar', u'foo/f.bar', u'.bar', u'foo/.bar'],
             []),
            ])
223
224
    def test_leading_dot_slash(self):
        """A './' prefix is expected to pin the glob to the top directory."""
        top_level_only = [u'foo']
        cases = [
            (u'./foo',
             top_level_only,
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            (u'./f*',
             top_level_only,
             [u'foo/bar', u'foo/.bar', u'x/foo/y']),
            ]
        self.assertMatch(cases)
233
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
234
    def test_backslash(self):
        """Globs written with '\\' separators match '/'-separated names."""
        cases = [
            # Backslash spellings of './foo' and './f*'.
            (u'.\\foo',
             [u'foo'],
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            (u'.\\f*',
             [u'foo'],
             [u'foo/bar', u'foo/.bar', u'x/foo/y']),
            # Backslash spelling of 'foo/**/x'.
            (u'foo\\**\\x',
             [u'foo/x', u'foo/bar/x'],
             [u'foox', u'foo/bax', u'foo/.x', u'foo/bar/bax']),
            ]
        self.assertMatch(cases)
246
247
    def test_trailing_slash(self):
        """A trailing '/' or '\\' on a glob still matches the bare name."""
        cases = [
            (u'./foo/',
             [u'foo'],
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            # The name u'foo/' itself (with the slash) is expected not to
            # match.
            (u'.\\foo\\',
             [u'foo'],
             [u'foo/', u'\u8336/foo', u'barfoo', u'x/y/foo']),
            ]
        self.assertMatch(cases)
256
2135.2.2 by Kent Gibson
Ignore pattern matcher (glob.py) patches:
257
    def test_leading_asterisk_dot(self):
        """Globs of the form '*.ext', bare and below a directory."""
        cases = [
            # Expected to match at any depth, including names that start
            # with a dot or consist only of the extension.
            (u'*.x',
             [u'foo/bar/baz.x', u'\u8336/Q.x', u'foo.y.x', u'.foo.x',
              u'bar/.foo.x', u'.x',],
             [u'foo.x.y']),
            (u'foo/*.bar',
             [u'foo/b.bar', u'foo/a.b.bar', u'foo/.bar'],
             [u'foo/bar']),
            (u'*.~*',
             [u'foo.py.~1~', u'.foo.py.~1~'],
             []),
            ]
        self.assertMatch(cases)
270
271
    def test_end_anchor(self):
        """An extension glob must match to the end of the name.

        '*.3' must not match 'foo.333', and '*.333' must not match 'foo.3'.
        """
        short_name = u'foo.3'
        long_name = u'foo.333'
        self.assertMatch([
            (u'*.333', [long_name], [short_name]),
            (u'*.3', [short_name], [long_name]),
            ])
280
281
    def test_mixed_globs(self):
        """tests handling of combinations of path type matches.

        The types being extension, basename and full path. A single Globster
        holds one glob of each type; match() is expected to return the glob
        that matched, or None.
        """
        globster = Globster([ u'*.foo', u'.*.swp', u'./*.png'])
        expectations = (
            ('bar.foo', u'*.foo'),
            ('foo.png', u'./*.png'),
            ('foo/bar.png', None),
            ('foo/.bar.py.swp', u'.*.swp'),
            )
        for filename, matched_glob in expectations:
            self.assertEqual(matched_glob, globster.match(filename))
292
293
    def test_large_globset(self):
        """tests that the globster can handle a large set of patterns.

        Large is defined as more than supported by python regex groups,
        i.e. 99.
        This test assumes the globs are broken into regexes containing 99
        groups.
        """
        patterns = [u'*.%03d' % index for index in xrange(300)]
        globster = Globster(patterns)
        # test the fence posts: the first and last glob of each assumed
        # 99-group chunk, plus the very last glob.
        fence_posts = (0, 98, 99, 197, 198, 296, 297, 299)
        for index in fence_posts:
            self.assertEqual(patterns[index],
                             globster.match(u'foo.%03d' % index))
        # One past the last generated extension must not match anything.
        self.assertEqual(None, globster.match('foobar.300'))
308
3398.1.2 by Ian Clatworthy
add tests for _OrderedGlobster
309
310
class TestOrderedGlobster(TestCase):
    """Tests for _OrderedGlobster."""

    def test_ordered_globs(self):
        """test that the first match in a list is the one found"""
        forward = [ u'*.foo', u'bar.*']
        # 'bar.foo' satisfies both globs, so whichever is listed first must
        # be the one reported; 'foo.bar' satisfies neither.
        orderings = (
            (forward, u'*.foo'),
            (reversed(forward), u'bar.*'),
            )
        for ordering, first_listed in orderings:
            globster = _OrderedGlobster(ordering)
            self.assertEqual(first_listed, globster.match('bar.foo'))
            self.assertEqual(None, globster.match('foo.bar'))