~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/glob_matcher.py

Committer: John Arbash Meinel
Date: 2006-03-08 14:38:24 UTC
mto: (1685.1.1 bzr-encoding)
mto: This revision was merged to the branch mainline in revision 1752.
Revision ID: john@arbash-meinel.com-20060308143824-d84504389354bfc1

Removing glob_matcher for the future ignore pattern upgrade.

files removed:
bzrlib/glob_matcher.py

bzrlib/tests/test_glob_matcher.py

files modified:
bzrlib/tests/__init__.py

Show diffs side-by-side

added added

removed removed

bzrlib/glob_matcher.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import re

def glob_to_re(pat):
    """Convert a glob pattern into a regular expression.

    The following glob constructs are understood:

    **      Match a string of characters (including directory separators)
    *       Match a string of characters (no directory separator)
    ?       Match a single character (not a directory separator)
    [seq]   Match a single character, any one of 'seq'
    [!seq]  Match a single character that is not in 'seq' and is not a
            directory separator

    A directory separator is either '/' or a backslash.  A '[' with no
    closing ']' is treated as a literal '['.  Every other character is
    matched literally.

    This was adapted from fnmatch.translate()

    :param pat: The pattern to transform
    :return: A regular expression string, always terminated by '$'
    """
    i, n = 0, len(pat)
    # Collect the regex fragments and join them once at the end rather
    # than growing a string with repeated concatenation.
    pieces = []
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            if pat[i:i+1] == '*':
                # pattern '**': anything, separators included
                pieces.append('.*')
                i += 1
            else:
                # pattern '*': anything except a separator
                pieces.append(r'[^/\\]*')
        elif c == '?':
            pieces.append(r'[^/\\]')
        elif c == '[':
            j = i
            # A leading '!' (negation) and a ']' placed first in the set
            # are part of the set, not its terminator, so skip them
            # before searching for the closing bracket.
            if j < n and pat[j] == '!':
                j += 1
            if j < n and pat[j] == ']':
                j += 1
            close = pat.find(']', j)
            if close < 0:
                # Unterminated set: the '[' is an ordinary character and
                # scanning resumes right after it.
                pieces.append('\\[')
            else:
                stuff = pat[i:close].replace('\\', '\\\\')
                i = close + 1
                if stuff[0] == '!':
                    # Negated set: also exclude the directory separators.
                    stuff = '^' + stuff[1:] + r'/\\'
                elif stuff[0] == '^':
                    # A literal leading '^' must not negate the regex set.
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        else:
            pieces.append(re.escape(c))
    # re.match() only anchors at the start of the string, so without a
    # final '$' a pattern would match any filename it is a prefix of.
    # globs_to_re() relies on the result ending in exactly this '$'.
    # NOTE: '$' also matches just before a single trailing newline.
    pieces.append('$')
    return ''.join(pieces)

class _GlobMatcher(object):
    """Callable predicate that tests filenames against one compiled regex."""

    def __init__(self, glob_re):
        """Compile and remember the regular expression to match with.

        :param glob_re: A regular expression string
        """
        self._compiled_re = re.compile(glob_re, flags=re.UNICODE)

    def __call__(self, fname):
        """Report whether fname is matched by the stored expression.

        The expression is applied with match(), i.e. from the start of
        fname.

        :param fname: A filename to check.
        :return: True if the expression matches, False otherwise
        """
        found = self._compiled_re.match(fname)
        return found is not None

def glob_to_matcher(glob):
    """Build a callable that tests filenames against a single glob.

    :param glob: The glob pattern to match with
    :return: A _GlobMatcher wrapping the regex produced by glob_to_re()
    """
    translated = glob_to_re(glob)
    return _GlobMatcher(translated)

def globs_to_re(patterns):
    """Combine several glob patterns into one regular expression.

    Each glob is translated with glob_to_re(), its trailing '$' is
    removed, and the results are joined as alternatives of a single
    group that is anchored with one final '$'.

    :param patterns: A list of patterns to transform
    :return: A regular expression combining all patterns
    """
    def _without_anchor(glob):
        # Every translated pattern ends in '$'; drop it here so it can
        # be specified once for the whole alternation.
        anchored = glob_to_re(glob)
        assert anchored[-1] == '$'
        return anchored[:-1]

    # TODO: jam 20060107 It seems to be enough to do:
    #       (pat1|pat2|pat3|pat4)$
    #       Is there a circumstance where we need to wrap each
    #       alternative in its own group, i.e.
    #       ((pat1)|(pat2)|(pat3))$
    #
    # TODO: jam 20060107 Is it more efficient to do:
    #       (pat1|pat2|pat3)$
    #       Or to do:
    #       (pat1$)|(pat2$)|(pat3$)
    #       I thought it would be more efficient to only have to
    #       match the end of the pattern once
    alternatives = [_without_anchor(glob) for glob in patterns]
    return '(%s)$' % ('|'.join(alternatives),)

124

125

126

def globs_to_matcher(patterns):
    """Build a callable that tests filenames against a set of globs.

    :param patterns: A list of glob patterns
    :return: A _GlobMatcher wrapping the regex produced by globs_to_re()
    """
    combined = globs_to_re(patterns)
    return _GlobMatcher(combined)

129

130

Older »