~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/workingtree.py

Committer: John Arbash Meinel
Date: 2006-12-12 15:09:10 UTC
mfrom: (2135.2.8 lp57637)
mto: This revision was merged to the branch mainline in revision 2179.
Revision ID: john@arbash-meinel.com-20061212150910-yxyy4ioaclh6z7m8

(Kent Gibson, Jan Hudec) Better glob pattern matcher (fixes bug #57637)

files added:
bzrlib/globbing.py

bzrlib/tests/test_globbing.py

files modified:
NEWS

bzrlib/benchmarks/bench_workingtree.py

bzrlib/builtins.py

bzrlib/tests/__init__.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/workingtree.py

from cStringIO import StringIO

import os

import re

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

import collections

from copy import deepcopy

import errno

import fnmatch

import stat

from time import time

import warnings

conflicts as _mod_conflicts,

errors,

generate_ids,

globbing,

ignores,

merge,

osutils,

1223

1222

subp = pathjoin(path, subf)

1224

1223

yield subp

1225

1224

1226

def _translate_ignore_rule(self, rule):
    """Turn one ignore glob into a parenthesised regex string.

    Three shapes of rule are recognised:

    * A rule starting with './' or '.\\' is a root-directory rule: the
      two-character prefix is dropped and the remainder is translated.
    * Any other rule containing '/' or '\\' is translated unchanged, so
      it has to match the whole path.
    * A rule without a separator may match in any directory: its
      translation is prefixed with a regex that optionally consumes the
      leading directories and refuses a '/' in what remains.

    :param rule: The glob pattern to translate.
    :return: The translated regex wrapped in one capturing group.
    """
    if rule.startswith('./') or rule.startswith('.\\'):
        # Root-directory rule: strip the marker, no directory prefix.
        prefix, glob = "", rule[2:]
    elif '/' in rule or '\\' in rule:
        # Explicit path: translate exactly as written.
        prefix, glob = "", rule
    else:
        # Bare name: allow any leading directories before the final part.
        prefix, glob = "(?:.*/)?(?!.*/)", rule
    pattern = prefix + fnmatch.translate(glob)
    # Sanity check on the anchoring produced by fnmatch.translate.
    assert pattern[-1] == '$', "fnmatch.translate did not add the expected $"
    return "(%s)" % (pattern,)

1249

1250

def _combine_ignore_rules(self, rules):
    """Combine a list of ignore rules into compiled regex objects.

    Each rule is translated with _translate_ignore_rule and the
    translations are joined with '|' into one alternation. For every
    rule, the position of its outermost group (a zero-based index into
    ``match.groups()``) is recorded in a dictionary mapping back to the
    original rule, so the rule that triggered a match can be identified
    quickly.

    The rules are split over several regexes so that no combined regex
    holds more than 99 groups; the original author noted that python
    complains when too many are combined. A new regex is started
    *before* adding a rule that would push the count over the cap, so a
    rule contributing several groups cannot step over the limit
    unnoticed. A single rule that by itself has more groups than the cap
    still gets a regex of its own.

    :param rules: An iterable of ignore rule strings.
    :return: A list of (compiled regex, {group index: rule}) tuples; an
        empty list when there are no rules.
    """
    max_groups = 99
    result = []
    groups = {}
    next_group = 0
    translated_rules = []
    for rule in rules:
        translated_rule = self._translate_ignore_rule(rule)
        # Compile the single rule only to learn how many groups it adds.
        rule_groups = re.compile(translated_rule).groups
        if translated_rules and next_group + rule_groups > max_groups:
            # Adding this rule would overflow the current regex: emit
            # what has been collected and start a fresh one.
            result.append((re.compile("|".join(translated_rules)), groups))
            groups = {}
            next_group = 0
            translated_rules = []
        groups[next_group] = rule
        next_group += rule_groups
        translated_rules.append(translated_rule)
    if translated_rules:
        result.append((re.compile("|".join(translated_rules)), groups))
    return result

1280

1225

1281

1226

def ignored_files(self):

1282

1227

"""Yield list of PATH, IGNORE_PATTERN"""

1296

1241

1297

1242

ignore_globs = set(bzrlib.DEFAULT_IGNORE)

1298

1243

ignore_globs.update(ignores.get_runtime_ignores())

1299

1300

1244

ignore_globs.update(ignores.get_user_ignores())

1301

1302

1245

if self.has_filename(bzrlib.IGNORE_FILENAME):

1303

1246

f = self.get_file_byname(bzrlib.IGNORE_FILENAME)

1304

1247

try:

1305

1248

ignore_globs.update(ignores.parse_ignore_file(f))

1306

1249

finally:

1307

1250

f.close()

1308

1309

1251

self._ignoreset = ignore_globs

1310

self._ignore_regex = self._combine_ignore_rules(ignore_globs)

1311

1252

return ignore_globs

1312

1253

1313

def _get_ignore_rules_as_regex(self):
    """Return the cached compiled form of the ignore rules.

    When no ignore set is cached yet (the ``_ignoreset`` attribute is
    missing or None), get_ignore_list() is called first; it is expected
    to populate ``_ignore_regex`` as well.

    :return: The value of ``self._ignore_regex``: a list of (compiled
        regex, dictionary mapping rule group indices to original rule)
        pairs.
    """
    cached_globs = getattr(self, '_ignoreset', None)
    if cached_globs is None:
        # Nothing cached: rebuild the ignore data before answering.
        self.get_ignore_list()
    return self._ignore_regex

1254

def _flush_ignore_list_cache(self):
    """Drop the cached ignore data so that it is rebuilt on next use.

    Both the cached ignore set and the cached glob matcher are reset to
    None.
    """
    self._ignoreset = self._ignoreglobster = None

1322

1258

1323

1259

def is_ignored(self, filename):

1324

1260

r"""Check whether the filename matches an ignore pattern.

1329

1265

If the file is ignored, returns the pattern which caused it to

1330

1266

be ignored, otherwise None. So this can simply be used as a

1331

1267

boolean if desired."""

1332

1333

# TODO: Use '**' to match directories, and other extended

1334

# globbing stuff from cvs/rsync.

1335

1336

# XXX: fnmatch is actually not quite what we want: it's only

1337

# approximately the same as real Unix fnmatch, and doesn't

1338

# treat dotfiles correctly and allows * to match /.

1339

# Eventually it should be replaced with something more

1340

# accurate.

1341

1342

rules = self._get_ignore_rules_as_regex()

1343

for regex, mapping in rules:

1344

match = regex.match(filename)

1345

if match is not None:

1346

# one or more of the groups in mapping will have a non-None

1347

# group match.

1348

groups = match.groups()

1349

rules = [mapping[group] for group in

1350

mapping if groups[group] is not None]

1351

return rules[0]

1352

return None

1268

if getattr(self, '_ignoreglobster', None) is None:

1269

self._ignoreglobster = globbing.Globster(self.get_ignore_list())

1270

return self._ignoreglobster.match(filename)

1353

1271

1354

1272

def kind(self, file_id):
    """Return what file_kind reports for the path of file_id.

    The file id is first resolved to a path with id2abspath.
    """
    abspath = self.id2abspath(file_id)
    return file_kind(abspath)

Older »