# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from __future__ import absolute_import
21
from bzrlib.lazy_import import lazy_import
22
lazy_import(globals(), """
23
from fnmatch import fnmatch
25
from cStringIO import StringIO
27
from bzrlib._termcolor import color_string, re_color_string, FG
29
from bzrlib.revisionspec import (
40
revision as _mod_revision,
45
_user_encoding = osutils.get_user_encoding()
48
class _RevisionNotLinear(Exception):
49
"""Raised when a revision is not on left-hand history."""
52
def _rev_on_mainline(rev_tuple):
53
"""returns True is rev tuple is on mainline"""
54
if len(rev_tuple) == 1:
56
return rev_tuple[1] == 0 and rev_tuple[2] == 0
59
# NOTE: _linear_view_revisions is based on
# bzrlib.log._linear_view_revisions.
# This should probably be a common public API
def _linear_view_revisions(branch, start_rev_id, end_rev_id):
    """Yield ``(revision_id, dotted_revno_string, merge_depth)`` tuples.

    Walks the left-hand (mainline) ancestry from end_rev_id back to
    start_rev_id inclusive, newest first.  Merge depth is always 0 because
    only left-hand ancestors are visited.

    Requires that start_rev_id is older (more ancestral) than end_rev_id.
    """
    repo = branch.repository
    graph = repo.get_graph()
    for revision_id in graph.iter_lefthand_ancestry(
            end_rev_id, (_mod_revision.NULL_REVISION, )):
        revno = branch.revision_id_to_dotted_revno(revision_id)
        revno_str = '.'.join(str(n) for n in revno)
        if revision_id == start_rev_id:
            # Reached the lower bound of the range: emit it and stop.
            yield revision_id, revno_str, 0
            break
        yield revision_id, revno_str, 0
# NOTE: _graph_view_revisions is copied from
77
# bzrlib.log._graph_view_revisions.
78
# This should probably be a common public API
79
def _graph_view_revisions(branch, start_rev_id, end_rev_id,
80
rebase_initial_depths=True):
81
"""Calculate revisions to view including merges, newest to oldest.
83
:param branch: the branch
84
:param start_rev_id: the lower revision-id
85
:param end_rev_id: the upper revision-id
86
:param rebase_initial_depth: should depths be rebased until a mainline
88
:return: An iterator of (revision_id, dotted_revno, merge_depth) tuples.
90
# requires that start is older than end
91
view_revisions = branch.iter_merge_sorted_revisions(
92
start_revision_id=end_rev_id, stop_revision_id=start_rev_id,
93
stop_rule="with-merges")
94
if not rebase_initial_depths:
95
for (rev_id, merge_depth, revno, end_of_merge
97
yield rev_id, '.'.join(map(str, revno)), merge_depth
99
# We're following a development line starting at a merged revision.
100
# We need to adjust depths down by the initial depth until we find
101
# a depth less than it. Then we use that depth as the adjustment.
102
# If and when we reach the mainline, depth adjustment ends.
103
depth_adjustment = None
104
for (rev_id, merge_depth, revno, end_of_merge
106
if depth_adjustment is None:
107
depth_adjustment = merge_depth
109
if merge_depth < depth_adjustment:
110
# From now on we reduce the depth adjustement, this can be
111
# surprising for users. The alternative requires two passes
112
# which breaks the fast display of the first revision
114
depth_adjustment = merge_depth
115
merge_depth -= depth_adjustment
116
yield rev_id, '.'.join(map(str, revno)), merge_depth
119
def compile_pattern(pattern, flags=0):
    """Compile a regular expression, turning bad patterns into BzrError.

    :param pattern: the regular expression source string.
    :param flags: optional ``re`` compilation flags.
    :return: the compiled pattern object.
    :raises errors.BzrError: if the pattern is not a valid regexp.
    """
    try:
        # use python's re.compile as we need to catch re.error in case of bad pattern
        lazy_regex.reset_compile()
        return re.compile(pattern, flags)
    except re.error:
        raise errors.BzrError("Invalid pattern: '%s'" % pattern)
def is_fixed_string(s):
    """Return True if s contains only word characters and whitespace.

    Such a string has no regexp metacharacters, so it can be searched for
    with plain substring matching instead of the re module.
    """
    return re.match(r"^([A-Za-z0-9_]|\s)*$", s) is not None
class _GrepDiffOutputter(object):
137
"""Precalculate formatting based on options given for diff grep.
140
def __init__(self, opts):
142
self.outf = opts.outf
144
pat = opts.pattern.encode(_user_encoding, 'replace')
145
if opts.fixed_string:
147
self._new = color_string(pat, FG.BOLD_RED)
148
self.get_writer = self._get_writer_fixed_highlighted
150
flags = opts.patternc.flags
151
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
152
self._highlight = color_string("\\1", FG.BOLD_RED)
153
self.get_writer = self._get_writer_regexp_highlighted
155
self.get_writer = self._get_writer_plain
157
def get_file_header_writer(self):
158
"""Get function for writing file headers"""
159
write = self.outf.write
160
eol_marker = self.opts.eol_marker
161
def _line_writer(line):
162
write(line + eol_marker)
163
def _line_writer_color(line):
164
write(FG.BOLD_MAGENTA + line + FG.NONE + eol_marker)
165
if self.opts.show_color:
166
return _line_writer_color
171
def get_revision_header_writer(self):
172
"""Get function for writing revno lines"""
173
write = self.outf.write
174
eol_marker = self.opts.eol_marker
175
def _line_writer(line):
176
write(line + eol_marker)
177
def _line_writer_color(line):
178
write(FG.BOLD_BLUE + line + FG.NONE + eol_marker)
179
if self.opts.show_color:
180
return _line_writer_color
185
def _get_writer_plain(self):
186
"""Get function for writing uncoloured output"""
187
write = self.outf.write
188
eol_marker = self.opts.eol_marker
189
def _line_writer(line):
190
write(line + eol_marker)
193
def _get_writer_regexp_highlighted(self):
194
"""Get function for writing output with regexp match highlighted"""
195
_line_writer = self._get_writer_plain()
196
sub, highlight = self._sub, self._highlight
197
def _line_writer_regexp_highlighted(line):
198
"""Write formatted line with matched pattern highlighted"""
199
return _line_writer(line=sub(highlight, line))
200
return _line_writer_regexp_highlighted
202
def _get_writer_fixed_highlighted(self):
203
"""Get function for writing output with search string highlighted"""
204
_line_writer = self._get_writer_plain()
205
old, new = self._old, self._new
206
def _line_writer_fixed_highlighted(line):
207
"""Write formatted line with string searched for highlighted"""
208
return _line_writer(line=line.replace(old, new))
209
return _line_writer_fixed_highlighted
213
wt, branch, relpath = \
214
bzrdir.BzrDir.open_containing_tree_or_branch('.')
218
start_rev = opts.revision[0]
220
# if no revision is sepcified for diff grep we grep all changesets.
221
opts.revision = [RevisionSpec.from_string('revno:1'),
222
RevisionSpec.from_string('last:1')]
223
start_rev = opts.revision[0]
224
start_revid = start_rev.as_revision_id(branch)
225
if start_revid == 'null:':
227
srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)
228
if len(opts.revision) == 2:
229
end_rev = opts.revision[1]
230
end_revid = end_rev.as_revision_id(branch)
231
if end_revid is None:
232
end_revno, end_revid = branch.last_revision_info()
233
erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)
235
grep_mainline = (_rev_on_mainline(srevno_tuple) and
236
_rev_on_mainline(erevno_tuple))
238
# ensure that we go in reverse order
239
if srevno_tuple > erevno_tuple:
240
srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
241
start_revid, end_revid = end_revid, start_revid
243
# Optimization: Traversing the mainline in reverse order is much
244
# faster when we don't want to look at merged revs. We try this
245
# with _linear_view_revisions. If all revs are to be grepped we
246
# use the slower _graph_view_revisions
247
if opts.levels==1 and grep_mainline:
248
given_revs = _linear_view_revisions(branch, start_revid, end_revid)
250
given_revs = _graph_view_revisions(branch, start_revid, end_revid)
252
# We do an optimization below. For grepping a specific revison
253
# We don't need to call _graph_view_revisions which is slow.
254
# We create the start_rev_tuple for only that specific revision.
255
# _graph_view_revisions is used only for revision range.
256
start_revno = '.'.join(map(str, srevno_tuple))
257
start_rev_tuple = (start_revid, start_revno, 0)
258
given_revs = [start_rev_tuple]
259
repo = branch.repository
260
diff_pattern = re.compile("^[+\-].*(" + opts.pattern + ")")
261
file_pattern = re.compile("=== (modified|added|removed) file '.*'", re.UNICODE)
262
outputter = _GrepDiffOutputter(opts)
263
writeline = outputter.get_writer()
264
writerevno = outputter.get_revision_header_writer()
265
writefileheader = outputter.get_file_header_writer()
266
file_encoding = _user_encoding
267
for revid, revno, merge_depth in given_revs:
268
if opts.levels == 1 and merge_depth != 0:
269
# with level=1 show only top level
272
rev_spec = RevisionSpec_revid.from_string("revid:"+revid)
273
new_rev = repo.get_revision(revid)
274
new_tree = rev_spec.as_tree(branch)
275
if len(new_rev.parent_ids) == 0:
276
ancestor_id = _mod_revision.NULL_REVISION
278
ancestor_id = new_rev.parent_ids[0]
279
old_tree = repo.revision_tree(ancestor_id)
281
diff.show_diff_trees(old_tree, new_tree, s,
282
old_label='', new_label='')
287
for line in text.splitlines():
288
if file_pattern.search(line):
291
elif diff_pattern.search(line):
293
writerevno("=== revno:%s ===" % (revno,))
294
display_revno = False
296
writefileheader(" %s" % (file_header,))
298
line = line.decode(file_encoding, 'replace')
299
writeline(" %s" % (line,))
304
def versioned_grep(opts):
    """Grep through history of the branch containing the current directory.

    Determines the revision range from opts.revision, picks the cheapest
    revision iterator for that range, then greps each path in opts.path_list
    in every selected revision.
    """
    wt, branch, relpath = \
        bzrdir.BzrDir.open_containing_tree_or_branch('.')
    branch.lock_read()
    try:
        start_rev = opts.revision[0]
        start_revid = start_rev.as_revision_id(branch)
        if start_revid is None:
            start_rev = RevisionSpec_revno.from_string("revno:1")
            start_revid = start_rev.as_revision_id(branch)
        srevno_tuple = branch.revision_id_to_dotted_revno(start_revid)

        if len(opts.revision) == 2:
            end_rev = opts.revision[1]
            end_revid = end_rev.as_revision_id(branch)
            if end_revid is None:
                end_revno, end_revid = branch.last_revision_info()
            erevno_tuple = branch.revision_id_to_dotted_revno(end_revid)

            grep_mainline = (_rev_on_mainline(srevno_tuple) and
                _rev_on_mainline(erevno_tuple))

            # ensure that we go in reverse order
            if srevno_tuple > erevno_tuple:
                srevno_tuple, erevno_tuple = erevno_tuple, srevno_tuple
                start_revid, end_revid = end_revid, start_revid

            # Optimization: Traversing the mainline in reverse order is much
            # faster when we don't want to look at merged revs. We try this
            # with _linear_view_revisions. If all revs are to be grepped we
            # use the slower _graph_view_revisions
            if opts.levels == 1 and grep_mainline:
                given_revs = _linear_view_revisions(branch, start_revid, end_revid)
            else:
                given_revs = _graph_view_revisions(branch, start_revid, end_revid)
        else:
            # We do an optimization below. For grepping a specific revision
            # We don't need to call _graph_view_revisions which is slow.
            # We create the start_rev_tuple for only that specific revision.
            # _graph_view_revisions is used only for revision range.
            start_revno = '.'.join(map(str, srevno_tuple))
            start_rev_tuple = (start_revid, start_revno, 0)
            given_revs = [start_rev_tuple]

        # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
        opts.outputter = _Outputter(opts, use_cache=True)

        for revid, revno, merge_depth in given_revs:
            if opts.levels == 1 and merge_depth != 0:
                # with level=1 show only top level
                continue

            rev = RevisionSpec_revid.from_string("revid:"+revid)
            tree = rev.as_tree(branch)
            for path in opts.path_list:
                path_for_id = osutils.pathjoin(relpath, path)
                id = tree.path2id(path_for_id)
                if not id:
                    trace.warning("Skipped unknown file '%s'." % path)
                    continue

                if osutils.isdir(path):
                    path_prefix = path
                    dir_grep(tree, path, relpath, opts, revno, path_prefix)
                else:
                    versioned_file_grep(tree, id, '.', path, opts, revno)
    finally:
        branch.unlock()
def workingtree_grep(opts):
    """Grep through the working tree containing the current directory.

    :raises errors.BzrCommandError: if no working tree is present.
    """
    revno = opts.print_revno = None # for working tree set revno to None

    tree, branch, relpath = \
        bzrdir.BzrDir.open_containing_tree_or_branch('.')
    if not tree:
        msg = ('Cannot search working tree. Working tree not found.\n'
            'To search for specific revision in history use the -r option.')
        raise errors.BzrCommandError(msg)

    # GZ 2010-06-02: Shouldn't be smuggling this on opts, but easy for now
    opts.outputter = _Outputter(opts)

    tree.lock_read()
    try:
        for path in opts.path_list:
            if osutils.isdir(path):
                path_prefix = path
                dir_grep(tree, path, relpath, opts, revno, path_prefix)
            else:
                _file_grep(open(path).read(), path, opts, revno)
    finally:
        tree.unlock()
def _skip_file(include, exclude, path):
400
if include and not _path_in_glob_list(path, include):
402
if exclude and _path_in_glob_list(path, exclude):
407
def dir_grep(tree, path, relpath, opts, revno, path_prefix):
    """Grep all versioned files under path.

    For a historical revision (revno is not None) file texts are fetched in
    bulk from the repository via iter_files_bytes; for the working tree
    (revno is None) files are read straight from disk.
    """
    # setup relpath to open files relative to cwd
    rpath = relpath
    if relpath:
        rpath = osutils.pathjoin('..',relpath)

    from_dir = osutils.pathjoin(relpath, path)
    if opts.from_root:
        # start searching recursively from root
        from_dir = None

    to_grep = []
    to_grep_append = to_grep.append
    # GZ 2010-06-05: The cache dict used to be recycled every call to dir_grep
    #                and hits manually refilled. Could do this again if it was
    #                for a good reason, otherwise cache might want purging.
    outputter = opts.outputter
    for fp, fc, fkind, fid, entry in tree.list_files(include_root=False,
            from_dir=from_dir, recursive=opts.recursive):

        if _skip_file(opts.include, opts.exclude, fp):
            continue

        if fc == 'V' and fkind == 'file':
            if revno is not None:
                # If old result is valid, print results immediately.
                # Otherwise, add file info to to_grep so that the
                # loop later will get chunks and grep them
                cache_id = tree.get_file_revision(fid)
                if cache_id in outputter.cache:
                    # GZ 2010-06-05: Not really sure caching and re-outputting
                    #                the old path is really the right thing,
                    #                but it's what the old code seemed to do
                    outputter.write_cached_lines(cache_id, revno)
                else:
                    to_grep_append((fid, (fp, fid)))
            else:
                # we are grepping working tree.
                if from_dir is None:
                    from_dir = '.'

                path_for_file = osutils.pathjoin(tree.basedir, from_dir, fp)
                if opts.files_with_matches or opts.files_without_match:
                    # Optimize for wtree list-only as we don't need to read the
                    # entire file
                    file = open(path_for_file, 'r', buffering=4096)
                    _file_grep_list_only_wtree(file, fp, opts, path_prefix)
                else:
                    file_text = open(path_for_file, 'r').read()
                    _file_grep(file_text, fp, opts, revno, path_prefix)

    if revno is not None: # grep versioned files
        for (path, fid), chunks in tree.iter_files_bytes(to_grep):
            path = _make_display_path(relpath, path)
            _file_grep(chunks[0], path, opts, revno, path_prefix,
                tree.get_file_revision(fid, path))
def _make_display_path(relpath, path):
467
"""Return path string relative to user cwd.
469
Take tree's 'relpath' and user supplied 'path', and return path
470
that can be displayed to the user.
473
# update path so to display it w.r.t cwd
474
# handle windows slash separator
475
path = osutils.normpath(osutils.pathjoin(relpath, path))
476
path = path.replace('\\', '/')
477
path = path.replace(relpath + '/', '', 1)
481
def versioned_file_grep(tree, id, relpath, path, opts, revno, path_prefix=None):
    """Create a file object for the specified id and pass it on to _file_grep.
    """
    path = _make_display_path(relpath, path)
    file_text = tree.get_file_text(id)
    _file_grep(file_text, path, opts, revno, path_prefix)
def _path_in_glob_list(path, glob_list):
491
for glob in glob_list:
492
if fnmatch(path, glob):
497
def _file_grep_list_only_wtree(file, path, opts, path_prefix=None):
    """Grep a working-tree file only to decide whether it matches at all.

    Used for --files-with-matches / --files-without-match: stops at the
    first matching line instead of collecting every match.
    """
    # test and skip binary files
    if '\x00' in file.read(1024):
        if opts.verbose:
            trace.warning("Binary file '%s' skipped." % path)
        return

    file.seek(0) # search from beginning

    found = False
    if opts.fixed_string:
        pattern = opts.pattern.encode(_user_encoding, 'replace')
        for line in file:
            if pattern in line:
                found = True
                break
    else: # not fixed_string
        for line in file:
            if opts.patternc.search(line):
                found = True
                break

    if (opts.files_with_matches and found) or \
            (opts.files_without_match and not found):
        if path_prefix and path_prefix != '.':
            # user has passed a dir arg, show that as result prefix
            path = osutils.pathjoin(path_prefix, path)
        opts.outputter.get_writer(path, None, None)()
class _Outputter(object):
528
"""Precalculate formatting based on options given
530
The idea here is to do this work only once per run, and finally return a
531
function that will do the minimum amount possible for each match.
533
def __init__(self, opts, use_cache=False):
534
self.outf = opts.outf
536
# self.cache is used to cache results for dir grep based on fid.
537
# If the fid is does not change between results, it means that
538
# the result will be the same apart from revno. In such a case
539
# we avoid getting file chunks from repo and grepping. The result
540
# is just printed by replacing old revno with new one.
544
no_line = opts.files_with_matches or opts.files_without_match
547
pat = opts.pattern.encode(_user_encoding, 'replace')
549
self.get_writer = self._get_writer_plain
550
elif opts.fixed_string:
552
self._new = color_string(pat, FG.BOLD_RED)
553
self.get_writer = self._get_writer_fixed_highlighted
555
flags = opts.patternc.flags
556
self._sub = re.compile(pat.join(("((?:",")+)")), flags).sub
557
self._highlight = color_string("\\1", FG.BOLD_RED)
558
self.get_writer = self._get_writer_regexp_highlighted
559
path_start = FG.MAGENTA
561
sep = color_string(':', FG.BOLD_CYAN)
562
rev_sep = color_string('~', FG.BOLD_YELLOW)
564
self.get_writer = self._get_writer_plain
565
path_start = path_end = ""
569
parts = [path_start, "%(path)s"]
571
parts.extend([rev_sep, "%(revno)s"])
572
self._format_initial = "".join(parts)
575
if not opts.print_revno:
576
parts.append(path_end)
579
parts.extend([sep, "%(lineno)s"])
580
parts.extend([sep, "%(line)s"])
581
parts.append(opts.eol_marker)
582
self._format_perline = "".join(parts)
584
def _get_writer_plain(self, path, revno, cache_id):
585
"""Get function for writing uncoloured output"""
586
per_line = self._format_perline
587
start = self._format_initial % {"path":path, "revno":revno}
588
write = self.outf.write
589
if self.cache is not None and cache_id is not None:
591
self.cache[cache_id] = path, result_list
592
add_to_cache = result_list.append
593
def _line_cache_and_writer(**kwargs):
594
"""Write formatted line and cache arguments"""
595
end = per_line % kwargs
598
return _line_cache_and_writer
599
def _line_writer(**kwargs):
600
"""Write formatted line from arguments given by underlying opts"""
601
write(start + per_line % kwargs)
604
def write_cached_lines(self, cache_id, revno):
605
"""Write cached results out again for new revision"""
606
cached_path, cached_matches = self.cache[cache_id]
607
start = self._format_initial % {"path":cached_path, "revno":revno}
608
write = self.outf.write
609
for end in cached_matches:
612
def _get_writer_regexp_highlighted(self, path, revno, cache_id):
613
"""Get function for writing output with regexp match highlighted"""
614
_line_writer = self._get_writer_plain(path, revno, cache_id)
615
sub, highlight = self._sub, self._highlight
616
def _line_writer_regexp_highlighted(line, **kwargs):
617
"""Write formatted line with matched pattern highlighted"""
618
return _line_writer(line=sub(highlight, line), **kwargs)
619
return _line_writer_regexp_highlighted
621
def _get_writer_fixed_highlighted(self, path, revno, cache_id):
622
"""Get function for writing output with search string highlighted"""
623
_line_writer = self._get_writer_plain(path, revno, cache_id)
624
old, new = self._old, self._new
625
def _line_writer_fixed_highlighted(line, **kwargs):
626
"""Write formatted line with string searched for highlighted"""
627
return _line_writer(line=line.replace(old, new), **kwargs)
628
return _line_writer_fixed_highlighted
631
def _file_grep(file_text, path, opts, revno, path_prefix=None, cache_id=None):
632
# test and skip binary files
633
if '\x00' in file_text[:1024]:
635
trace.warning("Binary file '%s' skipped." % path)
638
if path_prefix and path_prefix != '.':
639
# user has passed a dir arg, show that as result prefix
640
path = osutils.pathjoin(path_prefix, path)
642
# GZ 2010-06-07: There's no actual guarentee the file contents will be in
643
# the user encoding, but we have to guess something and it
644
# is a reasonable default without a better mechanism.
645
file_encoding = _user_encoding
646
pattern = opts.pattern.encode(_user_encoding, 'replace')
648
writeline = opts.outputter.get_writer(path, revno, cache_id)
650
if opts.files_with_matches or opts.files_without_match:
651
if opts.fixed_string:
652
if sys.platform > (2, 5):
653
found = pattern in file_text
655
for line in file_text.splitlines():
662
search = opts.patternc.search
663
if "$" not in pattern:
664
found = search(file_text) is not None
666
for line in file_text.splitlines():
672
if (opts.files_with_matches and found) or \
673
(opts.files_without_match and not found):
675
elif opts.fixed_string:
676
# Fast path for no match, search through the entire file at once rather
677
# than a line at a time. However, we don't want this without Python 2.5
678
# as the quick string search algorithm wasn't implemented till then:
679
# <http://effbot.org/zone/stringlib.htm>
680
if sys.version_info > (2, 5):
681
i = file_text.find(pattern)
684
b = file_text.rfind("\n", 0, i) + 1
686
start = file_text.count("\n", 0, b) + 1
687
file_text = file_text[b:]
691
for index, line in enumerate(file_text.splitlines()):
693
line = line.decode(file_encoding, 'replace')
694
writeline(lineno=index+start, line=line)
696
for line in file_text.splitlines():
698
line = line.decode(file_encoding, 'replace')
701
# Fast path on no match, the re module avoids bad behaviour in most
702
# standard cases, but perhaps could try and detect backtracking
703
# patterns here and avoid whole text search in those cases
704
search = opts.patternc.search
705
if "$" not in pattern:
706
# GZ 2010-06-05: Grr, re.MULTILINE can't save us when searching
707
# through revisions as bazaar returns binary mode
708
# and trailing \r breaks $ as line ending match
709
m = search(file_text)
712
b = file_text.rfind("\n", 0, m.start()) + 1
714
start = file_text.count("\n", 0, b) + 1
715
file_text = file_text[b:]
719
for index, line in enumerate(file_text.splitlines()):
721
line = line.decode(file_encoding, 'replace')
722
writeline(lineno=index+start, line=line)
724
for line in file_text.splitlines():
726
line = line.decode(file_encoding, 'replace')