~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/patiencediff.py

Committer: Robey Pointer
Date: 2006-07-01 19:03:33 UTC
mfrom: (1829 +trunk)
mto: This revision was merged to the branch mainline in revision 1830.
Revision ID: robey@lag.net-20060701190333-f58465aec4bd3412

merge from bzr.dev

files added:
BRANCH.TODO

INSTALL

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/option.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_api.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_emptytree.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/using_aliases.txt

generate_docs.py

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/clone.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/revfile.py

bzrlib/store/compressed_text.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

testbzr

files renamed:
bzrlib/changeset.py => bzrlib/bundle/__init__.py

bzrlib/store/weave.py => bzrlib/store/versioned/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/test_revision_info.py => bzrlib/tests/blackbox/test_revision_info.py

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/test_parent.py => bzrlib/tests/branch_implementations/test_parent.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testfetch.py => bzrlib/tests/test_fetch.py

bzrlib/selftest/testgraph.py => bzrlib/tests/test_graph.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge.py => bzrlib/tests/test_merge.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/teststore.py => bzrlib/tests/test_store.py

bzrlib/selftest/testtransport.py => bzrlib/tests/test_transport.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/selftest/testworkingtree.py => bzrlib/tests/test_workingtree.py

bzrlib/transport/http.py => bzrlib/transport/http/__init__.py

bzrlib/ui.py => bzrlib/ui/__init__.py

bzrlib/xml.py => bzrlib/xml_serializer.py

tutorial.txt => doc/tutorial.txt

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/patiencediff.py

#!/usr/bin/env python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bisect import bisect

import difflib

import os

import sys

import time

from bzrlib.trace import mutter

__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']

def unique_lcs(a, b):
    """Find the longest common subsequence of lines unique to both sides.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subsequence is found with the Patience Sorting
    algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # index[line] = position of line in a, unless the line is a
    # duplicate in a, in which case it is set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i
    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        a_pos = index.get(line)
        if a_pos is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                # forget the line entirely so later repeats in b are skipped
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = a_pos
    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # stacks[k] holds the a-position on top of pile k, lasts[k] the
    # b-position that put it there, and backpointers[bpos] the b-position
    # on top of the previous pile at the moment bpos was placed.
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)
    if len(lasts) == 0:
        return []
    # walk the backpointers from the top of the last pile to recover the
    # matched pairs, then put them back into increasing order
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

107

108

109

def recurse_matches(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param alo: The start location of a to check, typically 0
    :param blo: The start location of b to check, typically 0
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b)]
    :param maxrecursion: The maximum depth to recurse.
                         Must be a positive integer.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        mutter('max recursion depth reached')
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if alo == ahi or blo == bhi:
        # one of the two ranges is empty, nothing can match
        return
    last_a_pos = alo-1
    last_b_pos = blo-1
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        # (unique_lcs works on slices, so shift back to absolute positions)
        apos += alo
        bpos += blo
        # Most of the time, you will have a sequence of similar entries
        if last_a_pos+1 != apos or last_b_pos+1 != bpos:
            recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                            apos, bpos, answer, maxrecursion - 1)
        last_a_pos = apos
        last_b_pos = bpos
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, alo, blo,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # no unique match was found on this path, so last_a_pos+1 == alo
        # and last_b_pos+1 == blo: recurse on the part before the tail
        recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                        nahi, nbhi, answer, maxrecursion - 1)
        # the common tail is appended last so answer stays in order
        for i in range(ahi - nahi):
            answer.append((nahi + i, nbhi + i))

169

170

171

def _collapse_sequences(matches):

172

"""Find sequences of lines.

173

174

Given a sequence of [(line_in_a, line_in_b),]

175

find regions where they both increment at the same time

176

"""

177

answer = []

178

start_a = start_b = None

179

length = 0

180

for i_a, i_b in matches:

181

if (start_a is not None

182

and (i_a == start_a + length)

183

and (i_b == start_b + length)):

184

length += 1

185

else:

186

if start_a is not None:

187

answer.append((start_a, start_b, length))

188

start_a = i_a

189

start_b = i_b

190

length = 1

191

192

if length != 0:

193

answer.append((start_a, start_b, length))

194

195

return answer

196

197

198

def _check_consistency(answer):

199

# For consistency sake, make sure all matches are only increasing

200

next_a = -1

201

next_b = -1

202

for a,b,match_len in answer:

203

assert a >= next_a, 'Non increasing matches for a'

204

assert b >= next_b, 'Not increasing matches for b'

205

next_a = a + match_len

206

next_b = b + match_len

207

208

209

class PatienceSequenceMatcher(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subsequence.

    Only get_matching_blocks() is overridden; everything else is
    inherited from difflib.SequenceMatcher.
    """

    # When true (and Python is not run with -O), get_matching_blocks()
    # verifies that the blocks it computed are monotonically increasing.
    _do_check_consistency = True

    def __init__(self, isjunk=None, a='', b=''):
        """Create a matcher for sequences a and b.

        :param isjunk: Must be None; junk filtering is not supported.
        :param a: The first sequence.
        :param b: The second sequence.
        :raises NotImplementedError: if isjunk is not None.
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches

        # the result is cached on the instance after the first call
        if self.matching_blocks is not None:
            return self.matching_blocks

        matches = []
        # 10 is the maximum recursion depth handed to recurse_matches
        recurse_matches(self.a, self.b, 0, 0,
                        len(self.a), len(self.b), matches, 10)
        # Matches now has individual line pairs of
        # line A matches line B, at the given offsets
        self.matching_blocks = _collapse_sequences(matches)
        self.matching_blocks.append((len(self.a), len(self.b), 0))
        if PatienceSequenceMatcher._do_check_consistency:
            if __debug__:
                _check_consistency(self.matching_blocks)

        return self.matching_blocks

254

255

256

# This is a version of difflib.unified_diff which only adds a
# 'sequencematcher' factory parameter, so that callers can override the
# default SequenceMatcher.  It has been submitted as a patch to Python.

259

def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  When it is
    None, difflib.SequenceMatcher is used.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # the file header is emitted lazily, so that two identical inputs
    # produce no output at all
    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        # the hunk covers everything from the start of the first opcode
        # to the end of the last opcode of the group
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # a 'replace' is written as its deletions followed by its
            # insertions
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line

322

323

324

def _read_lines(path):
    """Return the lines of the file at path, or of stdin when path is '-'."""
    if path == '-':
        # stdin is not ours to close
        return sys.stdin.readlines()
    f = open(path, 'rb')
    try:
        return f.readlines()
    finally:
        f.close()


def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the original file, or '-' to read it from stdin.
    :param b: Path of the modified file, or '-' to read it from stdin.
    :param sequencematcher: Passed through to unified_diff().
    :return: An empty list when a and b are the same name, otherwise the
        iterable of diff lines produced by unified_diff().
    """
    # Should this actually be an error?
    if a == b:
        return []

    # Both files are read completely, and closed again, before any diff
    # line is generated.
    lines_a = _read_lines(a)
    lines_b = _read_lines(b)

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)

348

349

350

def main(args):
    """Command line entry point: write the diff of two files to stdout.

    :param args: The command line arguments, without the program name.
    :return: -1 when the wrong number of filenames was given, otherwise
        None.
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    # both options store into 'matcher'; patience is the default
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}

    (opts, args) = p.parse_args(args)
    matcher = algorithms[opts.matcher]

    if len(args) != 2:
        sys.stdout.write('You must supply 2 filenames to diff\n')
        return -1

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)

370

371

# Run as a script: diff the two files named on the command line and use
# main()'s return value as the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

Older »