~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/patiencediff.py

Committer: Canonical.com Patch Queue Manager
Date: 2006-06-12 18:16:35 UTC
mfrom: (1185.84.3 bundles)
Revision ID: pqm@pqm.ubuntu.com-20060612181635-930f7114f61dbfcb

Hide diffs for old revisions in bundles

files added:
bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/read_bundle.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v07.py

bzrlib/bzrdir.py

bzrlib/doc/api/branch.txt

bzrlib/inter.py

bzrlib/knit.py

bzrlib/lockdir.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/sign_my_commits.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_status.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transform.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/readonly.py

bzrlib/tuned_gzip.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/using_aliases.txt

generate_docs.py

tools/__init__.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

files removed:
bzrlib/_merge_core.py

bzrlib/tests/test_remove.py

bzrlib/tests/test_reweave.py

bzrlib/util/configobj/validate.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

files renamed:
bzrlib/_changeset.py => bzrlib/bundle/__init__.py

bzrlib/store/weave.py => bzrlib/store/versioned/__init__.py

bzrlib/tests/test_annotate.py => bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/test_status.py => bzrlib/tests/blackbox/test_status.py

bzrlib/tests/test_uncommit.py => bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/test_branch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/test_parent.py => bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/test_fileid_involved.py => bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/test_basis_inventory.py => bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/transport/http.py => bzrlib/transport/http/__init__.py

tutorial.txt => doc/tutorial.txt

bzr_man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/merge.py *

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/pwk

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/patiencediff.py

#!/usr/bin/env python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bisect import bisect

from copy import copy

import difflib

import os

import sys

import time

from bzrlib.trace import mutter

# Names exported by a star-import of this module.
__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']

def unique_lcs(a, b):
    """Find the longest common subset for unique lines.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subset uses the Patience Sorting algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # set index[line in a] = position of line in a, unless the line is
    # duplicated in a, in which case it's set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i
    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        a_pos = index.get(line)
        if a_pos is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                # forget the line so later repeats in b are skipped too
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = a_pos
    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # stacks[k] holds the a-position on top of pile k, lasts[k] the
    # b-position that put it there, and backpointers[bpos] the b-position
    # on top of the previous pile at the time bpos was placed.
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)
    if not lasts:
        return []
    # walk the backpointers from the top of the last pile, then reverse
    # so the matches come out in increasing order
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

108

109

110

def recurse_matches(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param alo: The start location of a to check, typically 0
    :param blo: The start location of b to check, typically 0
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b)]
    :param maxrecursion: The maximum depth to recurse.
                         Must be a positive integer.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        mutter('max recursion depth reached')
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if alo == ahi or blo == bhi:
        # one of the ranges is empty, so nothing can match
        return
    last_a_pos = alo-1
    last_b_pos = blo-1
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        # (unique_lcs saw slices, so shift back to absolute positions)
        apos += alo
        bpos += blo
        # Most of the time, you will have a sequence of similar entries
        if last_a_pos+1 != apos or last_b_pos+1 != bpos:
            recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                            apos, bpos, answer, maxrecursion - 1)
        last_a_pos = apos
        last_b_pos = bpos
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, alo, blo,
                        ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # No unique match was found in this branch, so the region before
        # the common tail still starts at (alo, blo).
        recurse_matches(a, b, alo, blo,
                        nahi, nbhi, answer, maxrecursion - 1)
        # the tail is appended after the recursion so that answer stays
        # in increasing order
        answer.extend([(nahi + i, nbhi + i) for i in range(ahi - nahi)])

170

171

172

def _collapse_sequences(matches):

173

"""Find sequences of lines.

174

175

Given a sequence of [(line_in_a, line_in_b),]

176

find regions where they both increment at the same time

177

"""

178

answer = []

179

start_a = start_b = None

180

length = 0

181

for i_a, i_b in matches:

182

if (start_a is not None

183

and (i_a == start_a + length)

184

and (i_b == start_b + length)):

185

length += 1

186

else:

187

if start_a is not None:

188

answer.append((start_a, start_b, length))

189

start_a = i_a

190

start_b = i_b

191

length = 1

192

193

if length != 0:

194

answer.append((start_a, start_b, length))

195

196

return answer

197

198

199

def _check_consistency(answer):

200

# For consistency sake, make sure all matches are only increasing

201

next_a = -1

202

next_b = -1

203

for a,b,match_len in answer:

204

assert a >= next_a, 'Non increasing matches for a'

205

assert b >= next_b, 'Not increasing matches for b'

206

next_a = a + match_len

207

next_b = b + match_len

208

209

210

class PatienceSequenceMatcher(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subset."""

    # Class-wide switch: when true (and asserts are enabled) every freshly
    # computed block list is passed through _check_consistency.
    _do_check_consistency = True

    def __init__(self, isjunk=None, a='', b=''):
        """Create a matcher for sequences a and b.

        :param isjunk: Must be None; junk filtering is not supported
        :param a: The first sequence
        :param b: The second sequence
        :raises NotImplementedError: if isjunk is not None
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches

        cached = self.matching_blocks
        if cached is not None:
            return cached

        # line_pairs receives individual (line_in_a, line_in_b) matches
        line_pairs = []
        len_a = len(self.a)
        len_b = len(self.b)
        recurse_matches(self.a, self.b, 0, 0, len_a, len_b, line_pairs, 10)

        # Fold the single-line matches into (i, j, n) blocks and terminate
        # the list with the zero-length sentinel block.
        blocks = _collapse_sequences(line_pairs)
        blocks.append((len_a, len_b, 0))
        self.matching_blocks = blocks

        if __debug__ and PatienceSequenceMatcher._do_check_consistency:
            _check_consistency(blocks)

        return blocks

255

256

257

# This is a version of unified_diff which only adds a factory parameter

258

# so that you can override the default SequenceMatcher

259

# this has been submitted as a patch to python

260

def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    it defaults to difflib.SequenceMatcher.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print line
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher
    matcher = sequencematcher(None, a, b)

    header_pending = True
    for group in matcher.get_grouped_opcodes(n):
        # The file header is emitted lazily, so identical inputs produce
        # no output at all.
        if header_pending:
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            header_pending = False
        # The hunk spans from the start of the first opcode in the group
        # to the end of the last one.
        first_op = group[0]
        last_op = group[-1]
        i1, i2, j1, j2 = first_op[1], last_op[2], first_op[3], last_op[4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
            else:
                # a 'replace' emits its removed lines, then its added lines
                if tag in ('replace', 'delete'):
                    for line in a[i1:i2]:
                        yield '-' + line
                if tag in ('replace', 'insert'):
                    for line in b[j1:j2]:
                        yield '+' + line

323

324

325

def _read_diff_input(path):
    """Return the list of lines of the file at path, or of stdin for '-'."""
    if path == '-':
        # stdin is not ours to close
        return sys.stdin.readlines()
    input_file = open(path, 'rb')
    try:
        return input_file.readlines()
    finally:
        # always release the handle, even if reading fails
        input_file.close()


def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the original file, or '-' to read it from stdin
    :param b: Path of the modified file, or '-' to read it from stdin
    :param sequencematcher: Optional matcher factory, passed through to
                            unified_diff
    :return: An empty list when a and b are the same name, otherwise the
             iterable of diff lines produced by unified_diff
    """
    # Should this actually be an error?
    if a == b:
        return []

    # Both inputs are read completely (and the files closed) before the
    # diff is generated.
    lines_a = _read_diff_input(a)
    lines_b = _read_diff_input(b)

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)

349

350

351

def main(args):
    """Command-line entry point: print a unified diff of two files.

    :param args: The command-line arguments, without the program name
    :return: -1 when exactly two filenames were not supplied, otherwise
             None
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    # Both options store into 'matcher'; patience is the default.
    p.add_option('--patience', dest='matcher', action='store_const',
                 const='patience', default='patience',
                 help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const',
                 const='difflib', default='patience',
                 help='Use python\'s difflib algorithm')

    (opts, args) = p.parse_args(args)

    if len(args) != 2:
        # written with sys.stdout.write rather than the print statement so
        # the module parses on both Python 2 and Python 3; the output is
        # the same
        sys.stdout.write('You must supply 2 filenames to diff\n')
        return -1

    algorithms = {'patience': PatienceSequenceMatcher,
                  'difflib': difflib.SequenceMatcher}
    matcher = algorithms[opts.matcher]

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)

371

372

# When run as a script, diff the two files named on the command line and
# use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

Older »