~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/nofrillsprecisemerge.py

Committer: John Arbash Meinel
Date: 2005-11-08 18:36:26 UTC
mto: This revision was merged to the branch mainline in revision 1727.
Revision ID: john@arbash-meinel.com-20051108183626-71f8414338043265

Updating unified_diff to take a factory, using the new diff algorithm in the code.

files added:
INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/cdvdifflib.py

bzrlib/clone.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/gpg.py

bzrlib/identitymap.py

bzrlib/nofrillsprecisemerge.py

bzrlib/option.py

bzrlib/revisionspec.py

bzrlib/selftest/stub_sftp.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_command.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_conflicts.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_upgrade.py

bzrlib/selftest/testannotate.py

bzrlib/selftest/testapi.py

bzrlib/selftest/testconfig.py

bzrlib/selftest/testgpg.py

bzrlib/selftest/testhttp.py

bzrlib/selftest/testidentitymap.py

bzrlib/selftest/testnonascii.py

bzrlib/selftest/testoptions.py

bzrlib/selftest/testrevprops.py

bzrlib/selftest/testreweave.py

bzrlib/selftest/testsampler.py

bzrlib/selftest/testsftp.py

bzrlib/selftest/testtestament.py

bzrlib/selftest/testtransactions.py

bzrlib/selftest/testtsort.py

bzrlib/selftest/testworkingtree.py

bzrlib/selftest/treeshape.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/transactions.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

patches/cache_weave_inclusions.diff

tools/capture_tree.py

files removed:
bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/selftest/testremotebranch.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

files renamed:
bzr-man.py => bzr_man.py

tools/testweave.py => bzrlib/selftest/test_weave.py

bzrlib/selftest/plugins.py => bzrlib/selftest/testplugins.py

tools/history2weaves.py => bzrlib/upgrade.py

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/compressed_text.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/zsh/_bzr

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/nofrillsprecisemerge.py

from sets import Set as set

from copy import copy

from bisect import bisect

def unique_lcs(a, b):
    """Find the longest common subsequence of lines unique to both inputs.

    :param a: An indexable object (such as string or list of strings).
        Its elements are used as dictionary keys, so they must be hashable.
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]
        Both coordinates are strictly increasing along the list.

    This only matches lines which occur exactly once in a and exactly once
    in b.  This helps prevent common lines from over influencing match
    results.

    The longest common subsequence is found with the Patience Sorting
    algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # a_index[line] = position of line in a, unless the line is duplicated
    # in a, in which case it is set to None
    a_index = {}
    for a_pos, line in enumerate(a):
        if line in a_index:
            a_index[line] = None
        else:
            a_index[line] = a_pos

    # btoa[i] = position in a of line i of b, unless that line doesn't
    # occur exactly once in both, in which case it is None
    btoa = [None] * len(b)
    first_b_pos = {}
    for b_pos, line in enumerate(b):
        a_pos = a_index.get(line)
        if a_pos is None:
            continue
        if line in first_b_pos:
            # unset the previous mapping, which we now know to
            # be invalid because the line isn't unique in b
            btoa[first_b_pos[line]] = None
            # forget the line entirely so that any further occurrence
            # in b is skipped by the a_index lookup above
            del a_index[line]
        else:
            first_b_pos[line] = b_pos
            btoa[b_pos] = a_pos

    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    #
    # stacks[k] is the a-position on top of pile k (kept sorted),
    # lasts[k] is the b-position that put it there, and
    # backpointers[bpos] is the b-position of the previous match in the
    # chain ending at bpos (None when bpos starts a chain).
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k + 1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k - 1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)

    if not lasts:
        return []

    # walk the chain back from the top of the last pile, then flip it so
    # the matches come out in increasing order
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

# Import-time sanity checks for unique_lcs: each entry is
# (a, b, expected list of (position_in_a, position_in_b) matches).
for _lcs_a, _lcs_b, _lcs_expected in (
    ('', '', []),
    ('a', 'a', [(0, 0)]),
    ('a', 'b', []),
    ('ab', 'ab', [(0, 0), (1, 1)]),
    ('abcde', 'cdeab', [(2, 0), (3, 1), (4, 2)]),
    ('cdeab', 'abcde', [(0, 2), (1, 3), (2, 4)]),
    ('abXde', 'abYde', [(0, 0), (1, 1), (3, 3), (4, 4)]),
    # 'a' and 'c' are duplicated in the first input, so only 'b' can match
    ('acbac', 'abc', [(2, 1)]),
):
    assert unique_lcs(_lcs_a, _lcs_b) == _lcs_expected
# keep the module namespace free of the loop temporaries
del _lcs_a, _lcs_b, _lcs_expected

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    The region searched starts just after the last match already recorded
    in answer (or at the beginning of both sequences when answer is empty)
    and stops before ahi / bhi.

    :param a: A sequence
    :param b: Another sequence
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be extended in place with tuples
        (line_in_a, line_in_b), in increasing order.
    :param maxrecursion: The maximum depth to recurse.  Every nested call
        receives maxrecursion - 1; a call that receives a negative value
        returns immediately without looking for matches.
    :return: None, the return value is in the parameter answer, which
        should be a list
    """
    if maxrecursion < 0:
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if answer:
        # resume right after the most recent match
        alo, blo = answer[-1]
        alo += 1
        blo += 1
    else:
        alo, blo = 0, 0
    if alo == ahi or blo == bhi:
        return
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        # (unique_lcs positions are relative to the slices, so shift them)
        apos += alo
        bpos += blo
        recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # match what lies before the common tail first so that answer
        # stays in increasing order, then record the tail itself
        recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
        for offset in range(ahi - nahi):
            answer.append((nahi + offset, nbhi + offset))

# Import-time sanity checks for recurse_matches.

# 'a' and 'c' appear twice in the second sequence, so only 'b' is a unique
# match; the outer 'a' and 'c' are expected to be paired as well.
a1 = []
recurse_matches(
    ['a', None, 'b', None, 'c'],
    ['a', 'a', 'b', 'c', 'c'],
    5, 5, a1, 10)
assert a1 == [(0, 0), (2, 2), (4, 4)]

# here the duplicates ('a' and 'c') are in the first sequence instead
a2 = []
recurse_matches(
    ['a', 'c', 'b', 'a', 'c'],
    ['a', 'b', 'c'],
    5, 3, a2, 10)
assert a2 == [(0, 0), (2, 1), (4, 2)]

class Weave:
    """Store many revisions of one text as a single interleaved line list.

    Every line ever added is kept in self.weave; for each revision only
    the lines whose state changed in that revision are recorded.  The text
    of a revision is rebuilt from the state changes of the revision and of
    all its ancestors.
    """

    def __init__(self):
        """Create an empty weave holding no revisions."""
        # [(lineid, line)]
        self.weave = []
        # {revid: [parent]}
        self.parents = {}
        # {revid: [(lineid, state)]}
        # states are integers
        # each line's state starts at 0, then goes to 1, 2, etc.
        # odd states are when the line is present, even are when it is not
        # the merge between two states is the greater of the two values
        self.newstates = {}

    def add_revision(self, revid, lines, parents):
        """Record a new revision.

        :param revid: Identifier of the new revision; must not have been
            added already.
        :param lines: The text of the revision, as a sequence of lines.
        :param parents: Revision ids of the parents; each must have been
            added already.
        """
        assert revid not in self.parents
        for p in parents:
            assert p in self.parents
        self.parents[revid] = copy(parents)
        matches = []
        # match against the weave
        weave_lines = [line for (lineid, line) in self.weave]
        recurse_matches(lines, weave_lines, len(lines), len(weave_lines),
                        matches, 10)
        # ids of the weave lines which are present in this revision
        present = set()
        for a, b in matches:
            present.add(self.weave[b][0])
        parent_vals = [self._make_vals(p) for p in parents]
        # calculate which lines had their states changed in this revision
        newvals = []
        for lineid, line in self.weave:
            if parent_vals:
                state = max([v.get(lineid, 0) for v in parent_vals])
            else:
                # without parents every existing line starts out absent
                state = 0
            # odd state == present; bump the state when presence flipped
            if ((state & 1) == 1) != (lineid in present):
                newvals.append((lineid, state + 1))
        # build a new weave
        newweave = []
        revpos = -1
        weavepos = -1
        # sentinel match just past both ends, so that the trailing lines
        # of the weave and of the revision are flushed by the loop below
        matches.append((len(lines), len(weave_lines)))
        for a, b in matches:
            if b > weavepos + 1:
                # add current weave lines to the new weave
                newweave.extend(self.weave[weavepos + 1:b])
            if a > revpos + 1:
                # add lines which have never appeared before to the weave
                for i in range(revpos + 1, a):
                    lineid = (revid, i)
                    newweave.append((lineid, lines[i]))
                    newvals.append((lineid, 1))
            if b != len(weave_lines):
                # a real match (not the sentinel): keep the matched line
                newweave.append(self.weave[b])
            revpos = a
            weavepos = b
        self.newstates[revid] = newvals
        self.weave = newweave

    def _parents(self, revid):
        """Return the set containing revid and all of its ancestors."""
        pending = [revid]
        seen = set()
        while pending:
            current = pending.pop()
            if current not in seen:
                pending.extend(self.parents[current])
                seen.add(current)
        return seen

    def _make_vals(self, revid):
        """Return {lineid: state} for the given revision.

        The state of a line is the greatest state recorded for it by the
        revision itself or any of its ancestors; lines never touched by
        any of them are absent from the result.
        """
        vals = {}
        for ancestor in self._parents(revid):
            for lineid, state in self.newstates[ancestor]:
                vals[lineid] = max(vals.get(lineid, 0), state)
        return vals

    def retrieve_revision(self, revid):
        """Return the text of revision revid as a list of lines."""
        vals = self._make_vals(revid)
        # odd states mean the line is present
        return [line for (lineid, line) in self.weave
                if (vals.get(lineid, 0) & 1)]

    def annotate(self, revid):
        """Describe every line which revid or one of its ancestors touched.

        :return: [(line, whether present, [perpetrator])] in weave order,
            where the perpetrators are the revisions which recorded the
            greatest state of the line.
        """
        ancestry = self._parents(revid)
        # {lineid: [(revision, state)]}
        byline = {}
        for rev in ancestry:
            for lineid, state in self.newstates[rev]:
                byline.setdefault(lineid, []).append((rev, state))
        result = []
        for (lineid, line) in self.weave:
            maxstate = 0
            perps = []
            for (rev, state) in byline.get(lineid, []):
                if state > maxstate:
                    maxstate = state
                    perps = [rev]
                elif state == maxstate:
                    perps.append(rev)
            if maxstate > 0:
                result.append((line, (maxstate & 1) == 1, perps))
        return result

    def _flush_section(self, merged, awins, bwins, alines, blines):
        """Append one pending unmatched section to the merge output."""
        if awins and bwins:
            # conflict case
            merged.append((alines, blines))
        elif awins:
            merged.extend(alines)
        elif bwins:
            merged.extend(blines)

    def merge_revisions(self, reva, revb):
        """Merge the texts of two revisions.

        :return: [line]; non-conflict lines are strings, conflict sections
            are ([linesa], [linesb]) tuples.
        """
        va = self._make_vals(reva)
        vb = self._make_vals(revb)
        merged = []
        # which side(s) hold the greater state somewhere in the pending
        # section, and the lines each side has in that section
        awins, bwins = False, False
        alines, blines = [], []
        for lineid, line in self.weave:
            aval, bval = va.get(lineid, 0), vb.get(lineid, 0)
            if aval & 1 and bval & 1:
                # append a matched line and the section prior to it
                self._flush_section(merged, awins, bwins, alines, blines)
                merged.append(line)
                awins, bwins = False, False
                alines, blines = [], []
            elif aval & 1 or bval & 1:
                # extend either side of the potential conflict
                # section with a non-matching line
                if aval > bval:
                    awins = True
                else:
                    bwins = True
                if aval & 1:
                    alines.append(line)
                else:
                    blines.append(line)
        # add the potential conflict section at the end
        self._flush_section(merged, awins, bwins, alines, blines)
        return merged

# Import-time sanity checks for Weave.

# two children of revision 1 insert different lines at the same place
w = Weave()
w.add_revision(1, ['a', 'b'], [])
assert w.retrieve_revision(1) == ['a', 'b']
w.add_revision(2, ['a', 'x', 'b'], [1])
assert w.retrieve_revision(2) == ['a', 'x', 'b']
w.add_revision(3, ['a', 'y', 'b'], [1])
assert w.retrieve_revision(3) == ['a', 'y', 'b']
assert w.merge_revisions(2, 3) == ['a', (['x'], ['y']), 'b']

# revision 4 has the same text as revision 2, and revision 5 replaces
# 'x' with 'z' on top of it: merging 2 with 5 must not conflict
w.add_revision(4, ['a', 'x', 'b'], [1])
w.add_revision(5, ['a', 'z', 'b'], [4])
assert w.merge_revisions(2, 5) == ['a', 'z', 'b']

# a deleted line is reported as not present and blamed on the revision
# which deleted it; lines from unrelated revisions ('c') are not listed
w = Weave()
w.add_revision('p', ['a', 'b'], [])
w.add_revision('q', ['a', 'c'], ['p'])
w.add_revision('r', ['a'], ['p'])
assert w.annotate('r') == [('a', True, ['p']), ('b', False, ['r'])]

Older »