~bzr-pqm/bzr/bzr.dev : revision 1185.81.9

1

from sets import Set as set

2

from copy import copy

3

from bisect import bisect

4

5

def unique_lcs(a, b):

6

# set index[line in a] = position of line in a unless

7

# unless a is a duplicate, in which case it's set to None

8

index = {}

9

for i in xrange(len(a)):

10

line = a[i]

11

if line in index:

12

index[line] = None

13

else:

14

index[line]= i

15

# make btoa[i] = position of line i in a, unless

16

# that line doesn't occur exactly once in both,

17

# in which case it's set to None

18

btoa = [None] * len(b)

19

index2 = {}

20

for pos, line in enumerate(b):

21

next = index.get(line)

22

if next is not None:

23

if line in index2:

24

# unset the previous mapping, which we now know to

25

# be invalid because the line isn't unique

26

btoa[index2[line]] = None

27

del index[line]

28

else:

29

index2[line] = pos

30

btoa[pos] = next

31

# this is the Patience sorting algorithm

32

# see http://en.wikipedia.org/wiki/Patience_sorting

33

backpointers = [None] * len(b)

34

stacks = []

35

lasts = []

36

k = 0

37

for bpos, apos in enumerate(btoa):

38

if apos is None:

39

continue

40

# as an optimization, check if the next line comes at the end,

41

# because it usually does

42

if stacks and stacks[-1] < apos:

43

k = len(stacks)

44

# as an optimization, check if the next line comes right after

45

# the previous line, because usually it does

46

elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):

47

k += 1

48

else:

49

k = bisect(stacks, apos)

50

if k > 0:

51

backpointers[bpos] = lasts[k-1]

52

if k < len(stacks):

53

stacks[k] = apos

54

lasts[k] = bpos

55

else:

56

stacks.append(apos)

57

lasts.append(bpos)

58

if len(lasts) == 0:

59

return []

60

result = []

61

k = lasts[-1]

62

while k is not None:

63

result.append((btoa[k], k))

64

k = backpointers[k]

65

result.reverse()

66

return result

67

68

assert unique_lcs('', '') == []

69

assert unique_lcs('a', 'a') == [(0, 0)]

70

assert unique_lcs('a', 'b') == []

71

assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]

72

assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]

73

assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]

74

assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]

75

assert unique_lcs('acbac', 'abc') == [(2, 1)]

76

77

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):

78

oldlen = len(answer)

79

if maxrecursion < 0:

80

# this will never happen normally, this check is to prevent DOS attacks

81

return

82

oldlength = len(answer)

83

if len(answer) == 0:

84

alo, blo = 0, 0

85

else:

86

alo, blo = answer[-1]

87

alo += 1

88

blo += 1

89

if alo == ahi or blo == bhi:

90

return

91

for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):

92

# recurse between lines which are unique in each file and match

93

apos += alo

94

bpos += blo

95

recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)

96

answer.append((apos, bpos))

97

if len(answer) > oldlength:

98

# find matches between the last match and the end

99

recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)

100

elif a[alo] == b[blo]:

101

# find matching lines at the very beginning

102

while alo < ahi and blo < bhi and a[alo] == b[blo]:

103

answer.append((alo, blo))

104

alo += 1

105

blo += 1

106

recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)

107

elif a[ahi - 1] == b[bhi - 1]:

108

# find matching lines at the very end

109

nahi = ahi - 1

110

nbhi = bhi - 1

111

while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:

112

nahi -= 1

113

nbhi -= 1

114

recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)

115

for i in xrange(ahi - nahi):

116

answer.append((nahi + i, nbhi + i))

117

118

a1 = []

119

recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)

120

assert a1 == [(0, 0), (2, 2), (4, 4)]

121

a2 = []

122

recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)

123

assert a2 == [(0, 0), (2, 1), (4, 2)]

124

125

class Weave:

126

def __init__(self):

127

# [(lineid, line)]

128

self.weave = []

129

# {revid: [parent]}

130

self.parents = {}

131

# {revid: [(lineid, state)]}

132

# states are integers

133

# each line's state starts at 0, then goes to 1, 2, etc.

134

# odd states are when the line is present, even are when it is not

135

# the merge between two states is the greater of the two values

136

self.newedgestates = {}

137

138

def add_revision(self, revid, lines, parents):

139

assert revid not in self.parents

140

for p in parents:

141

assert p in self.parents

142

self.parents[revid] = copy(parents)

143

# match against the weave

144

matches = []

145

lines2 = [line for (lineid, line) in self.weave]

146

recurse_matches(lines, lines2, len(lines), len(lines2), matches, 10)

147

# build a new weave

148

newweave = []

149

revpos = -1

150

weavepos = -1

151

matches.append((len(lines), len(lines2)))

152

currentlines = []

153

for a, b in matches:

154

if b > weavepos + 1:

155

# add current weave lines to the new weave

156

newweave.extend(self.weave[weavepos + 1:b])

157

if a > revpos + 1:

158

# add lines which have never appeared before to the weave

159

for i in xrange(revpos + 1, a):

160

lineid = (revid, i)

161

currentlines.append(lineid)

162

newweave.append((lineid, lines[i]))

163

if b != len(lines2):

164

newweave.append(self.weave[b])

165

currentlines.append(self.weave[b][0])

166

revpos = a

167

weavepos = b

168

self.weave = newweave

169

# calculate which lines had their states changed in this revision

170

currentedges = set()

171

if len(currentlines) > 0:

172

for i in xrange(len(currentlines) - 1):

173

currentedges.add((currentlines[i], currentlines[i+1]))

174

currentedges.add((None, currentlines[0]))

175

currentedges.add((currentlines[-1], None))

176

else:

177

currentedges.add((None, None))

178

newedgevals = []

179

vals = self._make_vals(revid)

180

for edge in currentedges:

181

if edge not in vals:

182

newedgevals.append((edge, 1))

183

for edge, state in vals.items():

184

if (state & 1 == 1) != (edge in currentedges):

185

newedgevals.append((edge, state + 1))

186

if len(newedgevals) > 0:

187

self.newedgestates[revid] = newedgevals

188

189

def _make_vals(self, revid):

190

# return {lineid: state} for the given revision

191

unused = [revid]

192

s = set()

193

while unused:

194

next = unused.pop()

195

if next not in s:

196

unused.extend(self.parents[next])

197

s.add(next)

198

v = {}

199

for n in s:

200

for p, q in self.newedgestates.get(n, []):

201

v[p] = max(v.get(p, 0), q)

202

return v

203

204

def _lineids(self, revid):

205

lineids = set()

206

for (ida, idb), state in self._make_vals(revid).items():

207

if state & 1 == 1:

208

if ida is not None:

209

lineids.add(ida)

210

if idb is not None:

211

lineids.add(idb)

212

return lineids

213

214

def retrieve_revision(self, revid):

215

# returns a list of strings

216

return [line for (lineid, line) in self.weave if lineid in self._lineids(revid)]

217

218

def merge(self, reva, revb):

219

# returns [line]

220

# non-conflict lines are strings, conflict sections are

221

# ([linesa], [linesb])

222

edgesa = self._make_vals(reva)

223

alines = self._lineids(reva)

224

edgesb = self._make_vals(revb)

225

blines = self._lineids(revb)

226

# figure out which side wins at each line

227

weavepositions = {}

228

for pos, (lineid, line) in enumerate(self.weave):

229

weavepositions[lineid] = pos

230

awins = [False] * len(self.weave)

231

bwins = [False] * len(self.weave)

232

for edge in edgesa.keys() + edgesb.keys():

233

statea = edgesa.get(edge, 0)

234

stateb = edgesb.get(edge, 0)

235

if statea != stateb:

236

if statea > stateb:

237

winner = awins

238

else:

239

winner = bwins

240

begin, end = edge

241

if begin is None:

242

start = -1

243

else:

244

start = weavepositions[begin]

245

if begin not in alines or begin not in blines:

246

winner[start] = True

247

if end is None:

248

stop = len(winner)

249

else:

250

stop = weavepositions[end]

251

if end not in alines or end not in blines:

252

winner[stop] = True

253

for i in xrange(start + 1, stop):

254

winner[i] = True

255

# pull out matched lines and winning sections

256

result = []

257

unmatcheda = []

258

unmatchedb = []

259

amustwin = False

260

bmustwin = False

261

for pos, (lineid, line) in enumerate(self.weave):

262

if not awins[pos] and not bwins[pos]:

263

if lineid in alines:

264

if amustwin and bmustwin:

265

if unmatcheda or unmatchedb:

266

result.append((unmatcheda, unmatchedb))

267

unmatcheda = []

268

unmatchedb = []

269

elif amustwin:

270

result.extend(unmatcheda)

271

elif bmustwin:

272

result.extend(unmatchedb)

273

result.append(line)

274

amustwin = False

275

bmustwin = False

276

else:

277

if awins[pos]:

278

amustwin = True

279

if bwins[pos]:

280

bmustwin = True

281

if lineid in alines:

282

unmatcheda.append(line)

283

if lineid in blines:

284

unmatchedb.append(line)

285

if amustwin and bmustwin:

286

if unmatcheda or unmatchedb:

287

result.append((unmatcheda, unmatchedb))

288

elif amustwin:

289

result.extend(unmatcheda)

290

elif bmustwin:

291

result.extend(unmatchedb)

292

return result

293

294

w = Weave()

295

w.add_revision(1, ['a', 'b'], [])

296

assert w.retrieve_revision(1) == ['a', 'b']

297

w.add_revision(2, ['a', 'x', 'b'], [1])

298

assert w.retrieve_revision(2) == ['a', 'x', 'b']

299

w.add_revision(3, ['a', 'y', 'b'], [1])

300

assert w.retrieve_revision(3) == ['a', 'y', 'b']

301

assert w.merge(2, 3) == ['a', (['x'], ['y']), 'b']

302

w.add_revision(4, ['a', 'x', 'b'], [1])

303

w.add_revision(5, ['a', 'z', 'b'], [4])

304

assert w.merge(2, 5) == ['a', 'z', 'b']

305

w = Weave()

306

w.add_revision(1, ['b'], [])

307

assert w.retrieve_revision(1) == ['b']

308

w.add_revision(2, ['x', 'b'], [1])

309

assert w.retrieve_revision(2) == ['x', 'b']

310

w.add_revision(3, ['y', 'b'], [1])

311

assert w.retrieve_revision(3) == ['y', 'b']

312

assert w.merge(2, 3) == [(['x'], ['y']), 'b']

313

w.add_revision(4, ['x', 'b'], [1])

314

w.add_revision(5, ['z', 'b'], [4])

315

assert w.merge(2, 5) == ['z', 'b']

316

w = Weave()

317

w.add_revision(1, ['a'], [])

318

assert w.retrieve_revision(1) == ['a']

319

w.add_revision(2, ['a', 'x'], [1])

320

assert w.retrieve_revision(2) == ['a', 'x']

321

w.add_revision(3, ['a', 'y'], [1])

322

assert w.retrieve_revision(3) == ['a', 'y']

323

assert w.merge(2, 3) == ['a', (['x'], ['y'])]

324

w.add_revision(4, ['a', 'x'], [1])

325

w.add_revision(5, ['a', 'z'], [4])

326

assert w.merge(2, 5) == ['a', 'z']

327

w = Weave()

328

w.add_revision(1, [], [])

329

assert w.retrieve_revision(1) == []

330

w.add_revision(2, ['x'], [1])

331

assert w.retrieve_revision(2) == ['x']

332

w.add_revision(3, ['y'], [1])

333

assert w.retrieve_revision(3) == ['y']

334

assert w.merge(2, 3) == [(['x'], ['y'])]

335

w.add_revision(4, ['x'], [1])

336

w.add_revision(5, ['z'], [4])

337

assert w.merge(2, 5) == ['z']

338

339

w = Weave()

340

w.add_revision(1, ['a', 'b'], [])

341

w.add_revision(2, ['a', 'c', 'b'], [1])

342

w.add_revision(3, ['a', 'b'], [2])

343

w.add_revision(4, ['a', 'd', 'b'], [1])

344

assert w.merge(2, 4) == ['a', (['c'], ['d']), 'b']

345

assert w.merge(3, 4) == ['a', ([], ['d']), 'b']

346

w.add_revision(5, ['a', 'b'], [4])

347

assert w.merge(4, 5) == ['a', 'b']

348

w = Weave()

349

w.add_revision(1, ['b'], [])

350

w.add_revision(2, ['c', 'b'], [1])

351

w.add_revision(3, ['b'], [2])

352

w.add_revision(4, ['d', 'b'], [1])

353

assert w.merge(2, 4) == [(['c'], ['d']), 'b']

354

assert w.merge(3, 4) == [([], ['d']), 'b']

355

w.add_revision(5, ['b'], [4])

356

assert w.merge(4, 5) == ['b']

357

w = Weave()

358

w.add_revision(1, ['a'], [])

359

w.add_revision(2, ['a', 'c'], [1])

360

w.add_revision(3, ['a'], [2])

361

w.add_revision(4, ['a', 'd'], [1])

362

assert w.merge(2, 4) == ['a', (['c'], ['d'])]

363

assert w.merge(3, 4) == ['a', ([], ['d'])]

364

w.add_revision(5, ['a'], [4])

365

assert w.merge(4, 5) == ['a']

366

w = Weave()

367

w.add_revision(1, [], [])

368

w.add_revision(2, ['c'], [1])

369

w.add_revision(3, [], [2])

370

w.add_revision(4, ['d'], [1])

371

assert w.merge(2, 4) == [(['c'], ['d'])]

372

assert w.merge(3, 4) == [([], ['d'])]

373

w.add_revision(5, [], [4])

374

assert w.merge(4, 5) == []

375

376

w = Weave()

377

w.add_revision(1, ['a', 'b', 'c', 'd', 'e'], [])

378

w.add_revision(2, ['a', 'x', 'c', 'd', 'e'], [1])

379

w.add_revision(3, ['a', 'e'], [1])

380

w.add_revision(4, ['a', 'b', 'c', 'd', 'e'], [3])

381

assert w.merge(2, 4) == ['a', (['x', 'c', 'd'], ['b', 'c', 'd']), 'e']

382

w = Weave()

383

w.add_revision(1, ['b', 'c', 'd', 'e'], [])

384

w.add_revision(2, ['x', 'c', 'd', 'e'], [1])

385

w.add_revision(3, ['e'], [1])

386

w.add_revision(4, ['b', 'c', 'd', 'e'], [3])

387

assert w.merge(2, 4) == [(['x', 'c', 'd'], ['b', 'c', 'd']), 'e']

388

w = Weave()

389

w.add_revision(1, ['a', 'b', 'c', 'd'], [])

390

w.add_revision(2, ['a', 'x', 'c', 'd'], [1])

391

w.add_revision(3, ['a'], [1])

392

w.add_revision(4, ['a', 'b', 'c', 'd'], [3])

393

assert w.merge(2, 4) == ['a', (['x', 'c', 'd'], ['b', 'c', 'd'])]

394

w = Weave()

395

w.add_revision(1, ['b', 'c', 'd'], [])

396

w.add_revision(2, ['x', 'c', 'd'], [1])

397

w.add_revision(3, [], [1])

398

w.add_revision(4, ['b', 'c', 'd'], [3])

399

assert w.merge(2, 4) == [(['x', 'c', 'd'], ['b', 'c', 'd'])]

400

401

w = Weave()

402

w.add_revision(1, ['a', 'b'], [])

403

w.add_revision(2, ['a', 'c', 'b'], [1])

404

w.add_revision(3, ['a', 'd', 'b'], [1])

405

w.add_revision(4, ['a', 'c', 'd', 'b'], [2, 3])

406

w.add_revision(5, ['a', 'd', 'c', 'b'], [2, 3])

407

assert w.merge(4, 5) == ['a', (['c'], []), 'd', 'c', 'b']

408

w = Weave()

409

w.add_revision(1, ['b'], [])

410

w.add_revision(2, ['c', 'b'], [1])

411

w.add_revision(3, ['d', 'b'], [1])

412

w.add_revision(4, ['c', 'd', 'b'], [2, 3])

413

w.add_revision(5, ['d', 'c', 'b'], [2, 3])

414

assert w.merge(4, 5) == [(['c'], []), 'd', 'c', 'b']

415

w = Weave()

416

w.add_revision(1, ['a'], [])

417

w.add_revision(2, ['a', 'c'], [1])

418

w.add_revision(3, ['a', 'd'], [1])

419

w.add_revision(4, ['a', 'c', 'd'], [2, 3])

420

w.add_revision(5, ['a', 'd', 'c'], [2, 3])

421

assert w.merge(4, 5) == ['a', (['c'], []), 'd', 'c']

422

w = Weave()

423

w.add_revision(1, [], [])

424

w.add_revision(2, ['c'], [1])

425

w.add_revision(3, ['d'], [1])

426

w.add_revision(4, ['c', 'd'], [2, 3])

427

w.add_revision(5, ['d', 'c'], [2, 3])

428

assert w.merge(4, 5) == [(['c'], []), 'd', 'c']

429