~bzr-pqm/bzr/bzr.dev : revision 1185.81.1

1

from sets import Set as set

2

from copy import copy

3

from bisect import bisect

4

5

def unique_lcs(a, b):

6

# set index[line in a] = position of line in a unless

7

# unless a is a duplicate, in which case it's set to None

8

index = {}

9

for i in xrange(len(a)):

10

line = a[i]

11

if line in index:

12

index[line] = None

13

else:

14

index[line]= i

15

# make btoa[i] = position of line i in a, unless

16

# that line doesn't occur exactly once in both,

17

# in which case it's set to None

18

btoa = [None] * len(b)

19

index2 = {}

20

for pos, line in enumerate(b):

21

next = index.get(line)

22

if next is not None:

23

if line in index2:

24

# unset the previous mapping, which we now know to

25

# be invalid because the line isn't unique

26

btoa[index2[line]] = None

27

del index[line]

28

else:

29

index2[line] = pos

30

btoa[pos] = next

31

# this is the Patience sorting algorithm

32

# see http://en.wikipedia.org/wiki/Patience_sorting

33

backpointers = [None] * len(b)

34

stacks = []

35

lasts = []

36

k = 0

37

for bpos, apos in enumerate(btoa):

38

if apos is None:

39

continue

40

# as an optimization, check if the next line comes at the end,

41

# because it usually does

42

if stacks and stacks[-1] < apos:

43

k = len(stacks)

44

# as an optimization, check if the next line comes right after

45

# the previous line, because usually it does

46

elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):

47

k += 1

48

else:

49

k = bisect(stacks, apos)

50

if k > 0:

51

backpointers[bpos] = lasts[k-1]

52

if k < len(stacks):

53

stacks[k] = apos

54

lasts[k] = bpos

55

else:

56

stacks.append(apos)

57

lasts.append(bpos)

58

if len(lasts) == 0:

59

return []

60

result = []

61

k = lasts[-1]

62

while k is not None:

63

result.append((btoa[k], k))

64

k = backpointers[k]

65

result.reverse()

66

return result

67

68

assert unique_lcs('', '') == []

69

assert unique_lcs('a', 'a') == [(0, 0)]

70

assert unique_lcs('a', 'b') == []

71

assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]

72

assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]

73

assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]

74

assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]

75

assert unique_lcs('acbac', 'abc') == [(2, 1)]

76

77

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):

78

oldlen = len(answer)

79

if maxrecursion < 0:

80

# this will never happen normally, this check is to prevent DOS attacks

81

return

82

oldlength = len(answer)

83

if len(answer) == 0:

84

alo, blo = 0, 0

85

else:

86

alo, blo = answer[-1]

87

alo += 1

88

blo += 1

89

if alo == ahi or blo == bhi:

90

return

91

for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):

92

# recurse between lines which are unique in each file and match

93

apos += alo

94

bpos += blo

95

recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)

96

answer.append((apos, bpos))

97

if len(answer) > oldlength:

98

# find matches between the last match and the end

99

recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)

100

elif a[alo] == b[blo]:

101

# find matching lines at the very beginning

102

while alo < ahi and blo < bhi and a[alo] == b[blo]:

103

answer.append((alo, blo))

104

alo += 1

105

blo += 1

106

recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)

107

elif a[ahi - 1] == b[bhi - 1]:

108

# find matching lines at the very end

109

nahi = ahi - 1

110

nbhi = bhi - 1

111

while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:

112

nahi -= 1

113

nbhi -= 1

114

recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)

115

for i in xrange(ahi - nahi):

116

answer.append((nahi + i, nbhi + i))

117

118

a1 = []

119

recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)

120

assert a1 == [(0, 0), (2, 2), (4, 4)]

121

a2 = []

122

recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)

123

assert a2 == [(0, 0), (2, 1), (4, 2)]

124

125

class Weave:

126

def __init__(self):

127

# [(lineid, line)]

128

self.weave = []

129

# {revid: [parent]}

130

self.parents = {}

131

# {revid: [(lineid, state)]}

132

# states are integers

133

# each line's state starts at 0, then goes to 1, 2, etc.

134

# odd states are when the line is present, even are when it is not

135

# the merge between two states is the greater of the two values

136

self.newstates = {}

137

138

def add_revision(self, revid, lines, parents):

139

assert revid not in self.parents

140

for p in parents:

141

assert p in self.parents

142

self.parents[revid] = copy(parents)

143

matches = []

144

# match against the weave

145

lines2 = [line for (lineid, line) in self.weave]

146

recurse_matches(lines, lines2, len(lines), len(lines2), matches, 10)

147

s = set()

148

for a, b in matches:

149

s.add(self.weave[b][0])

150

vs = [self._make_vals(p) for p in parents]

151

# calculate which lines had their states changed in this revision

152

newvals = []

153

if len(vs) > 0:

154

for lineid, line in self.weave:

155

state = max([v.get(lineid, 0) for v in vs])

156

if (state & 1 == 1) != (lineid in s):

157

newvals.append((lineid, state + 1))

158

else:

159

for lineid, line in self.weave:

160

newvals.append((lineid, 1))

161

# build a new weave

162

newweave = []

163

revpos = -1

164

weavepos = -1

165

matches.append((len(lines), len(lines2)))

166

for a, b in matches:

167

if b > weavepos + 1:

168

# add current weave lines to the new weave

169

newweave.extend(self.weave[weavepos + 1:b])

170

if a > revpos + 1:

171

# add lines which have never appeared before to the weave

172

for i in xrange(revpos + 1, a):

173

lineid = (revid, i)

174

newweave.append((lineid, lines[i]))

175

newvals.append((lineid, 1))

176

if b != len(lines2):

177

newweave.append(self.weave[b])

178

revpos = a

179

weavepos = b

180

self.newstates[revid] = newvals

181

self.weave = newweave

182

183

def _parents(self, revid):

184

unused = [revid]

185

result = set()

186

while unused:

187

next = unused.pop()

188

if next not in result:

189

unused.extend(self.parents[next])

190

result.add(next)

191

return result

192

193

def _make_vals(self, revid):

194

# return {lineid: state} for the given revision

195

s = self._parents(revid)

196

v = {}

197

for n in s:

198

for p, q in self.newstates[n]:

199

v[p] = max(v.get(p, 0), q)

200

return v

201

202

def retrieve_revision(self, revid):

203

# returns a list of strings

204

v = self._make_vals(revid)

205

return [line for (lineid, line) in self.weave if (v.get(lineid, 0) & 1)]

206

207

def annotate(self, revid):

208

# returns [(line, whether present, [perpetrator])]

209

ps = self._parents(revid)

210

# {lineid: [(parent, state)]}

211

byline = {}

212

for parent in ps:

213

for lineid, state in self.newstates[parent]:

214

byline.setdefault(lineid, []).append((parent, state))

215

result = []

216

for (lineid, line) in self.weave:

217

maxstate = 0

218

perps = []

219

for (parent, state) in byline.get(lineid, []):

220

if state > maxstate:

221

maxstate = state

222

perps = [parent]

223

elif state == maxstate:

224

perps.append(parent)

225

if maxstate > 0:

226

result.append((line, (maxstate & 1) == 1, perps))

227

return result

228

229

def merge_revisions(self, reva, revb):

230

# returns [line]

231

# non-conflict lines are strings, conflict sections are

232

# ([linesa], [linesb])

233

va = self._make_vals(reva)

234

vb = self._make_vals(revb)

235

r = []

236

awins, bwins = False, False

237

alines, blines = [], []

238

for lineid, line in self.weave:

239

aval, bval = va.get(lineid, 0), vb.get(lineid, 0)

240

if aval & 1 and bval & 1:

241

# append a matched line and the section prior to it

242

if awins and bwins:

243

# conflict case

244

r.append((alines, blines))

245

elif awins:

246

r.extend(alines)

247

elif bwins:

248

r.extend(blines)

249

r.append(line)

250

awins, bwins = False, False

251

alines, blines = [], []

252

elif aval & 1 or bval & 1:

253

# extend either side of the potential conflict

254

# section with a non-matching line

255

if aval > bval:

256

awins = True

257

else:

258

bwins = True

259

if aval & 1:

260

alines.append(line)

261

else:

262

blines.append(line)

263

# add the potential conflict section at the end

264

if awins and bwins:

265

r.append((alines, blines))

266

elif awins:

267

r.extend(alines)

268

elif bwins:

269

r.extend(blines)

270

return r

271

272

w = Weave()

273

w.add_revision(1, ['a', 'b'], [])

274

assert w.retrieve_revision(1) == ['a', 'b']

275

w.add_revision(2, ['a', 'x', 'b'], [1])

276

assert w.retrieve_revision(2) == ['a', 'x', 'b']

277

w.add_revision(3, ['a', 'y', 'b'], [1])

278

assert w.retrieve_revision(3) == ['a', 'y', 'b']

279

assert w.merge_revisions(2, 3) == ['a', (['x'], ['y']), 'b']

280

w.add_revision(4, ['a', 'x', 'b'], [1])

281

w.add_revision(5, ['a', 'z', 'b'], [4])

282

assert w.merge_revisions(2, 5) == ['a', 'z', 'b']

283

w = Weave()

284

w.add_revision('p', ['a', 'b'], [])

285

w.add_revision('q', ['a', 'c'], ['p'])

286

w.add_revision('r', ['a'], ['p'])

287

assert w.annotate('r') == [('a', True, ['p']), ('b', False, ['r'])]