~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2005-09-12 09:50:44 UTC
Revision ID: mbp@sourcefrog.net-20050912095044-6acfdb5611729987

- no tests in bzrlib.fetch anymore

files added:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files removed:
BRANCH.TODO

INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/option.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py.moved

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/win32console.py

bzrlib/xml4.py

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/riodemo.py

tools/trace-revisions

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_parent.py => bzrlib/selftest/test_parent.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_fetch.py => bzrlib/selftest/testfetch.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/test_store.py => bzrlib/selftest/teststore.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/ui/__init__.py => bzrlib/ui.py

bzrlib/store/versioned/__init__.py => bzrlib/weavestore.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/tests/test_weave.py => tools/testweave.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml5.py

contrib/newinventory.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

from itertools import izip

import os

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accomodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

100

DATA_SUFFIX = '.knit'

101

INDEX_SUFFIX = '.kndx'

102

103

104

class KnitContent(object):

105

"""Content of a knit version to which deltas can be applied."""

106

107

def __init__(self, lines):

108

self._lines = lines

109

110

def annotate_iter(self):

111

"""Yield tuples of (origin, text) for each content line."""

112

for origin, text in self._lines:

113

yield origin, text

114

115

def annotate(self):

116

"""Return a list of (origin, text) tuples."""

117

return list(self.annotate_iter())

118

119

def apply_delta(self, delta):

120

"""Apply delta to this content."""

121

offset = 0

122

for start, end, count, lines in delta:

123

self._lines[offset+start:offset+end] = lines

124

offset = offset + (start - end) + count

125

126

def line_delta_iter(self, new_lines):

127

"""Generate line-based delta from new_lines to this content."""

128

new_texts = [text for origin, text in new_lines._lines]

129

old_texts = [text for origin, text in self._lines]

130

s = difflib.SequenceMatcher(None, old_texts, new_texts)

131

for op in s.get_opcodes():

132

if op[0] == 'equal':

133

continue

134

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

135

136

def line_delta(self, new_lines):

137

return list(self.line_delta_iter(new_lines))

138

139

def text(self):

140

return [text for origin, text in self._lines]

141

142

143

class _KnitFactory(object):

144

"""Base factory for creating content objects."""

145

146

def make(self, lines, version):

147

num_lines = len(lines)

148

return KnitContent(zip([version] * num_lines, lines))

149

150

151

class KnitAnnotateFactory(_KnitFactory):

152

"""Factory for creating annotated Content objects."""

153

154

annotated = True

155

156

def parse_fulltext(self, content, version):

157

lines = []

158

for line in content:

159

origin, text = line.split(' ', 1)

160

lines.append((int(origin), text))

161

return KnitContent(lines)

162

163

def parse_line_delta_iter(self, lines):

164

while lines:

165

header = lines.pop(0)

166

start, end, c = [int(n) for n in header.split(',')]

167

contents = []

168

for i in range(c):

169

origin, text = lines.pop(0).split(' ', 1)

170

contents.append((int(origin), text))

171

yield start, end, c, contents

172

173

def parse_line_delta(self, lines, version):

174

return list(self.parse_line_delta_iter(lines))

175

176

def lower_fulltext(self, content):

177

return ['%d %s' % (o, t) for o, t in content._lines]

178

179

def lower_line_delta(self, delta):

180

out = []

181

for start, end, c, lines in delta:

182

out.append('%d,%d,%d\n' % (start, end, c))

183

for origin, text in lines:

184

out.append('%d %s' % (origin, text))

185

return out

186

187

188

class KnitPlainFactory(_KnitFactory):

189

"""Factory for creating plain Content objects."""

190

191

annotated = False

192

193

def parse_fulltext(self, content, version):

194

return self.make(content, version)

195

196

def parse_line_delta_iter(self, lines, version):

197

while lines:

198

header = lines.pop(0)

199

start, end, c = [int(n) for n in header.split(',')]

200

yield start, end, c, zip([version] * c, lines[:c])

201

del lines[:c]

202

203

def parse_line_delta(self, lines, version):

204

return list(self.parse_line_delta_iter(lines, version))

205

206

def lower_fulltext(self, content):

207

return content.text()

208

209

def lower_line_delta(self, delta):

210

out = []

211

for start, end, c, lines in delta:

212

out.append('%d,%d,%d\n' % (start, end, c))

213

out.extend([text for origin, text in lines])

214

return out

215

216

217

def make_empty_knit(transport, relpath):

218

"""Construct a empty knit at the specified location."""

219

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

220

k._data._open_file()

221

222

223

class KnitVersionedFile(VersionedFile):

224

"""Weave-like structure with faster random access.

225

226

A knit stores a number of texts and a summary of the relationships

227

between them. Texts are identified by a string version-id. Texts

228

are normally stored and retrieved as a series of lines, but can

229

also be passed as single strings.

230

231

Lines are stored with the trailing newline (if any) included, to

232

avoid special cases for files with no final newline. Lines are

233

composed of 8-bit characters, not unicode. The combination of

234

these approaches should mean any 'binary' file can be safely

235

stored and retrieved.

236

"""

237

238

def __init__(self, relpath, transport, file_mode=None, access_mode=None, factory=None,

239

basis_knit=None, delta=True, create=False):

240

"""Construct a knit at location specified by relpath.

241

242

:param create: If not True, only open an existing knit.

243

"""

244

if access_mode is None:

245

access_mode = 'w'

246

super(KnitVersionedFile, self).__init__(access_mode)

247

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

248

assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \

249

type(basis_knit)

250

251

self.transport = transport

252

self.filename = relpath

253

self.basis_knit = basis_knit

254

self.factory = factory or KnitAnnotateFactory()

255

self.writable = (access_mode == 'w')

256

self.delta = delta

257

258

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

259

access_mode, create=create)

260

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

261

access_mode, create=not len(self.versions()))

262

263

def clear_cache(self):

264

"""Clear the data cache only."""

265

self._data.clear_cache()

266

267

def copy_to(self, name, transport):

268

"""See VersionedFile.copy_to()."""

269

# copy the current index to a temp index to avoid racing with local

270

# writes

271

transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename))

272

# copy the data file

273

transport.put(name + DATA_SUFFIX, self._data._open_file())

274

# rename the copied index into place

275

transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

276

277

def create_empty(self, name, transport, mode=None):

278

return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

279

280

def _fix_parents(self, version, new_parents):

281

"""Fix the parents list for version.

282

283

This is done by appending a new version to the index

284

with identical data except for the parents list.

285

the parents list must be a superset of the current

286

list.

287

"""

288

current_values = self._index._cache[version]

289

assert set(current_values[4]).difference(set(new_parents)) == set()

290

self._index.add_version(version,

291

current_values[1],

292

current_values[2],

293

current_values[3],

294

new_parents)

295

296

def get_graph_with_ghosts(self):

297

"""See VersionedFile.get_graph_with_ghosts()."""

298

graph_items = self._index.get_graph()

299

return dict(graph_items)

300

301

@staticmethod

302

def get_suffixes():

303

"""See VersionedFile.get_suffixes()."""

304

return [DATA_SUFFIX, INDEX_SUFFIX]

305

306

def has_ghost(self, version_id):

307

"""True if there is a ghost reference in the file to version_id."""

308

# maybe we have it

309

if self.has_version(version_id):

310

return False

311

# optimisable if needed by memoising the _ghosts set.

312

items = self._index.get_graph()

313

for node, parents in items:

314

for parent in parents:

315

if parent not in self._index._cache:

316

if parent == version_id:

317

return True

318

return False

319

320

def versions(self):

321

"""See VersionedFile.versions."""

322

return self._index.get_versions()

323

324

def has_version(self, version_id):

325

"""See VersionedFile.has_version."""

326

return self._index.has_version(version_id)

327

328

__contains__ = has_version

329

330

def _merge_annotations(self, content, parents):

331

"""Merge annotations for content. This is done by comparing

332

the annotations based on changed to the text."""

333

for parent_id in parents:

334

merge_content = self._get_content(parent_id)

335

seq = SequenceMatcher(None, merge_content.text(), content.text())

336

for i, j, n in seq.get_matching_blocks():

337

if n == 0:

338

continue

339

content._lines[j:j+n] = merge_content._lines[i:i+n]

340

341

def _get_components(self, version_id):

342

"""Return a list of (version_id, method, data) tuples that

343

makes up version specified by version_id of the knit.

344

345

The components should be applied in the order of the returned

346

list.

347

348

The basis knit will be used to the largest extent possible

349

since it is assumed that accesses to it is faster.

350

"""

351

# needed_revisions holds a list of (method, version_id) of

352

# versions that is needed to be fetched to construct the final

353

# version of the file.

354

355

# basis_revisions is a list of versions that needs to be

356

# fetched but exists in the basis knit.

357

358

basis = self.basis_knit

359

needed_versions = []

360

basis_versions = []

361

cursor = version_id

362

363

while 1:

364

picked_knit = self

365

if basis and basis._index.has_version(cursor):

366

picked_knit = basis

367

basis_versions.append(cursor)

368

method = picked_knit._index.get_method(cursor)

369

needed_versions.append((method, cursor))

370

if method == 'fulltext':

371

break

372

cursor = picked_knit.get_parents(cursor)[0]

373

374

components = {}

375

if basis_versions:

376

records = []

377

for comp_id in basis_versions:

378

data_pos, data_size = basis._index.get_data_position(comp_id)

379

records.append((piece_id, data_pos, data_size))

380

components.update(basis._data.read_records(records))

381

382

records = []

383

for comp_id in [vid for method, vid in needed_versions

384

if vid not in basis_versions]:

385

data_pos, data_size = self._index.get_position(comp_id)

386

records.append((comp_id, data_pos, data_size))

387

components.update(self._data.read_records(records))

388

389

# get_data_records returns a mapping with the version id as

390

# index and the value as data. The order the components need

391

# to be applied is held by needed_versions (reversed).

392

out = []

393

for method, comp_id in reversed(needed_versions):

394

out.append((comp_id, method, components[comp_id]))

395

396

return out

397

398

def _get_content(self, version_id):

399

"""Returns a content object that makes up the specified

400

version."""

401

if not self.has_version(version_id):

402

raise RevisionNotPresent(version_id, self.filename)

403

404

if self.basis_knit and version_id in self.basis_knit:

405

return self.basis_knit._get_content(version_id)

406

407

content = None

408

components = self._get_components(version_id)

409

for component_id, method, (data, digest) in components:

410

version_idx = self._index.lookup(component_id)

411

if method == 'fulltext':

412

assert content is None

413

content = self.factory.parse_fulltext(data, version_idx)

414

elif method == 'line-delta':

415

delta = self.factory.parse_line_delta(data, version_idx)

416

content.apply_delta(delta)

417

418

if 'no-eol' in self._index.get_options(version_id):

419

line = content._lines[-1][1].rstrip('\n')

420

content._lines[-1] = (content._lines[-1][0], line)

421

422

if sha_strings(content.text()) != digest:

423

raise KnitCorrupt(self.filename, 'sha-1 does not match')

424

425

return content

426

427

def _check_versions_present(self, version_ids):

428

"""Check that all specified versions are present."""

429

version_ids = set(version_ids)

430

for r in list(version_ids):

431

if self._index.has_version(r):

432

version_ids.remove(r)

433

if version_ids:

434

raise RevisionNotPresent(list(version_ids)[0], self.filename)

435

436

def _add_lines_with_ghosts(self, version_id, parents, lines):

437

"""See VersionedFile.add_lines_with_ghosts()."""

438

self._check_add(version_id, lines)

439

return self._add(version_id, lines[:], parents, self.delta)

440

441

def _add_lines(self, version_id, parents, lines):

442

"""See VersionedFile.add_lines."""

443

self._check_add(version_id, lines)

444

self._check_versions_present(parents)

445

return self._add(version_id, lines[:], parents, self.delta)

446

447

def _check_add(self, version_id, lines):

448

"""check that version_id and lines are safe to add."""

449

assert self.writable, "knit is not opened for write"

450

### FIXME escape. RBC 20060228

451

if contains_whitespace(version_id):

452

raise InvalidRevisionId(version_id)

453

if self.has_version(version_id):

454

raise RevisionAlreadyPresent(version_id, self.filename)

455

456

if False or __debug__:

457

for l in lines:

458

assert '\n' not in l[:-1]

459

460

def _add(self, version_id, lines, parents, delta):

461

"""Add a set of lines on top of version specified by parents.

462

463

If delta is true, compress the text as a line-delta against

464

the first parent.

465

466

Any versions not present will be converted into ghosts.

467

"""

468

ghostless_parents = []

469

ghosts = []

470

for parent in parents:

471

if not self.has_version(parent):

472

ghosts.append(parent)

473

else:

474

ghostless_parents.append(parent)

475

476

if delta and not len(ghostless_parents):

477

delta = False

478

479

digest = sha_strings(lines)

480

options = []

481

if lines:

482

if lines[-1][-1] != '\n':

483

options.append('no-eol')

484

lines[-1] = lines[-1] + '\n'

485

486

lines = self.factory.make(lines, len(self._index))

487

if self.factory.annotated and len(ghostless_parents) > 0:

488

# Merge annotations from parent texts if so is needed.

489

self._merge_annotations(lines, ghostless_parents)

490

491

if len(ghostless_parents) and delta:

492

# To speed the extract of texts the delta chain is limited

493

# to a fixed number of deltas. This should minimize both

494

# I/O and the time spend applying deltas.

495

count = 0

496

delta_parents = ghostless_parents

497

while count < 25:

498

parent = delta_parents[0]

499

method = self._index.get_method(parent)

500

if method == 'fulltext':

501

break

502

delta_parents = self._index.get_parents(parent)

503

count = count + 1

504

if method == 'line-delta':

505

delta = False

506

507

if delta:

508

options.append('line-delta')

509

content = self._get_content(ghostless_parents[0])

510

delta_hunks = content.line_delta(lines)

511

store_lines = self.factory.lower_line_delta(delta_hunks)

512

else:

513

options.append('fulltext')

514

store_lines = self.factory.lower_fulltext(lines)

515

516

where, size = self._data.add_record(version_id, digest, store_lines)

517

self._index.add_version(version_id, options, where, size, parents)

518

519

def check(self, progress_bar=None):

520

"""See VersionedFile.check()."""

521

522

def _clone_text(self, new_version_id, old_version_id, parents):

523

"""See VersionedFile.clone_text()."""

524

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

525

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

526

527

def get_lines(self, version_id):

528

"""See VersionedFile.get_lines()."""

529

return self._get_content(version_id).text()

530

531

def iter_lines_added_or_present_in_versions(self, version_ids=None):

532

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

533

if version_ids is None:

534

version_ids = self.versions()

535

# we dont care about inclusions, the caller cares.

536

# but we need to setup a list of records to visit.

537

# we need version_id, position, length

538

version_id_records = []

539

requested_versions = list(version_ids)

540

# filter for available versions

541

for version_id in requested_versions:

542

if not self.has_version(version_id):

543

raise RevisionNotPresent(version_id, self.filename)

544

# get a in-component-order queue:

545

version_ids = []

546

for version_id in self.versions():

547

if version_id in requested_versions:

548

version_ids.append(version_id)

549

data_pos, length = self._index.get_position(version_id)

550

version_id_records.append((version_id, data_pos, length))

551

552

pb = bzrlib.ui.ui_factory.nested_progress_bar()

553

count = 0

554

total = len(version_id_records)

555

try:

556

pb.update('Walking content.', count, total)

557

for version_id, data, sha_value in \

558

self._data.read_records_iter(version_id_records):

559

pb.update('Walking content.', count, total)

560

method = self._index.get_method(version_id)

561

version_idx = self._index.lookup(version_id)

562

assert method in ('fulltext', 'line-delta')

563

if method == 'fulltext':

564

content = self.factory.parse_fulltext(data, version_idx)

565

for line in content.text():

566

yield line

567

else:

568

delta = self.factory.parse_line_delta(data, version_idx)

569

for start, end, count, lines in delta:

570

for origin, line in lines:

571

yield line

572

count +=1

573

pb.update('Walking content.', total, total)

574

pb.finished()

575

except:

576

pb.update('Walking content.', total, total)

577

pb.finished()

578

raise

579

580

def num_versions(self):

581

"""See VersionedFile.num_versions()."""

582

return self._index.num_versions()

583

584

__len__ = num_versions

585

586

def annotate_iter(self, version_id):

587

"""See VersionedFile.annotate_iter."""

588

content = self._get_content(version_id)

589

for origin, text in content.annotate_iter():

590

yield self._index.idx_to_name(origin), text

591

592

def get_parents(self, version_id):

593

"""See VersionedFile.get_parents."""

594

self._check_versions_present([version_id])

595

return list(self._index.get_parents(version_id))

596

597

def get_parents_with_ghosts(self, version_id):

598

"""See VersionedFile.get_parents."""

599

self._check_versions_present([version_id])

600

return list(self._index.get_parents_with_ghosts(version_id))

601

602

def get_ancestry(self, versions):

603

"""See VersionedFile.get_ancestry."""

604

if isinstance(versions, basestring):

605

versions = [versions]

606

if not versions:

607

return []

608

self._check_versions_present(versions)

609

return self._index.get_ancestry(versions)

610

611

def get_ancestry_with_ghosts(self, versions):

612

"""See VersionedFile.get_ancestry_with_ghosts."""

613

if isinstance(versions, basestring):

614

versions = [versions]

615

if not versions:

616

return []

617

self._check_versions_present(versions)

618

return self._index.get_ancestry_with_ghosts(versions)

619

620

def _reannotate_line_delta(self, other, lines, new_version_id,

621

new_version_idx):

622

"""Re-annotate line-delta and return new delta."""

623

new_delta = []

624

for start, end, count, contents \

625

in self.factory.parse_line_delta_iter(lines):

626

new_lines = []

627

for origin, line in contents:

628

old_version_id = other._index.idx_to_name(origin)

629

if old_version_id == new_version_id:

630

idx = new_version_idx

631

else:

632

idx = self._index.lookup(old_version_id)

633

new_lines.append((idx, line))

634

new_delta.append((start, end, count, new_lines))

635

636

return self.factory.lower_line_delta(new_delta)

637

638

def _reannotate_fulltext(self, other, lines, new_version_id,

639

new_version_idx):

640

"""Re-annotate fulltext and return new version."""

641

content = self.factory.parse_fulltext(lines, new_version_idx)

642

new_lines = []

643

for origin, line in content.annotate_iter():

644

old_version_id = other._index.idx_to_name(origin)

645

if old_version_id == new_version_id:

646

idx = new_version_idx

647

else:

648

idx = self._index.lookup(old_version_id)

649

new_lines.append((idx, line))

650

651

return self.factory.lower_fulltext(KnitContent(new_lines))

652

653

#@deprecated_method(zero_eight)

654

def walk(self, version_ids):

655

"""See VersionedFile.walk."""

656

# We take the short path here, and extract all relevant texts

657

# and put them in a weave and let that do all the work. Far

658

# from optimal, but is much simpler.

659

# FIXME RB 20060228 this really is inefficient!

660

from bzrlib.weave import Weave

661

662

w = Weave(self.filename)

663

ancestry = self.get_ancestry(version_ids)

664

sorted_graph = topo_sort(self._index.get_graph())

665

version_list = [vid for vid in sorted_graph if vid in ancestry]

666

667

for version_id in version_list:

668

lines = self.get_lines(version_id)

669

w.add_lines(version_id, self.get_parents(version_id), lines)

670

671

for lineno, insert_id, dset, line in w.walk(version_ids):

672

yield lineno, insert_id, dset, line

673

674

675

class _KnitComponentFile(object):

676

"""One of the files used to implement a knit database"""

677

678

def __init__(self, transport, filename, mode):

679

self._transport = transport

680

self._filename = filename

681

self._mode = mode

682

683

def write_header(self):

684

old_len = self._transport.append(self._filename, StringIO(self.HEADER))

685

if old_len != 0:

686

raise KnitCorrupt(self._filename, 'misaligned after writing header')

687

688

def check_header(self, fp):

689

line = fp.read(len(self.HEADER))

690

if line != self.HEADER:

691

raise KnitHeaderError(badline=line)

692

693

def commit(self):

694

"""Commit is a nop."""

695

696

def __repr__(self):

697

return '%s(%s)' % (self.__class__.__name__, self._filename)

698

699

700

class _KnitIndex(_KnitComponentFile):

701

"""Manages knit index file.

702

703

The index is already kept in memory and read on startup, to enable

704

fast lookups of revision information. The cursor of the index

705

file is always pointing to the end, making it easy to append

706

entries.

707

708

_cache is a cache for fast mapping from version id to a Index

709

object.

710

711

_history is a cache for fast mapping from indexes to version ids.

712

713

The index data format is dictionary compressed when it comes to

714

parent references; a index entry may only have parents that with a

715

lover index number. As a result, the index is topological sorted.

716

717

Duplicate entries may be written to the index for a single version id

718

if this is done then the latter one completely replaces the former:

719

this allows updates to correct version and parent information.

720

Note that the two entries may share the delta, and that successive

721

annotations and references MUST point to the first entry.

722

"""

723

724

HEADER = "# bzr knit index 7\n"

725

726

def _cache_version(self, version_id, options, pos, size, parents):

727

val = (version_id, options, pos, size, parents)

728

self._cache[version_id] = val

729

if not version_id in self._history:

730

self._history.append(version_id)

731

732

def _iter_index(self, fp):

733

l = fp.readline()

734

while l != '':

735

yield l.split()

736

l = fp.readline()

737

#lines = fp.read()

738

#for l in lines.splitlines(False):

739

# yield l.split()

740

741

def __init__(self, transport, filename, mode, create=False):

742

_KnitComponentFile.__init__(self, transport, filename, mode)

743

self._cache = {}

744

# position in _history is the 'official' index for a revision

745

# but the values may have come from a newer entry.

746

# so - wc -l of a knit index is != the number of uniqe names

747

# in the weave.

748

self._history = []

749

pb = bzrlib.ui.ui_factory.nested_progress_bar()

750

try:

751

count = 0

752

total = 1

753

try:

754

pb.update('read knit index', count, total)

755

fp = self._transport.get(self._filename)

756

self.check_header(fp)

757

for rec in self._iter_index(fp):

758

count += 1

759

total += 1

760

pb.update('read knit index', count, total)

761

parents = self._parse_parents(rec[4:])

762

self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),

763

parents)

764

except NoSuchFile, e:

765

if mode != 'w' or not create:

766

raise

767

self.write_header()

768

finally:

769

pb.update('read knit index', total, total)

770

pb.finished()

771

772

def _parse_parents(self, compressed_parents):

773

"""convert a list of string parent values into version ids.

774

775

ints are looked up in the index.

776

.FOO values are ghosts and converted in to FOO.

777

"""

778

result = []

779

for value in compressed_parents:

780

if value.startswith('.'):

781

result.append(value[1:])

782

else:

783

assert isinstance(value, str)

784

result.append(self._history[int(value)])

785

return result

786

787

def get_graph(self):

788

graph = []

789

for version_id, index in self._cache.iteritems():

790

graph.append((version_id, index[4]))

791

return graph

792

793

def get_ancestry(self, versions):

794

"""See VersionedFile.get_ancestry."""

795

# get a graph of all the mentioned versions:

796

graph = {}

797

pending = set(versions)

798

while len(pending):

799

version = pending.pop()

800

parents = self._cache[version][4]

801

# got the parents ok

802

# trim ghosts

803

parents = [parent for parent in parents if parent in self._cache]

804

for parent in parents:

805

# if not completed and not a ghost

806

if parent not in graph:

807

pending.add(parent)

808

graph[version] = parents

809

return topo_sort(graph.items())

810

811

def get_ancestry_with_ghosts(self, versions):

812

"""See VersionedFile.get_ancestry_with_ghosts."""

813

# get a graph of all the mentioned versions:

814

graph = {}

815

pending = set(versions)

816

while len(pending):

817

version = pending.pop()

818

try:

819

parents = self._cache[version][4]

820

except KeyError:

821

# ghost, fake it

822

graph[version] = []

823

pass

824

else:

825

# got the parents ok

826

for parent in parents:

827

if parent not in graph:

828

pending.add(parent)

829

graph[version] = parents

830

return topo_sort(graph.items())

831

832

def num_versions(self):

833

return len(self._history)

834

835

__len__ = num_versions

836

837

def get_versions(self):

838

return self._history

839

840

def idx_to_name(self, idx):

841

return self._history[idx]

842

843

def lookup(self, version_id):

844

assert version_id in self._cache

845

return self._history.index(version_id)

846

847

def _version_list_to_index(self, versions):

848

result_list = []

849

for version in versions:

850

if version in self._cache:

851

result_list.append(str(self._history.index(version)))

852

else:

853

result_list.append('.' + version.encode('utf-8'))

854

return ' '.join(result_list)

855

856

def add_version(self, version_id, options, pos, size, parents):

857

"""Add a version record to the index."""

858

self._cache_version(version_id, options, pos, size, parents)

859

860

content = "%s %s %s %s %s\n" % (version_id,

861

','.join(options),

862

pos,

863

size,

864

self._version_list_to_index(parents))

865

self._transport.append(self._filename, StringIO(content))

866

867

def has_version(self, version_id):

868

"""True if the version is in the index."""

869

return self._cache.has_key(version_id)

870

871

def get_position(self, version_id):

872

"""Return data position and size of specified version."""

873

return (self._cache[version_id][2], \

874

self._cache[version_id][3])

875

876

def get_method(self, version_id):

877

"""Return compression method of specified version."""

878

options = self._cache[version_id][1]

879

if 'fulltext' in options:

880

return 'fulltext'

881

else:

882

assert 'line-delta' in options

883

return 'line-delta'

884

885

def get_options(self, version_id):

886

return self._cache[version_id][1]

887

888

def get_parents(self, version_id):

889

"""Return parents of specified version ignoring ghosts."""

890

return [parent for parent in self._cache[version_id][4]

891

if parent in self._cache]

892

893

def get_parents_with_ghosts(self, version_id):

894

"""Return parents of specified version wth ghosts."""

895

return self._cache[version_id][4]

896

897

def check_versions_present(self, version_ids):

898

"""Check that all specified versions are present."""

899

version_ids = set(version_ids)

900

for version_id in list(version_ids):

901

if version_id in self._cache:

902

version_ids.remove(version_id)

903

if version_ids:

904

raise RevisionNotPresent(list(version_ids)[0], self.filename)

905

906

907

class _KnitData(_KnitComponentFile):

908

"""Contents of the knit data file"""

909

910

HEADER = "# bzr knit data 7\n"

911

912

def __init__(self, transport, filename, mode, create=False):

913

_KnitComponentFile.__init__(self, transport, filename, mode)

914

self._file = None

915

self._checked = False

916

if create:

917

self._transport.put(self._filename, StringIO(''))

918

self._records = {}

919

920

def clear_cache(self):

921

"""Clear the record cache."""

922

self._records = {}

923

924

def _open_file(self):

925

if self._file is None:

926

try:

927

self._file = self._transport.get(self._filename)

928

except NoSuchFile:

929

pass

930

return self._file

931

932

def add_record(self, version_id, digest, lines):

933

"""Write new text record to disk. Returns the position in the

934

file where it was written."""

935

sio = StringIO()

936

data_file = GzipFile(None, mode='wb', fileobj=sio)

937

print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)

938

data_file.writelines(lines)

939

print >>data_file, "end %s\n" % version_id

940

data_file.close()

941

942

# cache

943

self._records[version_id] = (digest, lines)

944

945

content = sio.getvalue()

946

sio.seek(0)

947

start_pos = self._transport.append(self._filename, sio)

948

return start_pos, len(content)

949

950

def _parse_record(self, version_id, data):

951

df = GzipFile(mode='rb', fileobj=StringIO(data))

952

rec = df.readline().split()

953

if len(rec) != 4:

954

raise KnitCorrupt(self._filename, 'unexpected number of records')

955

if rec[1] != version_id:

956

raise KnitCorrupt(self._filename,

957

'unexpected version, wanted %r, got %r' % (

958

version_id, rec[1]))

959

lines = int(rec[2])

960

record_contents = self._read_record_contents(df, lines)

961

l = df.readline()

962

if l != 'end %s\n' % version_id:

963

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

964

% (l, version_id))

965

return record_contents, rec[3]

966

967

def _read_record_contents(self, df, record_lines):

968

"""Read and return n lines from datafile."""

969

r = []

970

for i in range(record_lines):

971

r.append(df.readline())

972

return r

973

974

def read_records_iter(self, records):

975

"""Read text records from data file and yield result.

976

977

Each passed record is a tuple of (version_id, pos, len) and

978

will be read in the given order. Yields (version_id,

979

contents, digest).

980

"""

981

982

needed_records = []

983

for version_id, pos, size in records:

984

if version_id not in self._records:

985

needed_records.append((version_id, pos, size))

986

987

if len(needed_records):

988

# We take it that the transport optimizes the fetching as good

989

# as possible (ie, reads continous ranges.)

990

response = self._transport.readv(self._filename,

991

[(pos, size) for version_id, pos, size in needed_records])

992

993

for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):

994

content, digest = self._parse_record(record_id, data)

995

self._records[record_id] = (digest, content)

996

997

for version_id, pos, size in records:

998

yield version_id, copy(self._records[version_id][1]), copy(self._records[version_id][0])

999

1000

def read_records(self, records):

1001

"""Read records into a dictionary."""

1002

components = {}

1003

for record_id, content, digest in self.read_records_iter(records):

1004

components[record_id] = (content, digest)

1005

return components

1006

1007

1008

class InterKnit(InterVersionedFile):

1009

"""Optimised code paths for knit to knit operations."""

1010

1011

_matching_file_factory = KnitVersionedFile

1012

1013

@staticmethod

1014

def is_compatible(source, target):

1015

"""Be compatible with knits. """

1016

try:

1017

return (isinstance(source, KnitVersionedFile) and

1018

isinstance(target, KnitVersionedFile))

1019

except AttributeError:

1020

return False

1021

1022

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):

1023

"""See InterVersionedFile.join."""

1024

assert isinstance(self.source, KnitVersionedFile)

1025

assert isinstance(self.target, KnitVersionedFile)

1026

1027

if version_ids is None:

1028

version_ids = self.source.versions()

1029

else:

1030

if not ignore_missing:

1031

self.source._check_versions_present(version_ids)

1032

else:

1033

version_ids = set(self.source.versions()).intersection(

1034

set(version_ids))

1035

1036

if not version_ids:

1037

return 0

1038

1039

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1040

try:

1041

version_ids = list(version_ids)

1042

if None in version_ids:

1043

version_ids.remove(None)

1044

1045

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1046

this_versions = set(self.target._index.get_versions())

1047

needed_versions = self.source_ancestry - this_versions

1048

cross_check_versions = self.source_ancestry.intersection(this_versions)

1049

mismatched_versions = set()

1050

for version in cross_check_versions:

1051

# scan to include needed parents.

1052

n1 = set(self.target.get_parents_with_ghosts(version))

1053

n2 = set(self.source.get_parents_with_ghosts(version))

1054

if n1 != n2:

1055

# FIXME TEST this check for cycles being introduced works

1056

# the logic is we have a cycle if in our graph we are an

1057

# ancestor of any of the n2 revisions.

1058

for parent in n2:

1059

if parent in n1:

1060

# safe

1061

continue

1062

else:

1063

parent_ancestors = self.source.get_ancestry(parent)

1064

if version in parent_ancestors:

1065

raise errors.GraphCycleError([parent, version])

1066

# ensure this parent will be available later.

1067

new_parents = n2.difference(n1)

1068

needed_versions.update(new_parents.difference(this_versions))

1069

mismatched_versions.add(version)

1070

1071

if not needed_versions and not cross_check_versions:

1072

return 0

1073

full_list = topo_sort(self.source.get_graph())

1074

1075

version_list = [i for i in full_list if (not self.target.has_version(i)

1076

and i in needed_versions)]

1077

1078

records = []

1079

for version_id in version_list:

1080

data_pos, data_size = self.source._index.get_position(version_id)

1081

records.append((version_id, data_pos, data_size))

1082

1083

count = 0

1084

for version_id, lines, digest \

1085

in self.source._data.read_records_iter(records):

1086

options = self.source._index.get_options(version_id)

1087

parents = self.source._index.get_parents_with_ghosts(version_id)

1088

1089

for parent in parents:

1090

# if source has the parent, we must hav grabbed it first.

1091

assert (self.target.has_version(parent) or not

1092

self.source.has_version(parent))

1093

1094

if self.target.factory.annotated:

1095

# FIXME jrydberg: it should be possible to skip

1096

# re-annotating components if we know that we are

1097

# going to pull all revisions in the same order.

1098

new_version_id = version_id

1099

new_version_idx = self.target._index.num_versions()

1100

if 'fulltext' in options:

1101

lines = self.target._reannotate_fulltext(self.source, lines,

1102

new_version_id, new_version_idx)

1103

elif 'line-delta' in options:

1104

lines = self.target._reannotate_line_delta(self.source, lines,

1105

new_version_id, new_version_idx)

1106

1107

count = count + 1

1108

pb.update("Joining knit", count, len(version_list))

1109

1110

pos, size = self.target._data.add_record(version_id, digest, lines)

1111

self.target._index.add_version(version_id, options, pos, size, parents)

1112

1113

for version in mismatched_versions:

1114

n1 = set(self.target.get_parents_with_ghosts(version))

1115

n2 = set(self.source.get_parents_with_ghosts(version))

1116

# write a combined record to our history preserving the current

1117

# parents as first in the list

1118

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1119

self.target.fix_parents(version, new_parents)

1120

return count

1121

finally:

1122

pb.clear()

1123

pb.finished()

1124

1125

1126

InterVersionedFile.register_optimiser(InterKnit)

Older »