~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-02 03:12:34 UTC
mto: (1594.2.4 integration)
mto: This revision was merged to the branch mainline in revision 1596.
Revision ID: robertc@robertcollins.net-20060302031234-cf6b75961f27c5df

InterVersionedFile implemented.

files added:
BRANCH.TODO

INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/store/versioned/weave.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/plugins/checkperms

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

tools/testweave.py => bzrlib/tests/test_weave.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

tools/history2weaves.py => bzrlib/upgrade.py

bzrlib/xml.py => bzrlib/xml_serializer.py

bzrlib/newinventory.py => contrib/newinventory.py

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/zsh/_bzr

setup.py *

tools/http_client.py

tools/weavebench.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

import os

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accomodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

DATA_SUFFIX = '.knit'

INDEX_SUFFIX = '.kndx'

100

# convenience factories for testing or use:

101

def AnnotatedKnitFactory(name, transport, mode=None):

102

"""Create a knit with path name in transport transport."""

103

return KnitVersionedFile(transport,

104

name,

105

'w',

106

KnitAnnotateFactory(),

107

delta=True)

108

109

110

class KnitContent(object):

111

"""Content of a knit version to which deltas can be applied."""

112

113

def __init__(self, lines):

114

self._lines = lines

115

116

def annotate_iter(self):

117

"""Yield tuples of (origin, text) for each content line."""

118

for origin, text in self._lines:

119

yield origin, text

120

121

def annotate(self):

122

"""Return a list of (origin, text) tuples."""

123

return list(self.annotate_iter())

124

125

def apply_delta(self, delta):

126

"""Apply delta to this content."""

127

offset = 0

128

for start, end, count, lines in delta:

129

self._lines[offset+start:offset+end] = lines

130

offset = offset + (start - end) + count

131

132

def line_delta_iter(self, new_lines):

133

"""Generate line-based delta from new_lines to this content."""

134

new_texts = [text for origin, text in new_lines._lines]

135

old_texts = [text for origin, text in self._lines]

136

s = difflib.SequenceMatcher(None, old_texts, new_texts)

137

for op in s.get_opcodes():

138

if op[0] == 'equal':

139

continue

140

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

141

142

def line_delta(self, new_lines):

143

return list(self.line_delta_iter(new_lines))

144

145

def text(self):

146

return [text for origin, text in self._lines]

147

148

149

class _KnitFactory(object):

150

"""Base factory for creating content objects."""

151

152

def make(self, lines, version):

153

num_lines = len(lines)

154

return KnitContent(zip([version] * num_lines, lines))

155

156

157

class KnitAnnotateFactory(_KnitFactory):

158

"""Factory for creating annotated Content objects."""

159

160

annotated = True

161

162

def parse_fulltext(self, content, version):

163

lines = []

164

for line in content:

165

origin, text = line.split(' ', 1)

166

lines.append((int(origin), text))

167

return KnitContent(lines)

168

169

def parse_line_delta_iter(self, lines):

170

while lines:

171

header = lines.pop(0)

172

start, end, c = [int(n) for n in header.split(',')]

173

contents = []

174

for i in range(c):

175

origin, text = lines.pop(0).split(' ', 1)

176

contents.append((int(origin), text))

177

yield start, end, c, contents

178

179

def parse_line_delta(self, lines, version):

180

return list(self.parse_line_delta_iter(lines))

181

182

def lower_fulltext(self, content):

183

return ['%d %s' % (o, t) for o, t in content._lines]

184

185

def lower_line_delta(self, delta):

186

out = []

187

for start, end, c, lines in delta:

188

out.append('%d,%d,%d\n' % (start, end, c))

189

for origin, text in lines:

190

out.append('%d %s' % (origin, text))

191

return out

192

193

194

class KnitPlainFactory(_KnitFactory):

195

"""Factory for creating plain Content objects."""

196

197

annotated = False

198

199

def parse_fulltext(self, content, version):

200

return self.make(content, version)

201

202

def parse_line_delta_iter(self, lines, version):

203

while lines:

204

header = lines.pop(0)

205

start, end, c = [int(n) for n in header.split(',')]

206

yield start, end, c, zip([version] * c, lines[:c])

207

del lines[:c]

208

209

def parse_line_delta(self, lines, version):

210

return list(self.parse_line_delta_iter(lines, version))

211

212

def lower_fulltext(self, content):

213

return content.text()

214

215

def lower_line_delta(self, delta):

216

out = []

217

for start, end, c, lines in delta:

218

out.append('%d,%d,%d\n' % (start, end, c))

219

out.extend([text for origin, text in lines])

220

return out

221

222

223

def make_empty_knit(transport, relpath):

224

"""Construct a empty knit at the specified location."""

225

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

226

k._data._open_file()

227

228

229

class KnitVersionedFile(VersionedFile):

230

"""Weave-like structure with faster random access.

231

232

A knit stores a number of texts and a summary of the relationships

233

between them. Texts are identified by a string version-id. Texts

234

are normally stored and retrieved as a series of lines, but can

235

also be passed as single strings.

236

237

Lines are stored with the trailing newline (if any) included, to

238

avoid special cases for files with no final newline. Lines are

239

composed of 8-bit characters, not unicode. The combination of

240

these approaches should mean any 'binary' file can be safely

241

stored and retrieved.

242

"""

243

244

def __init__(self, transport, relpath, mode, factory,

245

basis_knit=None, delta=True):

246

"""Construct a knit at location specified by relpath."""

247

assert mode in ('r', 'w'), "invalid mode specified"

248

assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \

249

type(basis_knit)

250

251

self.transport = transport

252

self.filename = relpath

253

self.basis_knit = basis_knit

254

self.factory = factory

255

self.writable = (mode == 'w')

256

self.delta = delta

257

258

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

259

mode)

260

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

261

mode)

262

263

def create_empty(self, name, transport, mode=None):

264

return KnitVersionedFile(transport, name, 'w', self.factory, delta=self.delta)

265

266

def versions(self):

267

"""See VersionedFile.versions."""

268

return self._index.get_versions()

269

270

def has_version(self, version_id):

271

"""See VersionedFile.has_version."""

272

return self._index.has_version(version_id)

273

274

__contains__ = has_version

275

276

def _merge_annotations(self, content, parents):

277

"""Merge annotations for content. This is done by comparing

278

the annotations based on changed to the text."""

279

for parent_id in parents:

280

merge_content = self._get_content(parent_id)

281

seq = SequenceMatcher(None, merge_content.text(), content.text())

282

for i, j, n in seq.get_matching_blocks():

283

if n == 0:

284

continue

285

content._lines[j:j+n] = merge_content._lines[i:i+n]

286

287

def _get_components(self, version_id):

288

"""Return a list of (version_id, method, data) tuples that

289

makes up version specified by version_id of the knit.

290

291

The components should be applied in the order of the returned

292

list.

293

294

The basis knit will be used to the largest extent possible

295

since it is assumed that accesses to it is faster.

296

"""

297

# needed_revisions holds a list of (method, version_id) of

298

# versions that is needed to be fetched to construct the final

299

# version of the file.

300

301

# basis_revisions is a list of versions that needs to be

302

# fetched but exists in the basis knit.

303

304

basis = self.basis_knit

305

needed_versions = []

306

basis_versions = []

307

cursor = version_id

308

309

while 1:

310

picked_knit = self

311

if basis and basis._index.has_version(cursor):

312

picked_knit = basis

313

basis_versions.append(cursor)

314

method = picked_knit._index.get_method(cursor)

315

needed_versions.append((method, cursor))

316

if method == 'fulltext':

317

break

318

cursor = picked_knit.get_parents(cursor)[0]

319

320

components = {}

321

if basis_versions:

322

records = []

323

for comp_id in basis_versions:

324

data_pos, data_size = basis._index.get_data_position(comp_id)

325

records.append((piece_id, data_pos, data_size))

326

components.update(basis._data.read_records(records))

327

328

records = []

329

for comp_id in [vid for method, vid in needed_versions

330

if vid not in basis_versions]:

331

data_pos, data_size = self._index.get_position(comp_id)

332

records.append((comp_id, data_pos, data_size))

333

components.update(self._data.read_records(records))

334

335

# get_data_records returns a mapping with the version id as

336

# index and the value as data. The order the components need

337

# to be applied is held by needed_versions (reversed).

338

out = []

339

for method, comp_id in reversed(needed_versions):

340

out.append((comp_id, method, components[comp_id]))

341

342

return out

343

344

def _get_content(self, version_id):

345

"""Returns a content object that makes up the specified

346

version."""

347

if not self.has_version(version_id):

348

raise RevisionNotPresent(version_id, self.filename)

349

350

if self.basis_knit and version_id in self.basis_knit:

351

return self.basis_knit._get_content(version_id)

352

353

content = None

354

components = self._get_components(version_id)

355

for component_id, method, (data, digest) in components:

356

version_idx = self._index.lookup(component_id)

357

if method == 'fulltext':

358

assert content is None

359

content = self.factory.parse_fulltext(data, version_idx)

360

elif method == 'line-delta':

361

delta = self.factory.parse_line_delta(data, version_idx)

362

content.apply_delta(delta)

363

364

if 'no-eol' in self._index.get_options(version_id):

365

line = content._lines[-1][1].rstrip('\n')

366

content._lines[-1] = (content._lines[-1][0], line)

367

368

if sha_strings(content.text()) != digest:

369

raise KnitCorrupt(self.filename, 'sha-1 does not match')

370

371

return content

372

373

def _check_versions_present(self, version_ids):

374

"""Check that all specified versions are present."""

375

version_ids = set(version_ids)

376

for r in list(version_ids):

377

if self._index.has_version(r):

378

version_ids.remove(r)

379

if version_ids:

380

raise RevisionNotPresent(list(version_ids)[0], self.filename)

381

382

def add_lines(self, version_id, parents, lines):

383

"""See VersionedFile.add_lines."""

384

assert self.writable, "knit is not opened for write"

385

### FIXME escape. RBC 20060228

386

if contains_whitespace(version_id):

387

raise InvalidRevisionId(version_id)

388

if self.has_version(version_id):

389

raise RevisionAlreadyPresent(version_id, self.filename)

390

391

if True or __debug__:

392

for l in lines:

393

assert '\n' not in l[:-1]

394

395

self._check_versions_present(parents)

396

return self._add(version_id, lines[:], parents, self.delta)

397

398

def _add(self, version_id, lines, parents, delta):

399

"""Add a set of lines on top of version specified by parents.

400

401

If delta is true, compress the text as a line-delta against

402

the first parent.

403

"""

404

if delta and not parents:

405

delta = False

406

407

digest = sha_strings(lines)

408

options = []

409

if lines:

410

if lines[-1][-1] != '\n':

411

options.append('no-eol')

412

lines[-1] = lines[-1] + '\n'

413

414

lines = self.factory.make(lines, len(self._index))

415

if self.factory.annotated and len(parents) > 0:

416

# Merge annotations from parent texts if so is needed.

417

self._merge_annotations(lines, parents)

418

419

if parents and delta:

420

# To speed the extract of texts the delta chain is limited

421

# to a fixed number of deltas. This should minimize both

422

# I/O and the time spend applying deltas.

423

count = 0

424

delta_parents = parents

425

while count < 25:

426

parent = delta_parents[0]

427

method = self._index.get_method(parent)

428

if method == 'fulltext':

429

break

430

delta_parents = self._index.get_parents(parent)

431

count = count + 1

432

if method == 'line-delta':

433

delta = False

434

435

if delta:

436

options.append('line-delta')

437

content = self._get_content(parents[0])

438

delta_hunks = content.line_delta(lines)

439

store_lines = self.factory.lower_line_delta(delta_hunks)

440

else:

441

options.append('fulltext')

442

store_lines = self.factory.lower_fulltext(lines)

443

444

where, size = self._data.add_record(version_id, digest, store_lines)

445

self._index.add_version(version_id, options, where, size, parents)

446

447

def clone_text(self, new_version_id, old_version_id, parents):

448

"""See VersionedFile.clone_text()."""

449

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

450

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

451

452

def get_lines(self, version_id):

453

"""See VersionedFile.get_lines()."""

454

return self._get_content(version_id).text()

455

456

def annotate_iter(self, version_id):

457

"""See VersionedFile.annotate_iter."""

458

content = self._get_content(version_id)

459

for origin, text in content.annotate_iter():

460

yield self._index.idx_to_name(origin), text

461

462

def get_parents(self, version_id):

463

"""See VersionedFile.get_parents."""

464

self._check_versions_present([version_id])

465

return list(self._index.get_parents(version_id))

466

467

def get_ancestry(self, versions):

468

"""See VersionedFile.get_ancestry."""

469

if isinstance(versions, basestring):

470

versions = [versions]

471

if not versions:

472

return []

473

self._check_versions_present(versions)

474

return self._index.get_ancestry(versions)

475

476

def _reannotate_line_delta(self, other, lines, new_version_id,

477

new_version_idx):

478

"""Re-annotate line-delta and return new delta."""

479

new_delta = []

480

for start, end, count, contents \

481

in self.factory.parse_line_delta_iter(lines):

482

new_lines = []

483

for origin, line in contents:

484

old_version_id = other._index.idx_to_name(origin)

485

if old_version_id == new_version_id:

486

idx = new_version_idx

487

else:

488

idx = self._index.lookup(old_version_id)

489

new_lines.append((idx, line))

490

new_delta.append((start, end, count, new_lines))

491

492

return self.factory.lower_line_delta(new_delta)

493

494

def _reannotate_fulltext(self, other, lines, new_version_id,

495

new_version_idx):

496

"""Re-annotate fulltext and return new version."""

497

content = self.factory.parse_fulltext(lines, new_version_idx)

498

new_lines = []

499

for origin, line in content.annotate_iter():

500

old_version_id = other._index.idx_to_name(origin)

501

if old_version_id == new_version_id:

502

idx = new_version_idx

503

else:

504

idx = self._index.lookup(old_version_id)

505

new_lines.append((idx, line))

506

507

return self.factory.lower_fulltext(KnitContent(new_lines))

508

509

def walk(self, version_ids):

510

"""See VersionedFile.walk."""

511

# We take the short path here, and extract all relevant texts

512

# and put them in a weave and let that do all the work. Far

513

# from optimal, but is much simpler.

514

# FIXME RB 20060228 this really is inefficient!

515

from bzrlib.weave import Weave

516

517

w = Weave(self.filename)

518

ancestry = self.get_ancestry(version_ids)

519

sorted_graph = topo_sort(self._index.get_graph())

520

version_list = [vid for vid in sorted_graph if vid in ancestry]

521

522

for version_id in version_list:

523

lines = self.get_lines(version_id)

524

w.add_lines(version_id, self.get_parents(version_id), lines)

525

526

for lineno, insert_id, dset, line in w.walk(version_ids):

527

yield lineno, insert_id, dset, line

528

529

530

class _KnitComponentFile(object):

531

"""One of the files used to implement a knit database"""

532

533

def __init__(self, transport, filename, mode):

534

self._transport = transport

535

self._filename = filename

536

self._mode = mode

537

538

def write_header(self):

539

old_len = self._transport.append(self._filename, StringIO(self.HEADER))

540

if old_len != 0:

541

raise KnitCorrupt(self._filename, 'misaligned after writing header')

542

543

def check_header(self, fp):

544

line = fp.read(len(self.HEADER))

545

if line != self.HEADER:

546

raise KnitHeaderError(badline=line)

547

548

def commit(self):

549

"""Commit is a nop."""

550

551

def __repr__(self):

552

return '%s(%s)' % (self.__class__.__name__, self._filename)

553

554

555

class _KnitIndex(_KnitComponentFile):

556

"""Manages knit index file.

557

558

The index is already kept in memory and read on startup, to enable

559

fast lookups of revision information. The cursor of the index

560

file is always pointing to the end, making it easy to append

561

entries.

562

563

_cache is a cache for fast mapping from version id to a Index

564

object.

565

566

_history is a cache for fast mapping from indexes to version ids.

567

568

The index data format is dictionary compressed when it comes to

569

parent references; a index entry may only have parents that with a

570

lover index number. As a result, the index is topological sorted.

571

572

Duplicate entries may be written to the index for a single version id

573

if this is done then the latter one completely replaces the former:

574

this allows updates to correct version and parent information.

575

Note that the two entries may share the delta, and that successive

576

annotations and references MUST point to the first entry.

577

"""

578

579

HEADER = "# bzr knit index 7\n"

580

581

def _cache_version(self, version_id, options, pos, size, parents):

582

val = (version_id, options, pos, size, parents)

583

self._cache[version_id] = val

584

if not version_id in self._history:

585

self._history.append(version_id)

586

587

def _iter_index(self, fp):

588

lines = fp.read()

589

for l in lines.splitlines(False):

590

yield l.split()

591

592

def __init__(self, transport, filename, mode):

593

_KnitComponentFile.__init__(self, transport, filename, mode)

594

self._cache = {}

595

# position in _history is the 'official' index for a revision

596

# but the values may have come from a newer entry.

597

# so - wc -l of a knit index is != the number of uniqe names

598

# in the weave.

599

self._history = []

600

try:

601

fp = self._transport.get(self._filename)

602

self.check_header(fp)

603

for rec in self._iter_index(fp):

604

self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),

605

[self._history[int(i)] for i in rec[4:]])

606

except NoSuchFile, e:

607

if mode != 'w':

608

raise e

609

self.write_header()

610

611

def get_graph(self):

612

graph = []

613

for version_id, index in self._cache.iteritems():

614

graph.append((version_id, index[4]))

615

return graph

616

617

def get_ancestry(self, versions):

618

"""See VersionedFile.get_ancestry."""

619

version_idxs = []

620

for version_id in versions:

621

version_idxs.append(self._history.index(version_id))

622

i = set(versions)

623

for v in xrange(max(version_idxs), 0, -1):

624

if self._history[v] in i:

625

# include all its parents

626

i.update(self._cache[self._history[v]][4])

627

return list(i)

628

629

def num_versions(self):

630

return len(self._history)

631

632

__len__ = num_versions

633

634

def get_versions(self):

635

return self._history

636

637

def idx_to_name(self, idx):

638

return self._history[idx]

639

640

def lookup(self, version_id):

641

assert version_id in self._cache

642

return self._history.index(version_id)

643

644

def add_version(self, version_id, options, pos, size, parents):

645

"""Add a version record to the index."""

646

self._cache_version(version_id, options, pos, size, parents)

647

648

content = "%s %s %s %s %s\n" % (version_id,

649

','.join(options),

650

pos,

651

size,

652

' '.join([str(self.lookup(vid)) for

653

vid in parents]))

654

self._transport.append(self._filename, StringIO(content))

655

656

def has_version(self, version_id):

657

"""True if the version is in the index."""

658

return self._cache.has_key(version_id)

659

660

def get_position(self, version_id):

661

"""Return data position and size of specified version."""

662

return (self._cache[version_id][2], \

663

self._cache[version_id][3])

664

665

def get_method(self, version_id):

666

"""Return compression method of specified version."""

667

options = self._cache[version_id][1]

668

if 'fulltext' in options:

669

return 'fulltext'

670

else:

671

assert 'line-delta' in options

672

return 'line-delta'

673

674

def get_options(self, version_id):

675

return self._cache[version_id][1]

676

677

def get_parents(self, version_id):

678

"""Return parents of specified version."""

679

return self._cache[version_id][4]

680

681

def check_versions_present(self, version_ids):

682

"""Check that all specified versions are present."""

683

version_ids = set(version_ids)

684

for version_id in list(version_ids):

685

if version_id in self._cache:

686

version_ids.remove(version_id)

687

if version_ids:

688

raise RevisionNotPresent(list(version_ids)[0], self.filename)

689

690

691

class _KnitData(_KnitComponentFile):

692

"""Contents of the knit data file"""

693

694

HEADER = "# bzr knit data 7\n"

695

696

def __init__(self, transport, filename, mode):

697

_KnitComponentFile.__init__(self, transport, filename, mode)

698

self._file = None

699

self._checked = False

700

701

def _open_file(self):

702

if self._file is None:

703

try:

704

self._file = self._transport.get(self._filename)

705

except NoSuchFile:

706

pass

707

return self._file

708

709

def add_record(self, version_id, digest, lines):

710

"""Write new text record to disk. Returns the position in the

711

file where it was written."""

712

sio = StringIO()

713

data_file = GzipFile(None, mode='wb', fileobj=sio)

714

print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)

715

data_file.writelines(lines)

716

print >>data_file, "end %s\n" % version_id

717

data_file.close()

718

719

content = sio.getvalue()

720

start_pos = self._transport.append(self._filename, StringIO(content))

721

return start_pos, len(content)

722

723

def _parse_record(self, version_id, data):

724

df = GzipFile(mode='rb', fileobj=StringIO(data))

725

rec = df.readline().split()

726

if len(rec) != 4:

727

raise KnitCorrupt(self._filename, 'unexpected number of records')

728

if rec[1] != version_id:

729

raise KnitCorrupt(self.file.name,

730

'unexpected version, wanted %r' % version_id)

731

lines = int(rec[2])

732

record_contents = self._read_record_contents(df, lines)

733

l = df.readline()

734

if l != 'end %s\n' % version_id:

735

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

736

% (l, version_id))

737

return record_contents, rec[3]

738

739

def _read_record_contents(self, df, record_lines):

740

"""Read and return n lines from datafile."""

741

r = []

742

for i in range(record_lines):

743

r.append(df.readline())

744

return r

745

746

def read_records_iter(self, records):

747

"""Read text records from data file and yield result.

748

749

Each passed record is a tuple of (version_id, pos, len) and

750

will be read in the given order. Yields (version_id,

751

contents, digest).

752

"""

753

754

class ContinuousRange:

755

def __init__(self, rec_id, pos, size):

756

self.start_pos = pos

757

self.end_pos = pos + size

758

self.versions = [(rec_id, pos, size)]

759

760

def add(self, rec_id, pos, size):

761

if self.end_pos != pos:

762

return False

763

self.end_pos = pos + size

764

self.versions.append((rec_id, pos, size))

765

return True

766

767

def split(self, fp):

768

for rec_id, pos, size in self.versions:

769

yield rec_id, fp.read(size)

770

771

fp = self._open_file()

772

773

# Loop through all records and try to collect as large

774

# continuous region as possible to read.

775

while records:

776

record_id, pos, size = records.pop(0)

777

continuous_range = ContinuousRange(record_id, pos, size)

778

while records:

779

record_id, pos, size = records[0]

780

if continuous_range.add(record_id, pos, size):

781

del records[0]

782

else:

783

break

784

fp.seek(continuous_range.start_pos, 0)

785

for record_id, data in continuous_range.split(fp):

786

content, digest = self._parse_record(record_id, data)

787

yield record_id, content, digest

788

789

self._file = None

790

791

def read_records(self, records):

792

"""Read records into a dictionary."""

793

components = {}

794

for record_id, content, digest in self.read_records_iter(records):

795

components[record_id] = (content, digest)

796

return components

797

798

799

class InterKnit(InterVersionedFile):

800

"""Optimised code paths for knit to knit operations."""

801

802

_matching_file_factory = staticmethod(AnnotatedKnitFactory)

803

804

@staticmethod

805

def is_compatible(source, target):

806

"""Be compatible with knits. """

807

try:

808

return (isinstance(source, KnitVersionedFile) and

809

isinstance(target, KnitVersionedFile))

810

except AttributeError:

811

return False

812

813

def join(self, pb=None, msg=None, version_ids=None):

814

"""See InterVersionedFile.join."""

815

assert isinstance(self.source, KnitVersionedFile)

816

assert isinstance(self.target, KnitVersionedFile)

817

818

if version_ids is None:

819

version_ids = self.source.versions()

820

if not version_ids:

821

return 0

822

823

if pb is None:

824

from bzrlib.progress import DummyProgress

825

pb = DummyProgress()

826

827

version_ids = list(version_ids)

828

if None in version_ids:

829

version_ids.remove(None)

830

831

self.source_ancestry = set(self.source.get_ancestry(version_ids))

832

this_versions = set(self.target._index.get_versions())

833

needed_versions = self.source_ancestry - this_versions

834

cross_check_versions = self.source_ancestry.intersection(this_versions)

835

mismatched_versions = set()

836

for version in cross_check_versions:

837

# scan to include needed parents.

838

n1 = set(self.target.get_parents(version))

839

n2 = set(self.source.get_parents(version))

840

if n1 != n2:

841

# FIXME TEST this check for cycles being introduced works

842

# the logic is we have a cycle if in our graph we are an

843

# ancestor of any of the n2 revisions.

844

for parent in n2:

845

if parent in n1:

846

# safe

847

continue

848

else:

849

parent_ancestors = self.source.get_ancestry(parent)

850

if version in parent_ancestors:

851

raise errors.GraphCycleError([parent, version])

852

# ensure this parent will be available later.

853

new_parents = n2.difference(n1)

854

needed_versions.update(new_parents.difference(this_versions))

855

mismatched_versions.add(version)

856

857

if not needed_versions and not cross_check_versions:

858

return 0

859

full_list = topo_sort(self.source._index.get_graph())

860

861

version_list = [i for i in full_list if (not self.target.has_version(i)

862

and i in needed_versions)]

863

864

records = []

865

for version_id in version_list:

866

data_pos, data_size = self.source._index.get_position(version_id)

867

records.append((version_id, data_pos, data_size))

868

869

count = 0

870

for version_id, lines, digest \

871

in self.source._data.read_records_iter(records):

872

options = self.source._index.get_options(version_id)

873

parents = self.source._index.get_parents(version_id)

874

875

for parent in parents:

876

assert self.target.has_version(parent)

877

878

if self.target.factory.annotated:

879

# FIXME jrydberg: it should be possible to skip

880

# re-annotating components if we know that we are

881

# going to pull all revisions in the same order.

882

new_version_id = version_id

883

new_version_idx = self.target._index.num_versions()

884

if 'fulltext' in options:

885

lines = self.target._reannotate_fulltext(self.source, lines,

886

new_version_id, new_version_idx)

887

elif 'line-delta' in options:

888

lines = self.target._reannotate_line_delta(self.source, lines,

889

new_version_id, new_version_idx)

890

891

count = count + 1

892

pb.update(self.target.filename, count, len(version_list))

893

894

pos, size = self.target._data.add_record(version_id, digest, lines)

895

self.target._index.add_version(version_id, options, pos, size, parents)

896

897

for version in mismatched_versions:

898

n1 = set(self.target.get_parents(version))

899

n2 = set(self.source.get_parents(version))

900

# write a combined record to our history.

901

new_parents = self.target.get_parents(version) + list(n2.difference(n1))

902

current_values = self.target._index._cache[version]

903

self.target._index.add_version(version,

904

current_values[1],

905

current_values[2],

906

current_values[3],

907

new_parents)

908

pb.clear()

909

return count

910

911

912

InterVersionedFile.register_optimiser(InterKnit)

Older »