~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-01 03:13:44 UTC
mto: (1594.2.4 integration)
mto: This revision was merged to the branch mainline in revision 1596.
Revision ID: robertc@robertcollins.net-20060301031344-b661bf92b4985554

add versioned file clear_cache entry.

files added:
BRANCH.TODO

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugins/__init__.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

files renamed:
plugins/ => bzrlib/plugins/

bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

tools/testweave.py => bzrlib/tests/test_weave.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

effbot/ => bzrlib/util/effbot/

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/xml.py => bzrlib/xml_serializer.py

bzrlib/newinventory.py => contrib/newinventory.py

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

HACKING

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/zsh/_bzr

setup.py *

tools/weavebench.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

import os

from StringIO import StringIO

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accomodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

DATA_SUFFIX = '.knit'

INDEX_SUFFIX = '.kndx'

class KnitContent(object):

"""Content of a knit version to which deltas can be applied."""

100

def __init__(self, lines):

101

self._lines = lines

102

103

def annotate_iter(self):

104

"""Yield tuples of (origin, text) for each content line."""

105

for origin, text in self._lines:

106

yield origin, text

107

108

def annotate(self):

109

"""Return a list of (origin, text) tuples."""

110

return list(self.annotate_iter())

111

112

def apply_delta(self, delta):

113

"""Apply delta to this content."""

114

offset = 0

115

for start, end, count, lines in delta:

116

self._lines[offset+start:offset+end] = lines

117

offset = offset + (start - end) + count

118

119

def line_delta_iter(self, new_lines):

120

"""Generate line-based delta from new_lines to this content."""

121

new_texts = [text for origin, text in new_lines._lines]

122

old_texts = [text for origin, text in self._lines]

123

s = difflib.SequenceMatcher(None, old_texts, new_texts)

124

for op in s.get_opcodes():

125

if op[0] == 'equal':

126

continue

127

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

128

129

def line_delta(self, new_lines):

130

return list(self.line_delta_iter(new_lines))

131

132

def text(self):

133

return [text for origin, text in self._lines]

134

135

136

class _KnitFactory(object):

137

"""Base factory for creating content objects."""

138

139

def make(self, lines, version):

140

num_lines = len(lines)

141

return KnitContent(zip([version] * num_lines, lines))

142

143

144

class KnitAnnotateFactory(_KnitFactory):

145

"""Factory for creating annotated Content objects."""

146

147

annotated = True

148

149

def parse_fulltext(self, content, version):

150

lines = []

151

for line in content:

152

origin, text = line.split(' ', 1)

153

lines.append((int(origin), text))

154

return KnitContent(lines)

155

156

def parse_line_delta_iter(self, lines):

157

while lines:

158

header = lines.pop(0)

159

start, end, c = [int(n) for n in header.split(',')]

160

contents = []

161

for i in range(c):

162

origin, text = lines.pop(0).split(' ', 1)

163

contents.append((int(origin), text))

164

yield start, end, c, contents

165

166

def parse_line_delta(self, lines, version):

167

return list(self.parse_line_delta_iter(lines))

168

169

def lower_fulltext(self, content):

170

return ['%d %s' % (o, t) for o, t in content._lines]

171

172

def lower_line_delta(self, delta):

173

out = []

174

for start, end, c, lines in delta:

175

out.append('%d,%d,%d\n' % (start, end, c))

176

for origin, text in lines:

177

out.append('%d %s' % (origin, text))

178

return out

179

180

181

class KnitPlainFactory(_KnitFactory):

182

"""Factory for creating plain Content objects."""

183

184

annotated = False

185

186

def parse_fulltext(self, content, version):

187

return self.make(content, version)

188

189

def parse_line_delta_iter(self, lines, version):

190

while lines:

191

header = lines.pop(0)

192

start, end, c = [int(n) for n in header.split(',')]

193

yield start, end, c, zip([version] * c, lines[:c])

194

del lines[:c]

195

196

def parse_line_delta(self, lines, version):

197

return list(self.parse_line_delta_iter(lines, version))

198

199

def lower_fulltext(self, content):

200

return content.text()

201

202

def lower_line_delta(self, delta):

203

out = []

204

for start, end, c, lines in delta:

205

out.append('%d,%d,%d\n' % (start, end, c))

206

out.extend([text for origin, text in lines])

207

return out

208

209

210

def make_empty_knit(transport, relpath):

211

"""Construct a empty knit at the specified location."""

212

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

213

k._data._open_file()

214

215

216

class KnitVersionedFile(VersionedFile):

217

"""Weave-like structure with faster random access.

218

219

A knit stores a number of texts and a summary of the relationships

220

between them. Texts are identified by a string version-id. Texts

221

are normally stored and retrieved as a series of lines, but can

222

also be passed as single strings.

223

224

Lines are stored with the trailing newline (if any) included, to

225

avoid special cases for files with no final newline. Lines are

226

composed of 8-bit characters, not unicode. The combination of

227

these approaches should mean any 'binary' file can be safely

228

stored and retrieved.

229

"""

230

231

def __init__(self, transport, relpath, mode, factory,

232

basis_knit=None, delta=True):

233

"""Construct a knit at location specified by relpath."""

234

assert mode in ('r', 'w'), "invalid mode specified"

235

assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \

236

type(basis_knit)

237

238

self.transport = transport

239

self.filename = relpath

240

self.basis_knit = basis_knit

241

self.factory = factory

242

self.writable = (mode == 'w')

243

self.delta = delta

244

245

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

246

mode)

247

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

248

mode)

249

250

def versions(self):

251

"""See VersionedFile.versions."""

252

return self._index.get_versions()

253

254

def has_version(self, version_id):

255

"""See VersionedFile.has_version."""

256

return self._index.has_version(version_id)

257

258

__contains__ = has_version

259

260

def _merge_annotations(self, content, parents):

261

"""Merge annotations for content. This is done by comparing

262

the annotations based on changed to the text."""

263

for parent_id in parents:

264

merge_content = self._get_content(parent_id)

265

seq = SequenceMatcher(None, merge_content.text(), content.text())

266

for i, j, n in seq.get_matching_blocks():

267

if n == 0:

268

continue

269

content._lines[j:j+n] = merge_content._lines[i:i+n]

270

271

def _get_components(self, version_id):

272

"""Return a list of (version_id, method, data) tuples that

273

makes up version specified by version_id of the knit.

274

275

The components should be applied in the order of the returned

276

list.

277

278

The basis knit will be used to the largest extent possible

279

since it is assumed that accesses to it is faster.

280

"""

281

# needed_revisions holds a list of (method, version_id) of

282

# versions that is needed to be fetched to construct the final

283

# version of the file.

284

285

# basis_revisions is a list of versions that needs to be

286

# fetched but exists in the basis knit.

287

288

basis = self.basis_knit

289

needed_versions = []

290

basis_versions = []

291

cursor = version_id

292

293

while 1:

294

picked_knit = self

295

if basis and basis._index.has_version(cursor):

296

picked_knit = basis

297

basis_versions.append(cursor)

298

method = picked_knit._index.get_method(cursor)

299

needed_versions.append((method, cursor))

300

if method == 'fulltext':

301

break

302

cursor = picked_knit.get_parents(cursor)[0]

303

304

components = {}

305

if basis_versions:

306

records = []

307

for comp_id in basis_versions:

308

data_pos, data_size = basis._index.get_data_position(comp_id)

309

records.append((piece_id, data_pos, data_size))

310

components.update(basis._data.read_records(records))

311

312

records = []

313

for comp_id in [vid for method, vid in needed_versions

314

if vid not in basis_versions]:

315

data_pos, data_size = self._index.get_position(comp_id)

316

records.append((comp_id, data_pos, data_size))

317

components.update(self._data.read_records(records))

318

319

# get_data_records returns a mapping with the version id as

320

# index and the value as data. The order the components need

321

# to be applied is held by needed_versions (reversed).

322

out = []

323

for method, comp_id in reversed(needed_versions):

324

out.append((comp_id, method, components[comp_id]))

325

326

return out

327

328

def _get_content(self, version_id):

329

"""Returns a content object that makes up the specified

330

version."""

331

if not self.has_version(version_id):

332

raise RevisionNotPresent(version_id, self.filename)

333

334

if self.basis_knit and version_id in self.basis_knit:

335

return self.basis_knit._get_content(version_id)

336

337

content = None

338

components = self._get_components(version_id)

339

for component_id, method, (data, digest) in components:

340

version_idx = self._index.lookup(component_id)

341

if method == 'fulltext':

342

assert content is None

343

content = self.factory.parse_fulltext(data, version_idx)

344

elif method == 'line-delta':

345

delta = self.factory.parse_line_delta(data, version_idx)

346

content.apply_delta(delta)

347

348

if 'no-eol' in self._index.get_options(version_id):

349

line = content._lines[-1][1].rstrip('\n')

350

content._lines[-1] = (content._lines[-1][0], line)

351

352

if sha_strings(content.text()) != digest:

353

raise KnitCorrupt(self.filename, 'sha-1 does not match')

354

355

return content

356

357

def _check_versions_present(self, version_ids):

358

"""Check that all specified versions are present."""

359

version_ids = set(version_ids)

360

for r in list(version_ids):

361

if self._index.has_version(r):

362

version_ids.remove(r)

363

if version_ids:

364

raise RevisionNotPresent(list(version_ids)[0], self.filename)

365

366

def add_lines(self, version_id, parents, lines):

367

"""See VersionedFile.add_lines."""

368

assert self.writable, "knit is not opened for write"

369

### FIXME escape. RBC 20060228

370

if contains_whitespace(version_id):

371

raise InvalidRevisionId(version_id)

372

if self.has_version(version_id):

373

raise RevisionAlreadyPresent(version_id, self.filename)

374

375

if True or __debug__:

376

for l in lines:

377

assert '\n' not in l[:-1]

378

379

self._check_versions_present(parents)

380

return self._add(version_id, lines[:], parents, self.delta)

381

382

def _add(self, version_id, lines, parents, delta):

383

"""Add a set of lines on top of version specified by parents.

384

385

If delta is true, compress the text as a line-delta against

386

the first parent.

387

"""

388

if delta and not parents:

389

delta = False

390

391

digest = sha_strings(lines)

392

options = []

393

if lines:

394

if lines[-1][-1] != '\n':

395

options.append('no-eol')

396

lines[-1] = lines[-1] + '\n'

397

398

lines = self.factory.make(lines, len(self._index))

399

if self.factory.annotated and len(parents) > 0:

400

# Merge annotations from parent texts if so is needed.

401

self._merge_annotations(lines, parents)

402

403

if parents and delta:

404

# To speed the extract of texts the delta chain is limited

405

# to a fixed number of deltas. This should minimize both

406

# I/O and the time spend applying deltas.

407

count = 0

408

delta_parents = parents

409

while count < 25:

410

parent = delta_parents[0]

411

method = self._index.get_method(parent)

412

if method == 'fulltext':

413

break

414

delta_parents = self._index.get_parents(parent)

415

count = count + 1

416

if method == 'line-delta':

417

delta = False

418

419

if delta:

420

options.append('line-delta')

421

content = self._get_content(parents[0])

422

delta_hunks = content.line_delta(lines)

423

store_lines = self.factory.lower_line_delta(delta_hunks)

424

else:

425

options.append('fulltext')

426

store_lines = self.factory.lower_fulltext(lines)

427

428

where, size = self._data.add_record(version_id, digest, store_lines)

429

self._index.add_version(version_id, options, where, size, parents)

430

431

def clone_text(self, new_version_id, old_version_id, parents):

432

"""See VersionedFile.clone_text()."""

433

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

434

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

435

436

def get_lines(self, version_id):

437

"""See VersionedFile.get_lines()."""

438

return self._get_content(version_id).text()

439

440

def annotate_iter(self, version_id):

441

"""See VersionedFile.annotate_iter."""

442

content = self._get_content(version_id)

443

for origin, text in content.annotate_iter():

444

yield self._index.idx_to_name(origin), text

445

446

def get_parents(self, version_id):

447

"""See VersionedFile.get_parents."""

448

self._check_versions_present([version_id])

449

return list(self._index.get_parents(version_id))

450

451

def get_ancestry(self, versions):

452

"""See VersionedFile.get_ancestry."""

453

if isinstance(versions, basestring):

454

versions = [versions]

455

if not versions:

456

return []

457

self._check_versions_present(versions)

458

return self._index.get_ancestry(versions)

459

460

def _reannotate_line_delta(self, other, lines, new_version_id,

461

new_version_idx):

462

"""Re-annotate line-delta and return new delta."""

463

new_delta = []

464

for start, end, count, contents \

465

in self.factory.parse_line_delta_iter(lines):

466

new_lines = []

467

for origin, line in contents:

468

old_version_id = other._index.idx_to_name(origin)

469

if old_version_id == new_version_id:

470

idx = new_version_idx

471

else:

472

idx = self._index.lookup(old_version_id)

473

new_lines.append((idx, line))

474

new_delta.append((start, end, count, new_lines))

475

476

return self.factory.lower_line_delta(new_delta)

477

478

def _reannotate_fulltext(self, other, lines, new_version_id,

479

new_version_idx):

480

"""Re-annotate fulltext and return new version."""

481

content = self.factory.parse_fulltext(lines, new_version_idx)

482

new_lines = []

483

for origin, line in content.annotate_iter():

484

old_version_id = other._index.idx_to_name(origin)

485

if old_version_id == new_version_id:

486

idx = new_version_idx

487

else:

488

idx = self._index.lookup(old_version_id)

489

new_lines.append((idx, line))

490

491

return self.factory.lower_fulltext(KnitContent(new_lines))

492

493

def join(self, other, pb=None, msg=None, version_ids=None):

494

"""See VersionedFile.join."""

495

assert isinstance(other, KnitVersionedFile)

496

497

if version_ids is None:

498

version_ids = other.versions()

499

if not version_ids:

500

return 0

501

502

if pb is None:

503

from bzrlib.progress import DummyProgress

504

pb = DummyProgress()

505

506

version_ids = list(version_ids)

507

if None in version_ids:

508

version_ids.remove(None)

509

510

other_ancestry = set(other.get_ancestry(version_ids))

511

needed_versions = other_ancestry - set(self._index.get_versions())

512

if not needed_versions:

513

return 0

514

full_list = topo_sort(other._index.get_graph())

515

516

version_list = [i for i in full_list if (not self.has_version(i)

517

and i in needed_versions)]

518

519

records = []

520

for version_id in version_list:

521

data_pos, data_size = other._index.get_position(version_id)

522

records.append((version_id, data_pos, data_size))

523

524

count = 0

525

for version_id, lines, digest \

526

in other._data.read_records_iter(records):

527

options = other._index.get_options(version_id)

528

parents = other._index.get_parents(version_id)

529

530

for parent in parents:

531

assert self.has_version(parent)

532

533

if self.factory.annotated:

534

# FIXME jrydberg: it should be possible to skip

535

# re-annotating components if we know that we are

536

# going to pull all revisions in the same order.

537

new_version_id = version_id

538

new_version_idx = self._index.num_versions()

539

if 'fulltext' in options:

540

lines = self._reannotate_fulltext(other, lines,

541

new_version_id, new_version_idx)

542

elif 'line-delta' in options:

543

lines = self._reannotate_line_delta(other, lines,

544

new_version_id, new_version_idx)

545

546

count = count + 1

547

pb.update(self.filename, count, len(version_list))

548

549

pos, size = self._data.add_record(version_id, digest, lines)

550

self._index.add_version(version_id, options, pos, size, parents)

551

552

pb.clear()

553

return count

554

555

def walk(self, version_ids):

556

"""See VersionedFile.walk."""

557

# We take the short path here, and extract all relevant texts

558

# and put them in a weave and let that do all the work. Far

559

# from optimal, but is much simpler.

560

# FIXME RB 20060228 this really is inefficient!

561

from bzrlib.weave import Weave

562

563

w = Weave(self.filename)

564

ancestry = self.get_ancestry(version_ids)

565

sorted_graph = topo_sort(self._index.get_graph())

566

version_list = [vid for vid in sorted_graph if vid in ancestry]

567

568

for version_id in version_list:

569

lines = self.get_lines(version_id)

570

w.add_lines(version_id, self.get_parents(version_id), lines)

571

572

for lineno, insert_id, dset, line in w.walk(version_ids):

573

yield lineno, insert_id, dset, line

574

575

576

class _KnitComponentFile(object):

577

"""One of the files used to implement a knit database"""

578

579

def __init__(self, transport, filename, mode):

580

self._transport = transport

581

self._filename = filename

582

self._mode = mode

583

584

def write_header(self):

585

old_len = self._transport.append(self._filename, self.HEADER)

586

if old_len != 0:

587

raise KnitCorrupt(self._filename, 'misaligned after writing header')

588

589

def check_header(self, fp):

590

line = fp.read(len(self.HEADER))

591

if line != self.HEADER:

592

raise KnitHeaderError(badline=line)

593

594

def commit(self):

595

"""Commit is a nop."""

596

597

def __repr__(self):

598

return '%s(%s)' % (self.__class__.__name__, self._filename)

599

600

601

class _KnitIndex(_KnitComponentFile):

602

"""Manages knit index file.

603

604

The index is already kept in memory and read on startup, to enable

605

fast lookups of revision information. The cursor of the index

606

file is always pointing to the end, making it easy to append

607

entries.

608

609

_cache is a cache for fast mapping from version id to a Index

610

object.

611

612

_history is a cache for fast mapping from indexes to version ids.

613

614

The index data format is dictionary compressed when it comes to

615

parent references; a index entry may only have parents that with a

616

lover index number. As a result, the index is topological sorted.

617

"""

618

619

HEADER = "# bzr knit index 7\n"

620

621

def _cache_version(self, version_id, options, pos, size, parents):

622

val = (version_id, options, pos, size, parents)

623

self._cache[version_id] = val

624

self._history.append(version_id)

625

626

def _iter_index(self, fp):

627

lines = fp.read()

628

for l in lines.splitlines(False):

629

yield l.split()

630

631

def __init__(self, transport, filename, mode):

632

_KnitComponentFile.__init__(self, transport, filename, mode)

633

self._cache = {}

634

self._history = []

635

try:

636

fp = self._transport.get(self._filename)

637

self.check_header(fp)

638

for rec in self._iter_index(fp):

639

self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),

640

[self._history[int(i)] for i in rec[4:]])

641

except NoSuchFile, e:

642

if mode != 'w':

643

raise e

644

self.write_header()

645

646

def get_graph(self):

647

graph = []

648

for version_id, index in self._cache.iteritems():

649

graph.append((version_id, index[4]))

650

return graph

651

652

def get_ancestry(self, versions):

653

"""See VersionedFile.get_ancestry."""

654

version_idxs = []

655

for version_id in versions:

656

version_idxs.append(self._history.index(version_id))

657

i = set(versions)

658

for v in xrange(max(version_idxs), 0, -1):

659

if self._history[v] in i:

660

# include all its parents

661

i.update(self._cache[self._history[v]][4])

662

return list(i)

663

664

def num_versions(self):

665

return len(self._history)

666

667

__len__ = num_versions

668

669

def get_versions(self):

670

return self._history

671

672

def idx_to_name(self, idx):

673

return self._history[idx]

674

675

def lookup(self, version_id):

676

assert version_id in self._cache

677

return self._history.index(version_id)

678

679

def add_version(self, version_id, options, pos, size, parents):

680

"""Add a version record to the index."""

681

self._cache_version(version_id, options, pos, size, parents)

682

683

content = "%s %s %s %s %s\n" % (version_id,

684

','.join(options),

685

pos,

686

size,

687

' '.join([str(self.lookup(vid)) for

688

vid in parents]))

689

self._transport.append(self._filename, content)

690

691

def has_version(self, version_id):

692

"""True if the version is in the index."""

693

return self._cache.has_key(version_id)

694

695

def get_position(self, version_id):

696

"""Return data position and size of specified version."""

697

return (self._cache[version_id][2], \

698

self._cache[version_id][3])

699

700

def get_method(self, version_id):

701

"""Return compression method of specified version."""

702

options = self._cache[version_id][1]

703

if 'fulltext' in options:

704

return 'fulltext'

705

else:

706

assert 'line-delta' in options

707

return 'line-delta'

708

709

def get_options(self, version_id):

710

return self._cache[version_id][1]

711

712

def get_parents(self, version_id):

713

"""Return parents of specified version."""

714

return self._cache[version_id][4]

715

716

def check_versions_present(self, version_ids):

717

"""Check that all specified versions are present."""

718

version_ids = set(version_ids)

719

for version_id in list(version_ids):

720

if version_id in self._cache:

721

version_ids.remove(version_id)

722

if version_ids:

723

raise RevisionNotPresent(list(version_ids)[0], self.filename)

724

725

726

class _KnitData(_KnitComponentFile):

727

"""Contents of the knit data file"""

728

729

HEADER = "# bzr knit data 7\n"

730

731

def __init__(self, transport, filename, mode):

732

_KnitComponentFile.__init__(self, transport, filename, mode)

733

self._file = None

734

self._checked = False

735

736

def _open_file(self):

737

if self._file is None:

738

try:

739

self._file = self._transport.get(self._filename)

740

except NoSuchFile:

741

pass

742

return self._file

743

744

def add_record(self, version_id, digest, lines):

745

"""Write new text record to disk. Returns the position in the

746

file where it was written."""

747

sio = StringIO()

748

data_file = GzipFile(None, mode='wb', fileobj=sio)

749

print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)

750

data_file.writelines(lines)

751

print >>data_file, "end %s\n" % version_id

752

data_file.close()

753

754

content = sio.getvalue()

755

start_pos = self._transport.append(self._filename, content)

756

return start_pos, len(content)

757

758

def _parse_record(self, version_id, data):

759

df = GzipFile(mode='rb', fileobj=StringIO(data))

760

rec = df.readline().split()

761

if len(rec) != 4:

762

raise KnitCorrupt(self._filename, 'unexpected number of records')

763

if rec[1] != version_id:

764

raise KnitCorrupt(self.file.name,

765

'unexpected version, wanted %r' % version_id)

766

lines = int(rec[2])

767

record_contents = self._read_record_contents(df, lines)

768

l = df.readline()

769

if l != 'end %s\n' % version_id:

770

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

771

% (l, version_id))

772

return record_contents, rec[3]

773

774

def _read_record_contents(self, df, record_lines):

775

"""Read and return n lines from datafile."""

776

r = []

777

for i in range(record_lines):

778

r.append(df.readline())

779

return r

780

781

def read_records_iter(self, records):

782

"""Read text records from data file and yield result.

783

784

Each passed record is a tuple of (version_id, pos, len) and

785

will be read in the given order. Yields (version_id,

786

contents, digest).

787

"""

788

789

class ContinuousRange:

790

def __init__(self, rec_id, pos, size):

791

self.start_pos = pos

792

self.end_pos = pos + size

793

self.versions = [(rec_id, pos, size)]

794

795

def add(self, rec_id, pos, size):

796

if self.end_pos != pos:

797

return False

798

self.end_pos = pos + size

799

self.versions.append((rec_id, pos, size))

800

return True

801

802

def split(self, fp):

803

for rec_id, pos, size in self.versions:

804

yield rec_id, fp.read(size)

805

806

fp = self._open_file()

807

808

# Loop through all records and try to collect as large

809

# continuous region as possible to read.

810

while records:

811

record_id, pos, size = records.pop(0)

812

continuous_range = ContinuousRange(record_id, pos, size)

813

while records:

814

record_id, pos, size = records[0]

815

if continuous_range.add(record_id, pos, size):

816

del records[0]

817

else:

818

break

819

fp.seek(continuous_range.start_pos, 0)

820

for record_id, data in continuous_range.split(fp):

821

content, digest = self._parse_record(record_id, data)

822

yield record_id, content, digest

823

824

self._file = None

825

826

def read_records(self, records):

827

"""Read records into a dictionary."""

828

components = {}

829

for record_id, content, digest in self.read_records_iter(records):

830

components[record_id] = (content, digest)

831

return components

832

Older »