~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Jelmer Vernooij
Date: 2009-02-23 20:55:58 UTC
mfrom: (4034 +trunk)
mto: This revision was merged to the branch mainline in revision 4053.
Revision ID: jelmer@samba.org-20090223205558-1cx2k4w1zgs8r5qa

Merge bzr.dev.

files added:
bzrlib/help_topics/en/log-formats.txt

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/views.py

tools/prepare_for_latex.py

tools/rst2pdf.py

files modified:
.bzrignore

Makefile

NEWS

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_patiencediff_py.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/fetch.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/util/tests/test_bencode.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/index.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

setup.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/win32/build_release.py

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

138

130

139

131

def __init__(self, basis_vf):

140

132

"""Create an adapter which accesses full texts from basis_vf.

141

133

142

134

:param basis_vf: A versioned file to access basis texts of deltas from.

143

135

May be None for adapters that do not need to access basis texts.

144

136

"""

151

143

class FTAnnotatedToUnannotated(KnitAdapter):

152

144

"""An adapter from FT annotated knits to unannotated ones."""

153

145

154

def get_bytes(self, factory, annotated_compressed_bytes):

146

def get_bytes(self, factory):

147

annotated_compressed_bytes = factory._raw_record

155

148

rec, contents = \

156

149

self._data._parse_record_unchecked(annotated_compressed_bytes)

157

150

content = self._annotate_factory.parse_fulltext(contents, rec[1])

162

155

class DeltaAnnotatedToUnannotated(KnitAdapter):

163

156

"""An adapter for deltas from annotated to unannotated."""

164

157

165

def get_bytes(self, factory, annotated_compressed_bytes):

158

def get_bytes(self, factory):

159

annotated_compressed_bytes = factory._raw_record

166

160

rec, contents = \

167

161

self._data._parse_record_unchecked(annotated_compressed_bytes)

168

162

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

175

169

class FTAnnotatedToFullText(KnitAdapter):

176

170

"""An adapter from FT annotated knits to unannotated ones."""

177

171

178

def get_bytes(self, factory, annotated_compressed_bytes):

172

def get_bytes(self, factory):

173

annotated_compressed_bytes = factory._raw_record

179

174

rec, contents = \

180

175

self._data._parse_record_unchecked(annotated_compressed_bytes)

181

176

content, delta = self._annotate_factory.parse_record(factory.key[-1],

186

181

class DeltaAnnotatedToFullText(KnitAdapter):

187

182

"""An adapter for deltas from annotated to unannotated."""

188

183

189

def get_bytes(self, factory, annotated_compressed_bytes):

184

def get_bytes(self, factory):

185

annotated_compressed_bytes = factory._raw_record

190

186

rec, contents = \

191

187

self._data._parse_record_unchecked(annotated_compressed_bytes)

192

188

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

209

205

class FTPlainToFullText(KnitAdapter):

210

206

"""An adapter from FT plain knits to unannotated ones."""

211

207

212

def get_bytes(self, factory, compressed_bytes):

208

def get_bytes(self, factory):

209

compressed_bytes = factory._raw_record

213

210

rec, contents = \

214

211

self._data._parse_record_unchecked(compressed_bytes)

215

212

content, delta = self._plain_factory.parse_record(factory.key[-1],

220

217

class DeltaPlainToFullText(KnitAdapter):

221

218

"""An adapter for deltas from annotated to unannotated."""

222

219

223

def get_bytes(self, factory, compressed_bytes):

220

def get_bytes(self, factory):

221

compressed_bytes = factory._raw_record

224

222

rec, contents = \

225

223

self._data._parse_record_unchecked(compressed_bytes)

226

224

delta = self._plain_factory.parse_line_delta(contents, rec[1])

242

240

243

241

class KnitContentFactory(ContentFactory):

244

242

"""Content factory for streaming from knits.

245

243

246

244

:seealso ContentFactory:

247

245

"""

248

246

249

247

def __init__(self, key, parents, build_details, sha1, raw_record,

250

annotated, knit=None):

248

annotated, knit=None, network_bytes=None):

251

249

"""Create a KnitContentFactory for key.

252

250

253

251

:param key: The key.

254

252

:param parents: The parents.

255

253

:param build_details: The build details as returned from

257

255

:param sha1: The sha1 expected from the full text of this object.

258

256

:param raw_record: The bytes of the knit data from disk.

259

257

:param annotated: True if the raw data is annotated.

258

:param network_bytes: None to calculate the network bytes on demand,

259

not-none if they are already known.

260

"""

261

ContentFactory.__init__(self)

262

self.sha1 = sha1

272

annotated_kind = ''

273

self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)

274

self._raw_record = raw_record

275

self._network_bytes = network_bytes

275

276

self._build_details = build_details

276

277

self._knit = knit

277

278

279

def _create_network_bytes(self):

280

"""Create a fully serialised network version for transmission."""

281

# storage_kind, key, parents, Noeol, raw_record

282

key_bytes = '\x00'.join(self.key)

283

if self.parents is None:

284

parent_bytes = 'None:'

285

else:

286

parent_bytes = '\t'.join('\x00'.join(key) for key in self.parents)

287

if self._build_details[1]:

288

noeol = 'N'

289

else:

290

noeol = ' '

291

network_bytes = "%s\n%s\n%s\n%s%s" % (self.storage_kind, key_bytes,

292

parent_bytes, noeol, self._raw_record)

293

self._network_bytes = network_bytes

294

278

295

def get_bytes_as(self, storage_kind):

279

296

if storage_kind == self.storage_kind:

280

return self._raw_record

297

if self._network_bytes is None:

298

self._create_network_bytes()

299

return self._network_bytes

281

300

if self._knit is not None:

282

301

if storage_kind == 'chunked':

283

302

return self._knit.get_lines(self.key[0])

287

306

self.storage_kind)

288

307

289

308

309

class LazyKnitContentFactory(ContentFactory):

310

"""A ContentFactory which can either generate full text or a wire form.

311

312

:seealso ContentFactory:

313

"""

314

315

def __init__(self, key, parents, generator, first):

316

"""Create a LazyKnitContentFactory.

317

318

:param key: The key of the record.

319

:param parents: The parents of the record.

320

:param generator: A _ContentMapGenerator containing the record for this

321

key.

322

:param first: Is this the first content object returned from generator?

323

if it is, its storage kind is knit-delta-closure, otherwise it is

324

knit-delta-closure-ref

325

"""

326

self.key = key

327

self.parents = parents

328

self.sha1 = None

329

self._generator = generator

330

self.storage_kind = "knit-delta-closure"

331

if not first:

332

self.storage_kind = self.storage_kind + "-ref"

333

self._first = first

334

335

def get_bytes_as(self, storage_kind):

336

if storage_kind == self.storage_kind:

337

if self._first:

338

return self._generator._wire_bytes()

339

else:

340

# all the keys etc are contained in the bytes returned in the

341

# first record.

342

return ''

343

if storage_kind in ('chunked', 'fulltext'):

344

chunks = self._generator._get_one_work(self.key).text()

345

if storage_kind == 'chunked':

346

return chunks

347

else:

348

return ''.join(chunks)

349

raise errors.UnavailableRepresentation(self.key, storage_kind,

350

self.storage_kind)

351

352

353

def knit_delta_closure_to_records(storage_kind, bytes, line_end):

354

"""Convert a network record to a iterator over stream records.

355

356

:param storage_kind: The storage kind of the record.

357

Must be 'knit-delta-closure'.

358

:param bytes: The bytes of the record on the network.

359

"""

360

generator = _NetworkContentMapGenerator(bytes, line_end)

361

return generator.get_record_stream()

362

363

364

def knit_network_to_record(storage_kind, bytes, line_end):

365

"""Convert a network record to a record object.

366

367

:param storage_kind: The storage kind of the record.

368

:param bytes: The bytes of the record on the network.

369

"""

370

start = line_end

371

line_end = bytes.find('\n', start)

372

key = tuple(bytes[start:line_end].split('\x00'))

373

start = line_end + 1

374

line_end = bytes.find('\n', start)

375

parent_line = bytes[start:line_end]

376

if parent_line == 'None:':

377

parents = None

378

else:

379

parents = tuple(

380

[tuple(segment.split('\x00')) for segment in parent_line.split('\t')

381

if segment])

382

start = line_end + 1

383

noeol = bytes[start] == 'N'

384

if 'ft' in storage_kind:

385

method = 'fulltext'

386

else:

387

method = 'line-delta'

388

build_details = (method, noeol)

389

start = start + 1

390

raw_record = bytes[start:]

391

annotated = 'annotated' in storage_kind

392

return [KnitContentFactory(key, parents, build_details, None, raw_record,

393

annotated, network_bytes=bytes)]

394

395

290

396

class KnitContent(object):

291

397

"""Content of a knit version to which deltas can be applied.

292

398

293

399

This is always stored in memory as a list of lines with \n at the end,

294

plus a flag saying if the final ending is really there or not, because that

400

plus a flag saying if the final ending is really there or not, because that

295

401

corresponds to the on-disk knit representation.

296

402

"""

297

403

386

492

387

493

class PlainKnitContent(KnitContent):

388

494

"""Unannotated content.

389

495

390

496

When annotate[_iter] is called on this content, the same version is reported

391

497

for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent

392

498

objects.

647

753

648

754

This is only functional enough to run interface tests, it doesn't try to

649

755

provide a full pack environment.

650

756

651

757

:param annotated: knit annotations are wanted.

652

758

:param mapper: The mapper from keys to paths.

653

759

"""

663

769

664

770

This is only functional enough to run interface tests, it doesn't try to

665

771

provide a full pack environment.

666

772

667

773

:param graph: Store a graph.

668

774

:param delta: Delta compress contents.

669

775

:param keylength: How long should keys be.

705

811

706

812

Backend storage is managed by indices and data objects.

707

813

708

:ivar _index: A _KnitGraphIndex or similar that can describe the

709

parents, graph, compression and data location of entries in this

710

KnitVersionedFiles. Note that this is only the index for

814

:ivar _index: A _KnitGraphIndex or similar that can describe the

815

parents, graph, compression and data location of entries in this

816

KnitVersionedFiles. Note that this is only the index for

711

817

*this* vfs; if there are fallbacks they must be queried separately.

712

818

"""

713

819

891

997

892

998

def _check_header_version(self, rec, version_id):

893

999

"""Checks the header version on original format knit records.

894

1000

895

1001

These have the last component of the key embedded in the record.

896

1002

"""

897

1003

if rec[1] != version_id:

976

1082

if missing and not allow_missing:

977

1083

raise errors.RevisionNotPresent(missing.pop(), self)

978

1084

return component_data

979

1085

980

1086

def _get_content(self, key, parent_texts={}):

981

1087

"""Returns a content object that makes up the specified

982

1088

version."""

986

1092

if not self.get_parent_map([key]):

987

1093

raise RevisionNotPresent(key, self)

988

1094

return cached_version

989

text_map, contents_map = self._get_content_maps([key])

990

return contents_map[key]

991

992

def _get_content_maps(self, keys, nonlocal_keys=None):

993

"""Produce maps of text and KnitContents

994

995

:param keys: The keys to produce content maps for.

996

:param nonlocal_keys: An iterable of keys(possibly intersecting keys)

997

which are known to not be in this knit, but rather in one of the

998

fallback knits.

999

:return: (text_map, content_map) where text_map contains the texts for

1000

the requested versions and content_map contains the KnitContents.

1001

"""

1002

# FUTURE: This function could be improved for the 'extract many' case

1003

# by tracking each component and only doing the copy when the number of

1004

# children that need to apply deltas to it is > 1 or it is part of the

1005

# final output.

1006

keys = list(keys)

1007

multiple_versions = len(keys) != 1

1008

record_map = self._get_record_map(keys, allow_missing=True)

1009

1010

text_map = {}

1011

content_map = {}

1012

final_content = {}

1013

if nonlocal_keys is None:

1014

nonlocal_keys = set()

1015

else:

1016

nonlocal_keys = frozenset(nonlocal_keys)

1017

missing_keys = set(nonlocal_keys)

1018

for source in self._fallback_vfs:

1019

if not missing_keys:

1020

break

1021

for record in source.get_record_stream(missing_keys,

1022

'unordered', True):

1023

if record.storage_kind == 'absent':

1024

continue

1025

missing_keys.remove(record.key)

1026

lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

1027

text_map[record.key] = lines

1028

content_map[record.key] = PlainKnitContent(lines, record.key)

1029

if record.key in keys:

1030

final_content[record.key] = content_map[record.key]

1031

for key in keys:

1032

if key in nonlocal_keys:

1033

# already handled

1034

continue

1035

components = []

1036

cursor = key

1037

while cursor is not None:

1038

try:

1039

record, record_details, digest, next = record_map[cursor]

1040

except KeyError:

1041

raise RevisionNotPresent(cursor, self)

1042

components.append((cursor, record, record_details, digest))

1043

cursor = next

1044

if cursor in content_map:

1045

# no need to plan further back

1046

components.append((cursor, None, None, None))

1047

break

1048

1049

content = None

1050

for (component_id, record, record_details,

1051

digest) in reversed(components):

1052

if component_id in content_map:

1053

content = content_map[component_id]

1054

else:

1055

content, delta = self._factory.parse_record(key[-1],

1056

record, record_details, content,

1057

copy_base_content=multiple_versions)

1058

if multiple_versions:

1059

content_map[component_id] = content

1060

1061

final_content[key] = content

1062

1063

# digest here is the digest from the last applied component.

1064

text = content.text()

1065

actual_sha = sha_strings(text)

1066

if actual_sha != digest:

1067

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

1068

text_map[key] = text

1069

return text_map, final_content

1095

generator = _VFContentMapGenerator(self, [key])

1096

return generator._get_content(key)

1070

1097

1071

1098

def get_parent_map(self, keys):

1072

1099

"""Get a map of the graph parents of keys.

1102

1129

1103

1130

def _get_record_map(self, keys, allow_missing=False):

1104

1131

"""Produce a dictionary of knit records.

1105

1132

1106

1133

:return: {key:(record, record_details, digest, next)}

1107

1134

record

1108

data returned from read_records

1135

data returned from read_records (a KnitContent object)

1109

1136

record_details

1110

1137

opaque information to pass to parse_record

1111

1138

digest

1114

1141

build-parent of the version, i.e. the leftmost ancestor.

1115

1142

Will be None if the record is not a delta.

1116

1143

:param keys: The keys to build a map for

1117

:param allow_missing: If some records are missing, rather than

1144

:param allow_missing: If some records are missing, rather than

1118

1145

error, just return the data that could be generated.

1119

1146

"""

1147

raw_map = self._get_record_map_unparsed(keys,

1148

allow_missing=allow_missing)

1149

return self._raw_map_to_record_map(raw_map)

1150

1151

def _raw_map_to_record_map(self, raw_map):

1152

"""Parse the contents of _get_record_map_unparsed.

1153

1154

:return: see _get_record_map.

1155

"""

1156

result = {}

1157

for key in raw_map:

1158

data, record_details, next = raw_map[key]

1159

content, digest = self._parse_record(key[-1], data)

1160

result[key] = content, record_details, digest, next

1161

return result

1162

1163

def _get_record_map_unparsed(self, keys, allow_missing=False):

1164

"""Get the raw data for reconstructing keys without parsing it.

1165

1166

:return: A dict suitable for parsing via _raw_map_to_record_map.

1167

key-> raw_bytes, (method, noeol), compression_parent

1168

"""

1120

1169

# This retries the whole request if anything fails. Potentially we

1121

1170

# could be a bit more selective. We could track the keys whose records

1122

1171

# we have successfully found, and then only request the new records

1132

1181

# n = next

1133

1182

records = [(key, i_m) for key, (r, i_m, n)

1134

1183

in position_map.iteritems()]

1135

record_map = {}

1136

for key, record, digest in self._read_records_iter(records):

1184

raw_record_map = {}

1185

for key, data in self._read_records_iter_unchecked(records):

1137

1186

(record_details, index_memo, next) = position_map[key]

1138

record_map[key] = record, record_details, digest, next

1139

return record_map

1187

raw_record_map[key] = data, record_details, next

1188

return raw_record_map

1140

1189

except errors.RetryWithNewPacks, e:

1141

1190

self._access.reload_or_raise(e)

1142

1191

1206

1255

absent_keys = keys.difference(set(positions))

1207

1256

# There may be more absent keys : if we're missing the basis component

1208

1257

# and are trying to include the delta closure.

1258

# XXX: We should not ever need to examine remote sources because we do

1259

# not permit deltas across versioned files boundaries.

1209

1260

if include_delta_closure:

1210

1261

needed_from_fallback = set()

1211

1262

# Build up reconstructable_keys dict. key:True in this dict means

1288

1339

for prefix, keys in prefix_split_keys.iteritems():

1289

1340

non_local = prefix_split_non_local_keys.get(prefix, [])

1290

1341

non_local = set(non_local)

1291

text_map, _ = self._get_content_maps(keys, non_local)

1292

for key in keys:

1293

lines = text_map.pop(key)

1294

yield ChunkedContentFactory(key, global_map[key], None,

1295

lines)

1342

generator = _VFContentMapGenerator(self, keys, non_local,

1343

global_map)

1344

for record in generator.get_record_stream():

1345

yield record

1296

1346

else:

1297

1347

for source, keys in source_keys:

1298

1348

if source is parent_maps[0]:

1330

1380

def insert_record_stream(self, stream):

1331

1381

"""Insert a record stream into this container.

1332

1382

1333

:param stream: A stream of records to insert.

1383

:param stream: A stream of records to insert.

1334

1384

:return: None

1335

1385

:seealso VersionedFiles.get_record_stream:

1336

1386

"""

1408

1458

except KeyError:

1409

1459

adapter_key = (record.storage_kind, "knit-ft-gz")

1410

1460

adapter = get_adapter(adapter_key)

1411

bytes = adapter.get_bytes(

1412

record, record.get_bytes_as(record.storage_kind))

1461

bytes = adapter.get_bytes(record)

1413

1462

else:

1414

bytes = record.get_bytes_as(record.storage_kind)

1463

# It's a knit record, it has a _raw_record field (even if

1464

# it was reconstituted from a network stream).

1465

bytes = record._raw_record

1415

1466

options = [record._build_details[0]]

1416

1467

if record._build_details[1]:

1417

1468

options.append('no-eol')

1448

1499

elif record.storage_kind == 'chunked':

1449

1500

self.add_lines(record.key, parents,

1450

1501

osutils.chunks_to_lines(record.get_bytes_as('chunked')))

1451

elif record.storage_kind == 'fulltext':

1452

self.add_lines(record.key, parents,

1453

split_lines(record.get_bytes_as('fulltext')))

1454

1502

else:

1455

# Not a fulltext, and not suitable for direct insertion as a

1503

# Not suitable for direct insertion as a

1456

1504

# delta, either because it's not the right format, or this

1457

1505

# KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==

1458

1506

# 0) or because it depends on a base only present in the

1459

1507

# fallback kvfs.

1460

adapter_key = record.storage_kind, 'fulltext'

1461

adapter = get_adapter(adapter_key)

1462

lines = split_lines(adapter.get_bytes(

1463

record, record.get_bytes_as(record.storage_kind)))

1508

try:

1509

# Try getting a fulltext directly from the record.

1510

bytes = record.get_bytes_as('fulltext')

1511

except errors.UnavailableRepresentation:

1512

adapter_key = record.storage_kind, 'fulltext'

1513

adapter = get_adapter(adapter_key)

1514

bytes = adapter.get_bytes(record)

1515

lines = split_lines(bytes)

1464

1516

try:

1465

1517

self.add_lines(record.key, parents, lines)

1466

1518

except errors.RevisionAlreadyPresent:

1475

1527

added_keys.extend(

1476

1528

[index_entry[0] for index_entry in index_entries])

1477

1529

del buffered_index_entries[key]

1478

# If there were any deltas which had a missing basis parent, error.

1479

1530

if buffered_index_entries:

1480

from pprint import pformat

1481

raise errors.BzrCheckError(

1482

"record_stream refers to compression parents not in %r:\n%s"

1483

% (self, pformat(sorted(buffered_index_entries.keys()))))

1531

# There were index entries buffered at the end of the stream,

1532

# So these need to be added (if the index supports holding such

1533

# entries for later insertion)

1534

for key in buffered_index_entries:

1535

index_entries = buffered_index_entries[key]

1536

self._index.add_records(index_entries,

1537

missing_compression_parents=True)

1538

1539

def get_missing_compression_parent_keys(self):

1540

"""Return an iterable of keys of missing compression parents.

1541

1542

Check this after calling insert_record_stream to find out if there are

1543

any missing compression parents. If there are, the records that

1544

depend on them are not able to be inserted safely. For atomic

1545

KnitVersionedFiles built on packs, the transaction should be aborted or

1546

suspended - commit will fail at this point. Nonatomic knits will error

1547

earlier because they have no staging area to put pending entries into.

1548

"""

1549

return self._index.get_missing_compression_parents()

1484

1550

1485

1551

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1486

1552

"""Iterate over the lines in the versioned files from keys.

1528

1594

# fulltext

1529

1595

line_iterator = self._factory.get_fulltext_content(data)

1530

1596

else:

1531

# Delta

1597

# Delta

1532

1598

line_iterator = self._factory.get_linedelta_content(data)

1533

1599

# Now that we are yielding the data for this key, remove it

1534

1600

# from the list

1545

1611

# If there are still keys we've not yet found, we look in the fallback

1546

1612

# vfs, and hope to find them there. Note that if the keys are found

1547

1613

# but had no changes or no content, the fallback may not return

1548

# anything.

1614

# anything.

1549

1615

if keys and not self._fallback_vfs:

1550

1616

# XXX: strictly the second parameter is meant to be the file id

1551

1617

# but it's not easily accessible here.

1573

1639

delta=None, annotated=None,

1574

1640

left_matching_blocks=None):

1575

1641

"""Merge annotations for content and generate deltas.

1576

1642

1577

1643

This is done by comparing the annotations based on changes to the text

1578

1644

and generating a delta on the resulting full texts. If annotations are

1579

1645

not being created then a simple delta is created.

1661

1727

rec[1], record_contents))

1662

1728

if last_line != 'end %s\n' % rec[1]:

1663

1729

raise KnitCorrupt(self,

1664

'unexpected version end line %r, wanted %r'

1730

'unexpected version end line %r, wanted %r'

1665

1731

% (last_line, rec[1]))

1666

1732

df.close()

1667

1733

return rec, record_contents

1684

1750

if not needed_records:

1685

1751

return

1686

1752

1687

# The transport optimizes the fetching as well

1753

# The transport optimizes the fetching as well

1688

1754

# (ie, reads continuous ranges.)

1689

1755

raw_data = self._access.get_raw_records(

1690

1756

[index_memo for key, index_memo in needed_records])

1700

1766

This unpacks enough of the text record to validate the id is

1701

1767

as expected but that's all.

1702

1768

1703

Each item the iterator yields is (key, bytes, sha1_of_full_text).

1769

Each item the iterator yields is (key, bytes,

1770

expected_sha1_of_full_text).

1771

"""

1772

for key, data in self._read_records_iter_unchecked(records):

1773

# validate the header (note that we can only use the suffix in

1774

# current knit records).

1775

df, rec = self._parse_record_header(key, data)

1776

df.close()

1777

yield key, data, rec[3]

1778

1779

def _read_records_iter_unchecked(self, records):

1780

"""Read text records from data file and yield raw data.

1781

1782

No validation is done.

1783

1784

Yields tuples of (key, data).

1704

1785

"""

1705

1786

# setup an iterator of the external records:

1706

1787

# uses readv so nice and fast we hope.

1712

1793

1713

1794

for key, index_memo in records:

1714

1795

data = raw_records.next()

1715

# validate the header (note that we can only use the suffix in

1716

# current knit records).

1717

df, rec = self._parse_record_header(key, data)

1718

df.close()

1719

yield key, data, rec[3]

1796

yield key, data

1720

1797

1721

1798

def _record_to_data(self, key, digest, lines, dense_lines=None):

1722

1799

"""Convert key, digest, lines into a raw data block.

1723

1800

1724

1801

:param key: The key of the record. Currently keys are always serialised

1725

1802

using just the trailing component.

1726

1803

:param dense_lines: The bytes of lines but in a denser form. For

1766

1843

return result

1767

1844

1768

1845

1846

class _ContentMapGenerator(object):

1847

"""Generate texts or expose raw deltas for a set of texts."""

1848

1849

def _get_content(self, key):

1850

"""Get the content object for key."""

1851

# Note that _get_content is only called when the _ContentMapGenerator

1852

# has been constructed with just one key requested for reconstruction.

1853

if key in self.nonlocal_keys:

1854

record = self.get_record_stream().next()

1855

# Create a content object on the fly

1856

lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

1857

return PlainKnitContent(lines, record.key)

1858

else:

1859

# local keys we can ask for directly

1860

return self._get_one_work(key)

1861

1862

def get_record_stream(self):

1863

"""Get a record stream for the keys requested during __init__."""

1864

for record in self._work():

1865

yield record

1866

1867

def _work(self):

1868

"""Yield the records for the requested keys.

1869

1870

:return: An iterator of records: those found in the fallback vfs for the

1871

nonlocal keys, then a LazyKnitContentFactory for each local key.

1872

"""

1873

# NB: By definition we never need to read remote sources unless texts

1874

# are requested from them: we don't delta across stores - and we

1875

# explicitly do not want to, to prevent data loss situations.

1876

if self.global_map is None:

1877

self.global_map = self.vf.get_parent_map(self.keys)

1878

nonlocal_keys = self.nonlocal_keys

1879

1880

missing_keys = set(nonlocal_keys)

1881

# Read from remote versioned file instances and provide to our caller.

1882

for source in self.vf._fallback_vfs:

1883

if not missing_keys:

1884

break

1885

# Loop over fallback repositories asking them for texts - ignore

1886

# any missing from a particular fallback.

1887

for record in source.get_record_stream(missing_keys,

1888

'unordered', True):

1889

if record.storage_kind == 'absent':

1890

# Not in this particular stream, may be in one of the

1891

# other fallback vfs objects.

1892

continue

1893

missing_keys.remove(record.key)

1894

yield record

1895

1896

self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,

1897

allow_missing=True)

1898

first = True

1899

for key in self.keys:

1900

if key in self.nonlocal_keys:

1901

continue

1902

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

1903

first = False

1904

1905

def _get_one_work(self, requested_key):

1906

# Now, if we have calculated everything already, just return the

1907

# desired text.

1908

if requested_key in self._contents_map:

1909

return self._contents_map[requested_key]

1910

# To simplify things, parse everything at once - code that wants one text

1911

# probably wants them all.

1912

# FUTURE: This function could be improved for the 'extract many' case

1913

# by tracking each component and only doing the copy when the number of

1914

# children that need to apply deltas to it is > 1 or it is part of the

1915

# final output.

1916

multiple_versions = len(self.keys) != 1

1917

if self._record_map is None:

1918

self._record_map = self.vf._raw_map_to_record_map(

1919

self._raw_record_map)

1920

record_map = self._record_map

1921

# record_map is key: (record, record_details, digest, next)

1922

# Have read and parsed records at this point.

1923

for key in self.keys:

1924

if key in self.nonlocal_keys:

1925

# already handled

1926

continue

1927

components = []

1928

cursor = key

1929

while cursor is not None:

1930

try:

1931

record, record_details, digest, next = record_map[cursor]

1932

except KeyError:

1933

raise RevisionNotPresent(cursor, self)

1934

components.append((cursor, record, record_details, digest))

1935

cursor = next

1936

if cursor in self._contents_map:

1937

# no need to plan further back

1938

components.append((cursor, None, None, None))

1939

break

1940

1941

content = None

1942

for (component_id, record, record_details,

1943

digest) in reversed(components):

1944

if component_id in self._contents_map:

1945

content = self._contents_map[component_id]

1946

else:

1947

content, delta = self._factory.parse_record(key[-1],

1948

record, record_details, content,

1949

copy_base_content=multiple_versions)

1950

if multiple_versions:

1951

self._contents_map[component_id] = content

1952

1953

# digest here is the digest from the last applied component.

1954

text = content.text()

1955

actual_sha = sha_strings(text)

1956

if actual_sha != digest:

1957

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

1958

if multiple_versions:

1959

return self._contents_map[requested_key]

1960

else:

1961

return content

1962

1963

def _wire_bytes(self):

1964

"""Get the bytes to put on the wire for the requested keys.

1965

1966

The first collection of bytes asked for returns the serialised

1967

raw_record_map and the additional details (key, parent) for key.

1968

Subsequent calls return just the additional details (key, parent).

1969

The wire storage_kind given for the first key is 'knit-delta-closure',

1970

For subsequent keys it is 'knit-delta-closure-ref'.

1971

1972

This method takes no key: the bytes cover the keys held by the generator.

1973

:return: Bytes to put on the wire.

1974

"""

1975

lines = []

1976

# kind marker for dispatch on the far side,

1977

lines.append('knit-delta-closure')

1978

# Annotated or not

1979

if self.vf._factory.annotated:

1980

lines.append('annotated')

1981

else:

1982

lines.append('')

1983

# then the list of keys

1984

lines.append('\t'.join(['\x00'.join(key) for key in self.keys

1985

if key not in self.nonlocal_keys]))

1986

# then the _raw_record_map in serialised form:

1987

map_byte_list = []

1988

# for each item in the map:

1989

# 1 line with key

1990

# 1 line with parents if the key is to be yielded (None: for None, '' for ())

1991

# one line with method

1992

# one line with noeol

1993

# one line with next ('' for None)

1994

# one line with byte count of the record bytes

1995

# the record bytes

1996

for key, (record_bytes, (method, noeol), next) in \

1997

self._raw_record_map.iteritems():

1998

key_bytes = '\x00'.join(key)

1999

parents = self.global_map.get(key, None)

2000

if parents is None:

2001

parent_bytes = 'None:'

2002

else:

2003

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

2004

method_bytes = method

2005

if noeol:

2006

noeol_bytes = "T"

2007

else:

2008

noeol_bytes = "F"

2009

if next:

2010

next_bytes = '\x00'.join(next)

2011

else:

2012

next_bytes = ''

2013

map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (

2014

key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,

2015

len(record_bytes), record_bytes))

2016

map_bytes = ''.join(map_byte_list)

2017

lines.append(map_bytes)

2018

bytes = '\n'.join(lines)

2019

return bytes

2020

2021

2022

class _VFContentMapGenerator(_ContentMapGenerator):

2023

"""Content map generator reading from a VersionedFiles object."""

2024

2025

def __init__(self, versioned_files, keys, nonlocal_keys=None,

2026

global_map=None, raw_record_map=None):

2027

"""Create a _ContentMapGenerator.

2028

2029

:param versioned_files: The versioned files that the texts are being

2030

extracted from.

2031

:param keys: The keys to produce content maps for.

2032

:param nonlocal_keys: An iterable of keys (possibly intersecting keys)

2033

which are known to not be in this knit, but rather in one of the

2034

fallback knits.

2035

:param global_map: The result of get_parent_map(keys) (or a supermap).

2036

This is required if get_record_stream() is to be used.

2037

:param raw_record_map: An unparsed raw record map to use for answering

2038

contents.

2039

"""

2040

# The vf to source data from

2041

self.vf = versioned_files

2042

# The keys desired

2043

self.keys = list(keys)

2044

# Keys known to be in fallback vfs objects

2045

if nonlocal_keys is None:

2046

self.nonlocal_keys = set()

2047

else:

2048

self.nonlocal_keys = frozenset(nonlocal_keys)

2049

# Parents data for keys to be returned in get_record_stream

2050

self.global_map = global_map

2051

# The chunked lists for self.keys in text form

2052

self._text_map = {}

2053

# A cache of KnitContent objects used in extracting texts.

2054

self._contents_map = {}

2055

# All the knit records needed to assemble the requested keys as full

2056

# texts.

2057

self._record_map = None

2058

if raw_record_map is None:

2059

self._raw_record_map = self.vf._get_record_map_unparsed(keys,

2060

allow_missing=True)

2061

else:

2062

self._raw_record_map = raw_record_map

2063

# the factory for parsing records

2064

self._factory = self.vf._factory

2065

2066

2067

class _NetworkContentMapGenerator(_ContentMapGenerator):

2068

"""Content map generator sourced from a network stream."""

2069

2070

def __init__(self, bytes, line_end):

2071

"""Construct a _NetworkContentMapGenerator from a bytes block."""

2072

self._bytes = bytes

2073

self.global_map = {}

2074

self._raw_record_map = {}

2075

self._contents_map = {}

2076

self._record_map = None

2077

self.nonlocal_keys = []

2078

# Get access to record parsing facilities

2079

self.vf = KnitVersionedFiles(None, None)

2080

start = line_end

2081

# Annotated or not

2082

line_end = bytes.find('\n', start)

2083

line = bytes[start:line_end]

2084

start = line_end + 1

2085

if line == 'annotated':

2086

self._factory = KnitAnnotateFactory()

2087

else:

2088

self._factory = KnitPlainFactory()

2089

# list of keys to emit in get_record_stream

2090

line_end = bytes.find('\n', start)

2091

line = bytes[start:line_end]

2092

start = line_end + 1

2093

self.keys = [

2094

tuple(segment.split('\x00')) for segment in line.split('\t')

2095

if segment]

2096

# now a loop until the end. XXX: It would be nice if this was just a

2097

# bunch of the same records as get_record_stream(..., False) gives, but

2098

# there is a decent sized gap stopping that at the moment.

2099

end = len(bytes)

2100

while start < end:

2101

# 1 line with key

2102

line_end = bytes.find('\n', start)

2103

key = tuple(bytes[start:line_end].split('\x00'))

2104

start = line_end + 1

2105

# 1 line with parents (None: for None, '' for ())

2106

line_end = bytes.find('\n', start)

2107

line = bytes[start:line_end]

2108

if line == 'None:':

2109

parents = None

2110

else:

2111

parents = tuple(

2112

[tuple(segment.split('\x00')) for segment in line.split('\t')

2113

if segment])

2114

self.global_map[key] = parents

2115

start = line_end + 1

2116

# one line with method

2117

line_end = bytes.find('\n', start)

2118

line = bytes[start:line_end]

2119

method = line

2120

start = line_end + 1

2121

# one line with noeol

2122

line_end = bytes.find('\n', start)

2123

line = bytes[start:line_end]

2124

noeol = line == "T"

2125

start = line_end + 1

2126

# one line with next ('' for None)

2127

line_end = bytes.find('\n', start)

2128

line = bytes[start:line_end]

2129

if not line:

2130

next = None

2131

else:

2132

next = tuple(bytes[start:line_end].split('\x00'))

2133

start = line_end + 1

2134

# one line with byte count of the record bytes

2135

line_end = bytes.find('\n', start)

2136

line = bytes[start:line_end]

2137

count = int(line)

2138

start = line_end + 1

2139

# the record bytes

2140

record_bytes = bytes[start:start+count]

2141

start = start + count

2142

# put it in the map

2143

self._raw_record_map[key] = (record_bytes, (method, noeol), next)

2144

2145

def get_record_stream(self):

2146

"""Get a record stream for the keys requested by the bytestream."""

2147

first = True

2148

for key in self.keys:

2149

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

2150

first = False

2151

2152

def _wire_bytes(self):

2153

return self._bytes

2154

2155

1769

2156

class _KndxIndex(object):

1770

2157

"""Manages knit index files

1771

2158

1785

2172

1786

2173

Duplicate entries may be written to the index for a single version id

1787

2174

if this is done then the latter one completely replaces the former:

1788

this allows updates to correct version and parent information.

2175

this allows updates to correct version and parent information.

1789

2176

Note that the two entries may share the delta, and that successive

1790

2177

annotations and references MUST point to the first entry.

1791

2178

1792

2179

The index file on disc contains a header, followed by one line per knit

1793

2180

record. The same revision can be present in an index file more than once.

1794

The first occurrence gets assigned a sequence number starting from 0.

1795

2181

The first occurrence gets assigned a sequence number starting from 0.

2182

1796

2183

The format of a single line is

1797

2184

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1798

2185

REVISION_ID is a utf8-encoded revision id

1799

FLAGS is a comma separated list of flags about the record. Values include

2186

FLAGS is a comma separated list of flags about the record. Values include

1800

2187

no-eol, line-delta, fulltext.

1801

2188

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1802

2189

that the compressed data starts at.

1806

2193

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1807

2194

revision id already in the knit that is a parent of REVISION_ID.

1808

2195

The ' :' marker is the end of record marker.

1809

2196

1810

2197

partial writes:

1811

2198

when a write is interrupted to the index file, it will result in a line

1812

2199

that does not end in ' :'. If the ' :' is not present at the end of a line,

1837

2224

self._reset_cache()

1838

2225

self.has_graph = True

1839

2226

1840

def add_records(self, records, random_id=False):

2227

def add_records(self, records, random_id=False, missing_compression_parents=False):

1841

2228

"""Add multiple records to the index.

1842

2229

1843

2230

:param records: a list of tuples:

1844

2231

(key, options, access_memo, parents).

1845

2232

:param random_id: If True the ids being added were randomly generated

1846

2233

and no check for existence will be performed.

2234

:param missing_compression_parents: If True the records being added all

2235

refer to unavailable texts (or texts inside records) as compression

2236

parents. If False the records are only compressed against texts

2237

already in the index (or inside records).

1847

2238

"""

2239

if missing_compression_parents:

2240

# It might be nice to get the edge of the records. But keys isn't

2241

# _wrong_.

2242

keys = sorted(record[0] for record in records)

2243

raise errors.RevisionNotPresent(keys, self)

1848

2244

paths = {}

1849

2245

for record in records:

1850

2246

key = record[0]

1881

2277

self._kndx_cache[prefix] = (orig_cache, orig_history)

1882

2278

raise

1883

2279

2280

def scan_unvalidated_index(self, graph_index):

2281

"""See _KnitGraphIndex.scan_unvalidated_index."""

2282

# Because kndx files do not support atomic insertion via separate index

2283

# files, they do not support this method.

2284

raise NotImplementedError(self.scan_unvalidated_index)

2285

2286

def get_missing_compression_parents(self):

2287

"""See _KnitGraphIndex.get_missing_compression_parents."""

2288

# Because kndx files do not support atomic insertion via separate index

2289

# files, they do not support this method.

2290

raise NotImplementedError(self.get_missing_compression_parents)

2291

1884

2292

def _cache_key(self, key, options, pos, size, parent_keys):

1885

2293

"""Cache a version record in the history array and index cache.

1886

2294

2019

2427

2020

2428

def get_position(self, key):

2021

2429

"""Return details needed to access the version.

2022

2430

2023

2431

:return: a tuple (key, data position, size) to hand to the access

2024

2432

logic to get the record.

2025

2433

"""

2029

2437

return key, entry[2], entry[3]

2030

2438

2031

2439

has_key = _mod_index._has_key_from_parent_map

2032

2440

2033

2441

def _init_index(self, path, extra_lines=[]):

2034

2442

"""Initialize an index."""

2035

2443

sio = StringIO()

2044

2452

2045

2453

def keys(self):

2046

2454

"""Get all the keys in the collection.

2047

2455

2048

2456

The keys are not ordered.

2049

2457

"""

2050

2458

result = set()

2063

2471

for suffix in self._kndx_cache[prefix][1]:

2064

2472

result.add(prefix + (suffix,))

2065

2473

return result

2066

2474

2067

2475

def _load_prefixes(self, prefixes):

2068

2476

"""Load the indices for prefixes."""

2069

2477

self._check_read()

2107

2515

2108

2516

def _dictionary_compress(self, keys):

2109

2517

"""Dictionary compress keys.

2110

2518

2111

2519

:param keys: The keys to generate references to.

2112

2520

:return: A string representation of keys. keys which are present are

2113

2521

dictionary compressed, and others are emitted as fulltext with a

2177

2585

:param is_locked: A callback to check whether the object should answer

2178

2586

queries.

2179

2587

:param deltas: Allow delta-compressed records.

2180

:param parents: If True, record knits parents, if not do not record

2588

:param parents: If True, record knits parents, if not do not record

2181

2589

parents.

2182

2590

:param add_callback: If not None, allow additions to the index and call

2183

2591

this callback with a list of added GraphIndex nodes:

2196

2604

"parent tracking.")

2197

2605

self.has_graph = parents

2198

2606

self._is_locked = is_locked

2607

self._missing_compression_parents = set()

2199

2608

2200

2609

def __repr__(self):

2201

2610

return "%s(%r)" % (self.__class__.__name__, self._graph_index)

2202

2611

2203

def add_records(self, records, random_id=False):

2612

def add_records(self, records, random_id=False,

2613

missing_compression_parents=False):

2204

2614

"""Add multiple records to the index.

2205

2615

2206

2616

This function does not insert data into the Immutable GraphIndex

2207

2617

backing the KnitGraphIndex, instead it prepares data for insertion by

2208

2618

the caller and checks that it is safe to insert then calls

2212

2622

(key, options, access_memo, parents).

2213

2623

:param random_id: If True the ids being added were randomly generated

2214

2624

and no check for existence will be performed.

2625

:param missing_compression_parents: If True the records being added all

2626

refer to unavailable texts (or texts inside records) as compression

2627

parents. If False the records are only compressed against texts

2628

already in the index (or inside records).

2215

2629

"""

2216

2630

if not self._add_callback:

2217

2631

raise errors.ReadOnlyError(self)

2219

2633

# anymore.

2220

2634

2221

2635

keys = {}

2636

compression_parents = set()

2222

2637

for (key, options, access_memo, parents) in records:

2223

2638

if self._parents:

2224

2639

parents = tuple(parents)

2235

2650

if self._deltas:

2236

2651

if 'line-delta' in options:

2237

2652

node_refs = (parents, (parents[0],))

2653

if missing_compression_parents:

2654

compression_parents.add(parents[0])

2238

2655

else:

2239

2656

node_refs = (parents, ())

2240

2657

else:

2262

2679

for key, (value, node_refs) in keys.iteritems():

2263

2680

result.append((key, value))

2264

2681

self._add_callback(result)

2265

2682

if missing_compression_parents:

2683

# This may appear to be incorrect (it does not check for

2684

# compression parents that are in the existing graph index),

2685

# but such records won't have been buffered, so this is

2686

# actually correct: every entry when

2687

# missing_compression_parents==True either has a missing parent, or

2688

# a parent that is one of the keys in records.

2689

compression_parents.difference_update(keys)

2690

self._missing_compression_parents.update(compression_parents)

2691

# Adding records may have satisfied missing compression parents.

2692

self._missing_compression_parents.difference_update(keys)

2693

2694

def scan_unvalidated_index(self, graph_index):

2695

"""Inform this _KnitGraphIndex that there is an unvalidated index.

2696

2697

This allows this _KnitGraphIndex to keep track of any missing

2698

compression parents we may want to have filled in to make those

2699

indices valid.

2700

2701

:param graph_index: A GraphIndex

2702

"""

2703

if self._deltas:

2704

new_missing = graph_index.external_references(ref_list_num=1)

2705

new_missing.difference_update(self.get_parent_map(new_missing))

2706

self._missing_compression_parents.update(new_missing)

2707

2708

def get_missing_compression_parents(self):

2709

"""Return the keys of missing compression parents.

2710

2711

Missing compression parents occur when a record stream was missing

2712

basis texts, or an index was scanned that had missing basis texts.

2713

"""

2714

return frozenset(self._missing_compression_parents)

2715

2266

2716

def _check_read(self):

2267

2717

"""raise if reads are not permitted."""

2268

2718

if not self._is_locked():

2328

2778

2329

2779

def _get_entries(self, keys, check_present=False):

2330

2780

"""Get the entries for keys.

2331

2781

2332

2782

:param keys: An iterable of index key tuples.

2333

2783

"""

2334

2784

keys = set(keys)

2396

2846

2397

2847

def get_position(self, key):

2398

2848

"""Return details needed to access the version.

2399

2849

2400

2850

:return: a tuple (index, data position, size) to hand to the access

2401

2851

logic to get the record.

2402

2852

"""

2407

2857

2408

2858

def keys(self):

2409

2859

"""Get all the keys in the collection.

2410

2860

2411

2861

The keys are not ordered.

2412

2862

"""

2413

2863

self._check_read()

2414

2864

return [node[1] for node in self._graph_index.iter_all_entries()]

2415

2865

2416

2866

missing_keys = _mod_index._missing_keys_from_parent_map

2417

2867

2418

2868

def _node_to_position(self, node):

2562

3012

def get_raw_records(self, memos_for_retrieval):

2563

3013

"""Get the raw bytes for records.

2564

3014

2565

:param memos_for_retrieval: An iterable containing the (index, pos,

3015

:param memos_for_retrieval: An iterable containing the (index, pos,

2566

3016

length) memo for retrieving the bytes. The Pack access method

2567

3017

looks up the pack to use for a given record in its index_to_pack

2568

3018

map.

Older »