~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to doc/formats.txt

Committer: Robert Collins
Date: 2006-05-24 08:14:45 UTC
mfrom: (1725.1.1 benchmark)
mto: (1725.2.6 commit)
mto: This revision was merged to the branch mainline in revision 1729.
Revision ID: robertc@robertcollins.net-20060524081445-c046b4406ffc8dfa

(rbc)Merge in benchmark --lsprof-timed lsprofiling feature. (Robert Collins, Martin Pool).

files added:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/patch.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

patience-test.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

files removed:
bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/tests.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/faq.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quickref.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

test.sh

testbzr

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/xml.py => bzrlib/xml_serializer.py

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/urlgrabber/keepalive.py

setup.py *

Show diffs side-by-side

added added

removed removed

doc/formats.txt

*****************

Bazaar-NG formats

*****************

.. contents::

Since branches are working directories there is just a single

directory format.

There is one metadata directory called ``.bzr`` at the top of each

tree. Control files inside ``.bzr`` are never touched by patches and

should not normally be edited by the user.

These files are designed so that repository-level operations are ACID

without depending on atomic operations spanning multiple files. There

are two particular cases: aborting a transaction in the middle, and

contention from multiple processes. We also need to be careful to

flush files to disk at appropriate points; even this may not be

totally safe if the filesystem does not guarantee ordering between

multiple file changes, so we need to be sure to roll back.

The design must also be such that the directory can simply be copied

and that hardlinked directories will work. (So we must always replace

files, never just append.)

A cache is kept under here of easily-accessible information about

previous revisions. This should be under a single directory so that

it can be easily identified, excluded from backups, removed, etc.

This might contain pristine trees from previous revisions, manifests

and inventories, etc. It might also contain working directories when

building a commit, etc. Call this maybe ``cache`` or ``tmp``.

I wonder if we should use .zip files for revisions and cacherevs

rather than tar files so that random access is easier/more efficient.

There is a Python library ``zipfile``.

Signing XML files

*****************

bzr relies on storing hashes or GPG signatures of various XML files.

There can be multiple equivalent representations of the same XML tree,

but these will have different byte-by-byte hashes.

Once signed files are written out, they must be stored byte-for-byte

and never re-encoded or renormalized, because that would break their

hash or signature.

Branch metadata

***************

All inside ``.bzr``

``README``

Tells people not to touch anything here.

``branch-format``

Identifies the parent as a Bazaar-NG branch; contains the overall

branch metadata format as a string.

``pristine-directory``

Identifies that this is a pristine directory and may not be

committed to.

``patches/``

Directory containing all patches applied to this branch, one per

file. Patches are stored as compressed deltas. We also store the

hash of the delta, hash of the before and after manifests, and

optionally a GPG signature.

``cache/``

Contains various cached data that can be destroyed and will be

recreated. (It should not be modified.)

``cache/pristine/``

Contains cached full trees for selected previous revisions, used

when generating diffs, etc.

``cache/inventory/``

Contains cached inventories of previous revisions.

``cache/snapshot/``

Contains tarballs of cached revisions of the tree, named by their

revision id. These can also be removed.

``patch-history``

File containing the UUIDs of all patches taken in this branch,

in the order they were taken.

Each commit adds exactly one line to this file; lines are

never removed or reordered.

``merged-patches``

List of foreign patches that have been merged into this branch.

Must have no entries in common with ``patch-history``. Commits that

include merges add to this file; lines are never removed or

reordered.

100

101

``pending-merge-patches``

102

List of foreign patches that have been merged and are waiting to be

103

committed.

104

105

``branch-name``

106

User-qualified name of the branch, for the purpose of describing the

107

origin of patches, e.g. ``mbp@sourcefrog.net/distcc--main``.

108

109

``friends``

110

List of branches from which we have pulled; file containing a list

111

of pairs of branch-name and location.

112

113

``parent``

114

Default pull/push target.

115

116

``pending-inventory``

117

Mapping from UUIDs to file name in the current working directory.

118

119

``branch-lock``

120

Lock held while modifying the branch, to protect against clashing

121

updates.

122

123

124

Locking

125

*******

126

127

Is locking a good strategy? Perhaps some kind of read-copy-update or

128

seq-lock based mechanism would work better?

129

130

If we do use a locking algorithm, is it OK to rely on filesystem

131

locking or do we need our own mechanism? I think most hosts should

132

have reasonable ``flock()`` or equivalent, even on NFS. One risk is

133

that on NFS it is easy to have broken locking and not know it, so it

134

might be better to have something that will fail safe.

135

136

Filesystem locks go away if the machine crashes or the process is

137

terminated; this can be a feature in that we do not need to deal with

138

stale locks but also a drawback in that the lock itself does not

139

indicate cleanup may be needed.

140

141

robertc points out that tla converged on renaming a directory as a

142

mechanism: this is one thing which is known to be atomic on almost all

143

filesystems. Apparently renaming files, creating directories, making

144

symlinks etc are not good enough.

145

146

147

148

Delta

149

*****

150

151

XML document plus a bag of patches, expressing the difference between

152

two revisions. May be a partial delta.

153

154

* list of entries

155

156

* entry

157

158

* parent directory (if any)

159

* before-name or null if new

160

* after-name or null if deleted

161

* uuid

162

* type (dir, file, symlink, ...)

163

* patch type (patch, full-text, xdelta, ...)

164

* patch filename (?)

165

166

167

Inventory

168

*********

169

170

XML document; series of entries. (Quite similar to the svn

171

``entries`` file; perhaps should even have that name.)

172

Stored identified by its hash.

173

174

An inventory is stored for recorded revisions, also a

175

``pending-inventory`` for a working directory.

176

177

178

179

Revision

180

********

181

182

XML document. Stored identified by its hash.

183

184

committer

185

RFC-2822-style name of the committer. Should match the key used to

186

sign the revision.

187

188

comment

189

multi-line free-form text; whitespace and line breaks preserved

190

191

timestamp

192

As floating-point seconds since epoch.

193

194

precursor

195

ID of the previous revision on this branch. May be absent (null) if

196

this is the start of a new branch.

197

198

branch name

199

Name of the branch to which this was originally committed.

200

201

(I'm not totally satisfied that this is the right way to do it; the

202

results will be a bit weird when a series of revisions pass through

203

variously named branches.)

204

205

inventory_hash

206

Acts as a pointer to the inventory for this revision.

207

208

merged-branches

209

Revision ids of complete branches merged into this revision. If a

210

revision is listed, that revision and transitively its predecessor

211

and all other merged-branches are merged. This is empty except

212

where merges have occurred.

213

214

merged-patches

215

Revision ids of cherry-picked patches. Patches whose branches are

216

merged need not be listed here. Listing a revision ID implies that

217

only the change of that particular revision from its predecessor has

218

been merged in. This is empty except where cherry-picks have

219

occurred.

220

221

The transitive closure avoids Arch's problem of needing to list a

222

large number of previous revisions. As ddaa writes:

223

224

Continuation revisions (created by tla tag or baz branch) are associated

225

to a patchlog whose New-patches header lists the revisions associated to

226

all the patchlogs present in the tree. That was introduced as an

227

optimisation so the set of patchlogs in any revision could be determined

228

solely by examining the patchlogs of ancestor revisions in the same

229

branch. This behaves well as long as the total count of patchlogs is

230

reasonably small or new branches are not very frequent.

231

232

A continuation revision on $tree currently creates a patchlog of

233

about 500K. This patchlog is present in all descendants of the revision,

234

and all revisions that merge it.

235

236

It may be useful at some times to keep a cache of all the branches, or

237

all the revisions, present in the history of a branch, so that we do not

238

need to walk the whole history of the branch to build this list.

239

240

241

----

242

243

Proposed changes

244

****************

245

246

* Don't store parent-id in all revisions, but rather have <DIRECTORY>

247

nodes that contain entries for children?

248

249

* Assign an id to the root of the tree, perhaps listed in the top of

250

the inventory?

Older »