~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store.py

Committer: Robert Collins
Date: 2005-08-25 12:46:42 UTC
mfrom: (1116)
mto: (974.1.50) (1185.1.10) (1092.3.1)
mto: This revision was merged to the branch mainline in revision 1139.
Revision ID: robertc@robertcollins.net-20050825124642-45ed1cd74db10370

merge from mpool

files added:
bzrlib/meta_store.py

bzrlib/plugins/checkperms

bzrlib/remotebranch.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

files removed:
INSTALL

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/clone.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/externalcommand.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/revisionspec.py

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/stub_sftp.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_command.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_conflicts.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/test_upgrade.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testannotate.py

bzrlib/selftest/testapi.py

bzrlib/selftest/testconfig.py

bzrlib/selftest/testgpg.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhttp.py

bzrlib/selftest/testidentitymap.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testnonascii.py

bzrlib/selftest/testoptions.py

bzrlib/selftest/testrevprops.py

bzrlib/selftest/testreweave.py

bzrlib/selftest/testsampler.py

bzrlib/selftest/testsftp.py

bzrlib/selftest/teststore.py

bzrlib/selftest/testtestament.py

bzrlib/selftest/testtransactions.py

bzrlib/selftest/testtransport.py

bzrlib/selftest/testtsort.py

bzrlib/selftest/testworkingtree.py

bzrlib/selftest/treeshape.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/transactions.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

patches/cache_weave_inclusions.diff

tools/capture_tree.py

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/selftest/testplugins.py => bzrlib/selftest/plugins.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/selftest/test_weave.py => tools/testweave.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

bzr *

bzr-man.py

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

contrib/zsh/_bzr

doc/index.txt

doc/todo-from-arch.txt

setup.py *

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/store.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Could remember a bias towards whether a particular store is typically

# compressed or not.

"""

Stores are the main data-storage mechanism for Bazaar-NG.

unique ID.

"""

import os

from cStringIO import StringIO

import urllib

from zlib import adler32

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible

import os, tempfile, types, osutils, gzip, errno

from stat import ST_SIZE

from StringIO import StringIO

from bzrlib.trace import mutter

import bzrlib.transport as transport

from bzrlib.transport.local import LocalTransport

import bzrlib.ui

######################################################################

# stores

pass

class Store(object):

"""This class represents the abstract storage layout for saving information.

class ImmutableStore(object):

"""Store that holds files indexed by unique names.

Files can be added, but not modified once they are in. Typically

the hash is used as the name, or something else known to be unique,

such as a UUID.

>>> st = ImmutableScratchStore()

>>> st.add(StringIO('hello'), 'aa')

>>> 'aa' in st

True

>>> 'foo' in st

False

You are not allowed to add an id that is already present.

Entries can be retrieved as files, which may then be read.

>>> st.add(StringIO('goodbye'), '123123')

>>> st['123123'].read()

'goodbye'

TODO: Atomic add by writing to a temporary file and renaming.

In bzr 0.0.5 and earlier, files within the store were marked

readonly on disk. This is no longer done but existing stores need

to be accommodated.

"""

def __len__(self):

raise NotImplementedError('Children should define their length')

def get(self, file_id, suffix=None):
    """Return a file object reading the entry stored under file_id.

    When suffix is given, the entry for that named suffix of file_id is
    retrieved instead.  Abstract here: always raises NotImplementedError.
    """
    raise NotImplementedError()

def __getitem__(self, fileid):

"""DEPRECATED. Please use .get(file_id) instead."""

raise NotImplementedError

#def __contains__(self, fileid):

# """Deprecated, please use has_id"""

# raise NotImplementedError

def __iter__(self):

raise NotImplementedError

def add(self, f, fileid):
    """Store the file object f so it can be retrieved as fileid.

    Abstract here: always raises NotImplementedError.
    """
    message = 'Children of Store must define their method of adding entries.'
    raise NotImplementedError(message)

def has_id(self, file_id, suffix=None):
    """Report whether file_id is present in the store.

    suffix, when given, names a per-file suffix to look for instead
    (i.e. for digital signature data).  Abstract here: always raises
    NotImplementedError.
    """
    raise NotImplementedError()

def listable(self):
    """Return True if this store is able to be listed.

    The answer is simply whether the object exposes an ``__iter__``
    attribute.
    """
    # NOTE(review): any class that defines __iter__ (even one that only
    # raises NotImplementedError) answers True here - confirm intended.
    can_iterate = hasattr(self, "__iter__")
    return can_iterate

def copy_multi(self, other, ids, pb=None, permit_failure=False):

def __init__(self, basedir):

self._basedir = basedir

def _path(self, id):

if '\\' in id or '/' in id:

raise ValueError("invalid store id %r" % id)

return os.path.join(self._basedir, id)

def __repr__(self):

return "%s(%r)" % (self.__class__.__name__, self._basedir)

def add(self, f, fileid, compressed=True):
    """Add contents of a file into the store.

    f -- An open file, or file-like object.  A string is also accepted
        and is stored as the content itself.
    fileid -- Name to store the content under.
    compressed -- When true (the default) the content is gzipped and
        written to '<path>.gz'; otherwise it is written unchanged.

    Raises BzrError if either the plain or the '.gz' form of fileid is
    already present.
    """
    # FIXME: Only works on files that will fit in memory

    from bzrlib.atomicfile import AtomicFile

    mutter("add store entry %r" % (fileid))
    if isinstance(f, types.StringTypes):
        content = f
    else:
        content = f.read()

    p = self._path(fileid)
    if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):
        # No extra import here: an ImportError at this point would hide
        # the real "already contains" error from the caller.
        raise BzrError("store %r already contains id %r" % (self._basedir, fileid))

    fn = p
    if compressed:
        fn = fn + '.gz'

    af = AtomicFile(fn, 'wb')
    try:
        if compressed:
            gf = gzip.GzipFile(mode='wb', fileobj=af)
            gf.write(content)
            gf.close()
        else:
            af.write(content)
        af.commit()
    finally:
        # Called whether or not commit() ran.
        af.close()

112

113

114

def copy_multi(self, other, ids, permit_failure=False):

115

"""Copy texts for ids from other into self.

116

If an id is present in self, it is skipped. A count of copied

ids is returned, which may be less than len(ids).

117

If an id is present in self, it is skipped.

118

:param other: Another Store object

:param ids: A list of entry ids to be copied

:param pb: A ProgressBar object, if none is given, the default will be created.

:param permit_failure: Allow missing entries to be ignored

100

:return: (n_copied, [failed]) The number of entries copied successfully,

101

followed by a list of entries which could not be copied (because they

102

were missing)

119

Returns (count_copied, failed), where failed is a collection of ids

120

that could not be copied.

103

121

"""

104

if pb is None:

105

pb = bzrlib.ui.ui_factory.progress_bar()

122

pb = bzrlib.ui.ui_factory.progress_bar()

123

106

124

pb.update('preparing to copy')

125

to_copy = [id for id in ids if id not in self]

126

if isinstance(other, ImmutableStore):

127

return self.copy_multi_immutable(other, to_copy, pb)

128

count = 0

129

for id in to_copy:

130

count += 1

131

pb.update('copy', count, len(to_copy))

132

if not permit_failure:

133

self.add(other[id], id)

134

else:

135

try:

136

entry = other[id]

137

except IndexError:

138

failures.add(id)

139

continue

140

self.add(entry, id)

141

142

assert count == len(to_copy)

143

pb.clear()

144

return count, []

145

146

def copy_multi_immutable(self, other, to_copy, pb, permit_failure=False):

147

from shutil import copyfile

148

count = 0

107

149

failed = set()

108

count = 0

109

ids = list(ids) # get the list for showing a length.

110

for fileid in ids:

111

count += 1

112

if self.has_id(fileid):

113

continue

150

for id in to_copy:

151

p = self._path(id)

152

other_p = other._path(id)

114

153

try:

115

self._copy_one(fileid, None, other, pb)

116

for suffix in self._suffixes:

117

try:

118

self._copy_one(fileid, suffix, other, pb)

119

except KeyError:

120

pass

121

pb.update('copy', count, len(ids))

122

except KeyError:

123

if permit_failure:

124

failed.add(fileid)

154

copyfile(other_p, p)

155

except IOError, e:

156

if e.errno == errno.ENOENT:

157

if not permit_failure:

158

copyfile(other_p+".gz", p+".gz")

159

else:

160

try:

161

copyfile(other_p+".gz", p+".gz")

162

except IOError, e:

163

if e.errno == errno.ENOENT:

164

failed.add(id)

165

else:

166

raise

125

167

else:

126

168

raise

127

assert count == len(ids)

169

170

count += 1

171

pb.update('copy', count, len(to_copy))

172

assert count == len(to_copy)

128

173

pb.clear()

129

174

return count, failed

130

131

def _copy_one(self, fileid, suffix, other, pb):

132

"""Most generic copy-one object routine.

133

134

Subclasses can override this to provide an optimised

135

copy between their own instances. Such overriden routines

136

should call this if they have no optimised facility for a

137

specific 'other'.

138

"""

139

f = other.get(fileid, suffix)

140

self.add(f, fileid, suffix)

141

142

143

class TransportStore(Store):

144

"""A TransportStore is a Store superclass for Stores that use Transports."""

145

146

def add(self, f, fileid, suffix=None):
    """Add contents of a file into the store.

    f -- A file-like object, or string
    fileid -- Id to store the content under.
    suffix -- Optional suffix to store the content under.

    Raises BzrError if the transport already has the target path.
    """
    mutter("add store entry %r" % (fileid))

    if suffix is None:
        fn = self._relpath(fileid)
    else:
        fn = self._relpath(fileid, [suffix])
    if self._transport.has(fn):
        raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

    if self._prefixed:
        # hash_prefix() ends in '/', which is dropped to name the directory.
        prefix_dir = hash_prefix(fileid)[:-1]
        try:
            self._transport.mkdir(prefix_dir)
        except errors.FileExists:
            # The prefix directory is already there.
            pass

    self._add(fn, f)

167

168

def _check_fileid(self, fileid):
    """Validate fileid, returning None when it is acceptable.

    Raises TypeError when fileid is not a string type, and ValueError
    when it contains a backslash or a forward slash.
    """
    if not isinstance(fileid, basestring):
        raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
    for separator in ('\\', '/'):
        if separator in fileid:
            raise ValueError("invalid store id %r" % fileid)

173

174

def has_id(self, fileid, suffix=None):
    """See Store.has_id."""
    if suffix is None:
        fn = self._relpath(fileid)
    else:
        fn = self._relpath(fileid, [suffix])
    present = self._transport.has(fn)
    return present

181

182

def _get(self, filename):

183

"""Return an vanilla file stream for clients to read from.

184

185

This is the body of a template method on 'get', and should be

186

implemented by subclasses.

187

"""

188

raise NotImplementedError

189

190

def get(self, fileid, suffix=None):
    """See Store.get()."""
    # A 'gz' suffix is treated like no suffix at all when building the
    # relative path.
    if suffix is not None and suffix != 'gz':
        fn = self._relpath(fileid, [suffix])
    else:
        fn = self._relpath(fileid)
    try:
        stream = self._get(fn)
    except errors.NoSuchFile:
        # A missing file is reported to the caller as KeyError(fileid).
        raise KeyError(fileid)
    return stream

200

201

def __init__(self, a_transport, prefixed=False):
    """Bind the store to a_transport.

    a_transport -- must be a transport.Transport instance (asserted).
    prefixed -- stored as self._prefixed; when true, paths are built
        with a hash_prefix() directory in front of the file id.
    """
    assert isinstance(a_transport, transport.Transport)
    super(TransportStore, self).__init__()
    # conflating the .gz extension and user suffixes was a mistake.
    # RBC 20051017 - TODO SOON, separate them again.
    self._suffixes = set()
    self._prefixed = prefixed
    self._transport = a_transport

209

210

def _iter_files_recursive(self):
    """Iterate through the files in the transport.

    Each relative path reported by the transport is URL-unquoted before
    being yielded.
    """
    unquote = urllib.unquote
    for escaped in self._transport.iter_files_recursive():
        yield unquote(escaped)

175

176

177

def __contains__(self, fileid):

178

""""""

179

p = self._path(fileid)

180

return (os.access(p, os.R_OK)

181

or os.access(p + '.gz', os.R_OK))

182

183

# TODO: Guard against the same thing being stored twice, compressed and uncompressed

214

184

215

185

def __iter__(self):

216

for relpath in self._iter_files_recursive():

217

# worst case is one of each suffix.

218

name = os.path.basename(relpath)

219

if name.endswith('.gz'):

220

name = name[:-3]

221

skip = False

222

for count in range(len(self._suffixes)):

223

for suffix in self._suffixes:

224

if name.endswith('.' + suffix):

225

skip = True

226

if not skip:

227

yield name

186

for f in os.listdir(self._basedir):

187

if f[-3:] == '.gz':

188

# TODO: case-insensitive?

189

yield f[:-3]

190

else:

191

yield f

228

192

229

193

def __len__(self):

230

return len(list(self.__iter__()))

231

232

def _relpath(self, fileid, suffixes=()):
    """Return the URL-escaped relative path used for fileid.

    fileid -- The entry id; validated with _check_fileid().
    suffixes -- Sequence of suffixes appended after the id, joined with
        '.'.  Each must already be in self._suffixes and must itself
        pass _check_fileid().  The default is an immutable empty tuple
        rather than a shared mutable list.

    When self._prefixed is true the id is preceded by hash_prefix(fileid).

    Raises ValueError for an unregistered suffix; whatever
    _check_fileid() raises for an invalid id or suffix propagates.
    """
    self._check_fileid(fileid)
    for suffix in suffixes:
        if suffix not in self._suffixes:
            raise ValueError("Unregistered suffix %r" % suffix)
        self._check_fileid(suffix)
    if self._prefixed:
        path = [hash_prefix(fileid) + fileid]
    else:
        path = [fileid]
    path.extend(suffixes)
    return transport.urlescape('.'.join(path))

244

245

def __repr__(self):

246

if self._transport is None:

247

return "%s(None)" % (self.__class__.__name__)

248

else:

249

return "%s(%r)" % (self.__class__.__name__, self._transport.base)

250

251

__str__ = __repr__

252

253

def listable(self):
    """Return True if this store is able to be listed.

    The question is delegated to the underlying transport.
    """
    answer = self._transport.listable()
    return answer

256

257

def register_suffix(self, suffix):
    """Register a suffix as being expected in this store.

    The suffix is validated with _check_fileid() before being recorded
    in self._suffixes.
    """
    self._check_fileid(suffix)
    known = self._suffixes
    known.add(suffix)

194

return len(os.listdir(self._basedir))

195

196

197

def __getitem__(self, fileid):
    """Returns a file reading from a particular entry.

    The gzip form ('<path>.gz') is tried first and returned as a
    gzip.GzipFile; if that file does not exist the plain file is opened
    instead.  Raises IndexError(fileid) when neither form exists.
    """
    p = self._path(fileid)
    try:
        return gzip.GzipFile(p + '.gz', 'rb')
    except IOError, e:
        # Only "no such file" falls through to the plain form; any other
        # I/O error is re-raised.
        if e.errno != errno.ENOENT:
            raise

    try:
        return file(p, 'rb')
    except IOError, e:
        # Same rule for the plain file: only a missing file is tolerated.
        if e.errno != errno.ENOENT:
            raise

    # Neither the compressed nor the plain file exists.
    raise IndexError(fileid)

213

261

214

262

215

def total_size(self):

263

216

"""Return (count, bytes)

266

219

the content."""

267

220

total = 0

268

221

count = 0

269

for relpath in self._transport.iter_files_recursive():

222

for fid in self:

270

223

count += 1

271

total += self._transport.stat(relpath).st_size

224

p = self._path(fid)

225

try:

226

total += os.stat(p)[ST_SIZE]

227

except OSError:

228

total += os.stat(p + '.gz')[ST_SIZE]

272

229

273

230

return count, total

274

231

275

232

276

def ImmutableMemoryStore():
    """Return a TextStore built on a fresh MemoryTransport."""
    memory_transport = transport.memory.MemoryTransport()
    return bzrlib.store.text.TextStore(memory_transport)

278

279

280

class CachedStore(Store):

281

"""A store that caches data locally, to avoid repeated downloads.

282

The precacache method should be used to avoid server round-trips for

283

every piece of data.

233

234

235

class ImmutableScratchStore(ImmutableStore):

236

"""Self-destructing test subclass of ImmutableStore.

237

238

The Store only exists for the lifetime of the Python object.

239

Obviously you should not put anything precious in it.

284

240

"""

285

286

def __init__(self, store, cache_dir):
    """Wrap store with a local cache kept under cache_dir.

    store -- The source store; kept as self.source_store.
    cache_dir -- Directory handed to LocalTransport for the cache store.
    """
    super(CachedStore, self).__init__()
    self.source_store = store
    # This clones the source store type with a locally bound
    # transport.  FIXME: it assumes a constructor is == cloning.
    # clonable store - it might be nicer to actually have a clone()
    # or something. RBC 20051003
    store_type = store.__class__
    local_transport = LocalTransport(cache_dir)
    self.cache_store = store_type(local_transport)

294

295

def get(self, id):
    """Return a file for id, filling the local cache on a miss.

    When the cache store does not yet hold id, the entry is read from
    the source store and added to the cache first.  The returned file
    always comes from the cache store.
    """
    mutter("Cache add %s" % id)
    # Ask through has_id(), as CachedStore.has_id() does, rather than
    # `in`: Store marks __contains__ as deprecated in favour of has_id.
    if not self.cache_store.has_id(id):
        self.cache_store.add(self.source_store.get(id), id)
    return self.cache_store.get(id)

300

301

def has_id(self, fileid, suffix=None):
    """See Store.has_id."""
    # The cache is asked first, then the source; nothing is copied into
    # the cache here (we could copy at this time).
    for candidate in (self.cache_store, self.source_store):
        if candidate.has_id(fileid, suffix):
            return True
    return False

309

310

311

def copy_all(store_from, store_to):
    """Copy all ids from one store to another.

    Raises UnlistableStore if store_from reports that it cannot be
    listed.  Nothing is returned.
    """
    # TODO: Optional progress indicator
    if not store_from.listable():
        raise UnlistableStore(store_from)
    every_id = list(store_from)
    store_to.copy_multi(store_from, every_id)

318

319

def hash_prefix(file_id):
    """Return a directory prefix 'xx/' for file_id.

    xx is the low byte of the Adler-32 checksum of file_id, written as
    two lowercase hex digits.
    """
    low_byte = adler32(file_id) & 0xff
    return "%02x/" % low_byte

321

241

def __init__(self):
    """Create the store in a freshly made temporary directory."""
    scratch_dir = tempfile.mkdtemp()
    ImmutableStore.__init__(self, scratch_dir)

243

244

def __del__(self):

245

for f in os.listdir(self._basedir):

246

fpath = os.path.join(self._basedir, f)

247

# needed on windows, and maybe some other filesystems

248

os.chmod(fpath, 0600)

249

os.remove(fpath)

250

os.rmdir(self._basedir)

251

mutter("%r destroyed" % self)

Older »