~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store.py

Committer: Aaron Bentley
Date: 2005-09-19 02:52:24 UTC
mto: (1185.1.29)
mto: This revision was merged to the branch mainline in revision 1390.
Revision ID: aaron.bentley@utoronto.ca-20050919025224-1cc3c70640086e09

TODO re tests

files added:
bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/selftest/testremotebranch.py

bzrlib/upgrade.py

patches/annotate3.patch

patches/annotate4.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testsweet.py

files removed:
NEWS.developers

bzrlib/annotate.py

bzrlib/clone.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_revision_info.py

bzrlib/selftest/testtransport.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/xml4.py

bzrlib/xml5.py

patches/cache_weave_inclusions.diff

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/selftest/test_weave.py => tools/testweave.py

files modified:
.bzrignore

NEWS

bzr-man.py

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/progress.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/store.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Could remember a bias towards whether a particular store is typically

# compressed or not.

"""

Stores are the main data-storage mechanism for Bazaar-NG.

unique ID.

"""

from cStringIO import StringIO

from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible

import os, tempfile, types, osutils, gzip, errno

from stat import ST_SIZE

from StringIO import StringIO

from bzrlib.errors import BzrError

from bzrlib.trace import mutter

import bzrlib.transport

from bzrlib.transport.local import LocalTransport

import bzrlib.ui

######################################################################

# stores

pass

class Store(object):

"""This class represents the abstract storage layout for saving information.

class ImmutableStore(object):

"""Store that holds files indexed by unique names.

Files can be added, but not modified once they are in. Typically

the hash is used as the name, or something else known to be unique,

such as a UUID.

>>> st = ImmutableScratchStore()

>>> st.add(StringIO('hello'), 'aa')

>>> 'aa' in st

True

>>> 'foo' in st

False

You are not allowed to add an id that is already present.

Entries can be retrieved as files, which may then be read.

>>> st.add(StringIO('goodbye'), '123123')

>>> st['123123'].read()

'goodbye'

TODO: Atomic add by writing to a temporary file and renaming.

In bzr 0.0.5 and earlier, files within the store were marked

readonly on disk. This is no longer done but existing stores need

to be accommodated.

"""

def __len__(self):

raise NotImplementedError('Children should define their length')

def __getitem__(self, fileid):

"""Returns a file reading from a particular entry."""

raise NotImplementedError

def __contains__(self, fileid):

""""""

raise NotImplementedError

def __iter__(self):

raise NotImplementedError

def add(self, f, fileid):

"""Add a file object f to the store accessible from the given fileid"""

raise NotImplementedError('Children of Store must define their method of adding entries.')

def add_multi(self, entries):

"""Add a series of file-like or string objects to the store with the given

identities.

:param entries: A list of tuples of file,id pairs [(file1, id1), (file2, id2), ...]

This could also be a generator yielding (file,id) pairs.

"""

for f, fileid in entries:

self.add(f, fileid)

def has(self, fileids):

"""Return True/False for each entry in fileids.

:param fileids: A List or generator yielding file ids.

:return: A generator or list returning True/False for each entry.

"""

for fileid in fileids:

if fileid in self:

yield True

def __init__(self, basedir):

self._basedir = basedir

def _path(self, entry_id):

if not isinstance(entry_id, basestring):

raise TypeError(type(entry_id))

if '\\' in entry_id or '/' in entry_id:

raise ValueError("invalid store id %r" % entry_id)

return os.path.join(self._basedir, entry_id)

def __repr__(self):

return "%s(%r)" % (self.__class__.__name__, self._basedir)

def add(self, f, fileid, compressed=True):

"""Add contents of a file into the store.

f -- An open file, or file-like object."""

# FIXME: Only works on files that will fit in memory

from bzrlib.atomicfile import AtomicFile

mutter("add store entry %r" % (fileid))

if isinstance(f, types.StringTypes):

content = f

else:

content = f.read()

p = self._path(fileid)

if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):

raise BzrError("store %r already contains id %r" % (self._basedir, fileid))

fn = p

100

if compressed:

101

fn = fn + '.gz'

102

103

af = AtomicFile(fn, 'wb')

104

try:

105

if compressed:

106

gf = gzip.GzipFile(mode='wb', fileobj=af)

107

gf.write(content)

108

gf.close()

109

else:

yield False

def get(self, fileids, permit_failure=False, pb=None):

"""Return a set of files, one for each requested entry.

:param permit_failure: If true, return None for entries which do not

exist.

:return: A list or generator of file-like objects, one for each id.

"""

for fileid in fileids:

try:

yield self[fileid]

except KeyError:

100

if permit_failure:

101

yield None

102

else:

103

raise

104

105

def copy_multi(self, other, ids, pb=None, permit_failure=False):

110

af.write(content)

111

af.commit()

112

finally:

113

af.close()

114

115

116

def copy_multi(self, other, ids, permit_failure=False):

106

117

"""Copy texts for ids from other into self.

107

118

108

If an id is present in self, it is skipped. A count of copied

109

ids is returned, which may be less than len(ids).

119

If an id is present in self, it is skipped.

110

120

111

:param other: Another Store object

112

:param ids: A list of entry ids to be copied

113

:param pb: A ProgressBar object, if none is given, the default will be created.

114

:param permit_failure: Allow missing entries to be ignored

115

:return: (n_copied, [failed]) The number of entries copied successfully,

116

followed by a list of entries which could not be copied (because they

117

were missing)

121

Returns (count_copied, failed), where failed is a collection of ids

122

that could not be copied.

118

123

"""

119

if pb is None:

120

pb = bzrlib.ui.ui_factory.progress_bar()

121

122

# XXX: Is there any reason why we couldn't make this accept a generator

123

# and build a list as it finds things to copy?

124

ids = list(ids) # Make sure we don't have a generator, since we iterate 2 times

124

pb = bzrlib.ui.ui_factory.progress_bar()

125

126

pb.update('preparing to copy')

126

to_copy = []

127

for file_id, has in zip(ids, self.has(ids)):

128

if not has:

129

to_copy.append(file_id)

130

return self._do_copy(other, to_copy, pb, permit_failure=permit_failure)

131

132

def _do_copy(self, other, to_copy, pb, permit_failure=False):

133

"""This is the standard copying mechanism, just get them one at

134

a time from remote, and store them locally.

135

136

:param other: Another Store object

137

:param to_copy: A list of entry ids to copy

138

:param pb: A ProgressBar object to display completion status.

139

:param permit_failure: Allow missing entries to be ignored

140

:return: (n_copied, [failed])

141

The number of entries copied, and a list of failed entries.

142

"""

143

# This should be updated to use add_multi() rather than

144

# the current methods of buffering requests.

145

# One question, is it faster to queue up 1-10 and then copy 1-10

146

# then queue up 11-20, copy 11-20

147

# or to queue up 1-10, copy 1, queue 11, copy 2, etc?

148

# sort of pipeline versus batch.

149

150

# We can't use self._transport.copy_to because we don't know

151

# whether the local tree is in the same format as other

127

to_copy = [id for id in ids if id not in self]

128

if isinstance(other, ImmutableStore):

129

return self.copy_multi_immutable(other, to_copy, pb,

130

permit_failure=permit_failure)

131

count = 0

152

132

failed = set()

153

def buffer_requests():

154

count = 0

155

buffered_requests = []

156

for fileid in to_copy:

133

for id in to_copy:

134

count += 1

135

pb.update('copy', count, len(to_copy))

136

if not permit_failure:

137

self.add(other[id], id)

138

else:

157

139

try:

158

f = other[fileid]

140

entry = other[id]

159

141

except KeyError:

160

if permit_failure:

161

failed.add(fileid)

162

continue

142

failed.add(id)

143

continue

144

self.add(entry, id)

145

146

if not permit_failure:

147

assert count == len(to_copy)

148

pb.clear()

149

return count, failed

150

151

def copy_multi_immutable(self, other, to_copy, pb, permit_failure=False):

152

from shutil import copyfile

153

count = 0

154

failed = set()

155

for id in to_copy:

156

p = self._path(id)

157

other_p = other._path(id)

158

try:

159

copyfile(other_p, p)

160

except IOError, e:

161

if e.errno == errno.ENOENT:

162

if not permit_failure:

163

copyfile(other_p+".gz", p+".gz")

163

164

else:

164

raise

165

166

buffered_requests.append((f, fileid))

167

if len(buffered_requests) > self._max_buffered_requests:

168

yield buffered_requests.pop(0)

169

count += 1

170

pb.update('copy', count, len(to_copy))

171

172

for req in buffered_requests:

173

yield req

174

count += 1

175

pb.update('copy', count, len(to_copy))

176

177

assert count == len(to_copy)

178

179

self.add_multi(buffer_requests())

180

165

try:

166

copyfile(other_p+".gz", p+".gz")

167

except IOError, e:

168

if e.errno == errno.ENOENT:

169

failed.add(id)

170

else:

171

raise

172

else:

173

raise

174

175

count += 1

176

pb.update('copy', count, len(to_copy))

177

assert count == len(to_copy)

181

178

pb.clear()

182

return len(to_copy), failed

183

184

185

class TransportStore(Store):

186

"""A TransportStore is a Store superclass for Stores that use Transports."""

187

188

_max_buffered_requests = 10

189

190

def __init__(self, transport):

191

assert isinstance(transport, bzrlib.transport.Transport)

192

super(TransportStore, self).__init__()

193

self._transport = transport

194

195

def __repr__(self):

196

if self._transport is None:

197

return "%s(None)" % (self.__class__.__name__)

198

else:

199

return "%s(%r)" % (self.__class__.__name__, self._transport.base)

200

201

__str__ = __repr__

202

203

204

class ImmutableMemoryStore(Store):

205

"""A memory only store."""

179

return count, failed

180

206

181

207

182

def __contains__(self, fileid):

208

return self._contents.has_key(fileid)

209

183

""""""

184

p = self._path(fileid)

185

return (os.access(p, os.R_OK)

186

or os.access(p + '.gz', os.R_OK))

187

188

# TODO: Guard against the same thing being stored twice, compressed and uncompressed

189

190

def __iter__(self):

191

for f in os.listdir(self._basedir):

192

if f[-3:] == '.gz':

193

# TODO: case-insensitive?

194

yield f[:-3]

195

else:

196

yield f

197

198

def __len__(self):

199

return len(os.listdir(self._basedir))

200

201

202

def __getitem__(self, fileid):

203

"""Returns a file reading from a particular entry."""

204

p = self._path(fileid)

205

try:

206

return gzip.GzipFile(p + '.gz', 'rb')

207

except IOError, e:

208

if e.errno != errno.ENOENT:

209

raise

210

211

try:

212

return file(p, 'rb')

213

except IOError, e:

214

if e.errno != errno.ENOENT:

215

raise

216

217

raise KeyError(fileid)

218

219

220

def total_size(self):

221

"""Return (count, bytes)

222

223

This is the (compressed) size stored on disk, not the size of

224

the content."""

225

total = 0

226

count = 0

227

for fid in self:

228

count += 1

229

p = self._path(fid)

230

try:

231

total += os.stat(p)[ST_SIZE]

232

except OSError:

233

total += os.stat(p + '.gz')[ST_SIZE]

234

235

return count, total

236

237

238

239

240

class ImmutableScratchStore(ImmutableStore):

241

"""Self-destructing test subclass of ImmutableStore.

242

243

The Store only exists for the lifetime of the Python object.

244

Obviously you should not put anything precious in it.

245

"""

210

246

def __init__(self):

211

super(ImmutableMemoryStore, self).__init__()

212

self._contents = {}

213

214

def add(self, stream, fileid, compressed=True):

215

if self._contents.has_key(fileid):

216

raise StoreError("fileid %s already in the store" % fileid)

217

self._contents[fileid] = stream.read()

218

219

def __getitem__(self, fileid):

220

"""Returns a file reading from a particular entry."""

221

if not self._contents.has_key(fileid):

222

raise IndexError

223

return StringIO(self._contents[fileid])

224

225

def _item_size(self, fileid):

226

return len(self._contents[fileid])

227

228

def __iter__(self):

229

return iter(self._contents.keys())

230

231

def total_size(self):

232

result = 0

233

count = 0

234

for fileid in self:

235

count += 1

236

result += self._item_size(fileid)

237

return count, result

238

239

240

class CachedStore(Store):

241

"""A store that caches data locally, to avoid repeated downloads.

242

The prefetch method should be used to avoid server round-trips for

243

every piece of data.

244

"""

245

246

def __init__(self, store, cache_dir):

247

super(CachedStore, self).__init__()

248

self.source_store = store

249

# This clones the source store type with a locally bound

250

# transport. FIXME: it assumes a constructor is == cloning.

251

# clonable store - it might be nicer to actually have a clone()

252

# or something. RBC 20051003

253

self.cache_store = store.__class__(LocalTransport(cache_dir))

254

255

def __getitem__(self, id):

256

mutter("Cache add %s" % id)

257

if id not in self.cache_store:

258

self.cache_store.add(self.source_store[id], id)

259

return self.cache_store[id]

260

261

def __contains__(self, fileid):

262

if fileid in self.cache_store:

263

return True

264

if fileid in self.source_store:

265

# We could copy at this time

266

return True

267

return False

268

269

def get(self, fileids, permit_failure=False, pb=None):

270

fileids = list(fileids)

271

hasids = self.cache_store.has(fileids)

272

needs = set()

273

for has, fileid in zip(hasids, fileids):

274

if not has:

275

needs.add(fileid)

276

if needs:

277

self.cache_store.copy_multi(self.source_store, needs,

278

permit_failure=permit_failure)

279

return self.cache_store.get(fileids,

280

permit_failure=permit_failure, pb=pb)

281

282

def prefetch(self, ids):

283

"""Copy a series of ids into the cache, before they are used.

284

For remote stores that support pipelining or async downloads, this can

285

increase speed considerably.

286

287

Failures while prefetching are ignored.

288

"""

289

mutter("Prefetch of ids %s" % ",".join(ids))

290

self.cache_store.copy_multi(self.source_store, ids,

291

permit_failure=True)

292

293

294

def copy_all(store_from, store_to):

295

"""Copy all ids from one store to another."""

296

# TODO: Optional progress indicator

297

if not hasattr(store_from, "__iter__"):

298

raise UnlistableStore(store_from)

299

try:

300

ids = [f for f in store_from]

301

except (NotImplementedError, TransportNotPossible):

302

raise UnlistableStore(store_from)

303

store_to.copy_multi(store_from, ids)

304

247

ImmutableStore.__init__(self, tempfile.mkdtemp())

248

249

def __del__(self):

250

for f in os.listdir(self._basedir):

251

fpath = os.path.join(self._basedir, f)

252

# needed on windows, and maybe some other filesystems

253

os.chmod(fpath, 0600)

254

os.remove(fpath)

255

os.rmdir(self._basedir)

256

mutter("%r destroyed" % self)

Older »