~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store/__init__.py

Committer: Robert Collins
Date: 2005-10-02 01:53:46 UTC
mfrom: (1393.1.23)
Revision ID: robertc@robertcollins.net-20051002015346-587422189352289e

merge from upstream newformat

files added:
bzrlib/clone.py

bzrlib/selftest/testtransport.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/store/text.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

files removed:
bzrlib/remotebranch.py

bzrlib/selftest/testremotebranch.py

testsweet.py

files renamed:
bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/weavestore.py => bzrlib/store/weave.py

files modified:
NEWS

NEWS.developers

bzrlib/__init__.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststore.py

bzrlib/upgrade.py

bzrlib/weave.py

Show diffs side-by-side

added added

removed removed

bzrlib/store/__init__.py

unique ID.

"""

import errno

import gzip

import os

import tempfile

import types

from stat import ST_SIZE

from StringIO import StringIO

from bzrlib.errors import BzrError, UnlistableStore

from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible

from bzrlib.trace import mutter

import bzrlib.ui

import bzrlib.osutils as osutils

import bzrlib.transport

######################################################################

# stores

class StoreError(Exception):

pass

class ImmutableStore(object):

"""Store that holds files indexed by unique names.

Files can be added, but not modified once they are in. Typically

the hash is used as the name, or something else known to be unique,

such as a UUID.

>>> st = ImmutableScratchStore()

>>> st.add(StringIO('hello'), 'aa')

>>> 'aa' in st

True

>>> 'foo' in st

False

You are not allowed to add an id that is already present.

Entries can be retrieved as files, which may then be read.

>>> st.add(StringIO('goodbye'), '123123')

>>> st['123123'].read()

'goodbye'

TODO: Atomic add by writing to a temporary file and renaming.

In bzr 0.0.5 and earlier, files within the store were marked

readonly on disk. This is no longer done but existing stores need

to be accommodated.

class Store(object):

"""This class represents the abstract storage layout for saving information.

"""

def __init__(self, basedir):

self._basedir = basedir

def _path(self, entry_id):

if not isinstance(entry_id, basestring):

raise TypeError(type(entry_id))

if '\\' in entry_id or '/' in entry_id:

raise ValueError("invalid store id %r" % entry_id)

return os.path.join(self._basedir, entry_id)

_transport = None

_max_buffered_requests = 10

def __init__(self, transport):

assert isinstance(transport, bzrlib.transport.Transport)

self._transport = transport

def __repr__(self):

return "%s(%r)" % (self.__class__.__name__, self._basedir)

def add(self, f, fileid, compressed=True):

"""Add contents of a file into the store.

f -- An open file, or file-like object."""

# FIXME: Only works on files that will fit in memory

from bzrlib.atomicfile import AtomicFile

mutter("add store entry %r" % (fileid))

100

if isinstance(f, types.StringTypes):

101

content = f

if self._transport is None:

return "%s(None)" % (self.__class__.__name__)

102

else:

103

content = f.read()

104

105

p = self._path(fileid)

106

if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):

107

raise BzrError("store %r already contains id %r" % (self._basedir, fileid))

108

109

fn = p

110

if compressed:

111

fn = fn + '.gz'

112

113

af = AtomicFile(fn, 'wb')

114

try:

115

if compressed:

116

gf = gzip.GzipFile(mode='wb', fileobj=af)

117

gf.write(content)

118

gf.close()

return "%s(%r)" % (self.__class__.__name__, self._transport.base)

__str__ = __repr__

def __len__(self):

raise NotImplementedError('Children should define their length')

def __getitem__(self, fileid):

"""Returns a file reading from a particular entry."""

raise NotImplementedError

def __contains__(self, fileid):

""""""

raise NotImplementedError

def __iter__(self):

raise NotImplementedError

def add(self, f, fileid):

"""Add a file object f to the store accessible from the given fileid"""

raise NotImplementedError('Children of Store must define their method of adding entries.')

def add_multi(self, entries):

"""Add a series of file-like or string objects to the store with the given

identities.

:param entries: A list of tuples of file,id pairs [(file1, id1), (file2, id2), ...]

This could also be a generator yielding (file,id) pairs.

"""

for f, fileid in entries:

self.add(f, fileid)

def has(self, fileids):

"""Return True/False for each entry in fileids.

:param fileids: A List or generator yielding file ids.

:return: A generator or list returning True/False for each entry.

"""

for fileid in fileids:

if fileid in self:

yield True

119

else:

120

af.write(content)

121

af.commit()

122

finally:

123

af.close()

124

125

126

def copy_multi(self, other, ids, permit_failure=False):

yield False

def get(self, fileids, permit_failure=False, pb=None):

"""Return a set of files, one for each requested entry.

:param permit_failure: If true, return None for entries which do not

exist.

100

:return: A list or generator of file-like objects, one for each id.

101

"""

102

for fileid in fileids:

103

try:

104

yield self[fileid]

105

except KeyError:

106

if permit_failure:

107

yield None

108

else:

109

raise

110

111

def copy_multi(self, other, ids, pb=None, permit_failure=False):

127

112

"""Copy texts for ids from other into self.

128

113

129

If an id is present in self, it is skipped.

114

If an id is present in self, it is skipped. A count of copied

115

ids is returned, which may be less than len(ids).

130

116

131

Returns (count_copied, failed), where failed is a collection of ids

132

that could not be copied.

117

:param other: Another Store object

118

:param ids: A list of entry ids to be copied

119

:param pb: A ProgressBar object; if none is given, the default will be created.

120

:param permit_failure: Allow missing entries to be ignored

121

:return: (n_copied, [failed]) The number of entries copied successfully,

122

followed by a list of entries which could not be copied (because they

123

were missing)

133

124

"""

134

pb = bzrlib.ui.ui_factory.progress_bar()

135

125

if pb is None:

126

pb = bzrlib.ui.ui_factory.progress_bar()

127

128

# XXX: Is there any reason why we couldn't make this accept a generator

129

# and build a list as it finds things to copy?

130

ids = list(ids) # Make sure we don't have a generator, since we iterate 2 times

136

131

pb.update('preparing to copy')

137

to_copy = [id for id in ids if id not in self]

138

if isinstance(other, ImmutableStore):

139

return self.copy_multi_immutable(other, to_copy, pb,

140

permit_failure=permit_failure)

141

count = 0

132

to_copy = []

133

for file_id, has in zip(ids, self.has(ids)):

134

if not has:

135

to_copy.append(file_id)

136

return self._do_copy(other, to_copy, pb, permit_failure=permit_failure)

137

138

def _do_copy(self, other, to_copy, pb, permit_failure=False):

139

"""This is the standard copying mechanism, just get them one at

140

a time from remote, and store them locally.

141

142

:param other: Another Store object

143

:param to_copy: A list of entry ids to copy

144

:param pb: A ProgressBar object to display completion status.

145

:param permit_failure: Allow missing entries to be ignored

146

:return: (n_copied, [failed])

147

The number of entries copied, and a list of failed entries.

148

"""

149

# This should be updated to use add_multi() rather than

150

# the current methods of buffering requests.

151

# One question, is it faster to queue up 1-10 and then copy 1-10

152

# then queue up 11-20, copy 11-20

153

# or to queue up 1-10, copy 1, queue 11, copy 2, etc?

154

# sort of pipeline versus batch.

155

156

# We can't use self._transport.copy_to because we don't know

157

# whether the local tree is in the same format as other

142

158

failed = set()

143

for id in to_copy:

144

count += 1

145

pb.update('copy', count, len(to_copy))

146

if not permit_failure:

147

self.add(other[id], id)

148

else:

159

def buffer_requests():

160

count = 0

161

buffered_requests = []

162

for fileid in to_copy:

149

163

try:

150

entry = other[id]

164

f = other[fileid]

151

165

except KeyError:

152

failed.add(id)

153

continue

154

self.add(entry, id)

155

156

if not permit_failure:

166

if permit_failure:

167

failed.add(fileid)

168

continue

169

else:

170

raise

171

172

buffered_requests.append((f, fileid))

173

if len(buffered_requests) > self._max_buffered_requests:

174

yield buffered_requests.pop(0)

175

count += 1

176

pb.update('copy', count, len(to_copy))

177

178

for req in buffered_requests:

179

yield req

180

count += 1

181

pb.update('copy', count, len(to_copy))

182

157

183

assert count == len(to_copy)

158

pb.clear()

159

return count, failed

160

161

def copy_multi_immutable(self, other, to_copy, pb, permit_failure=False):

162

count = 0

163

failed = set()

164

for id in to_copy:

165

p = self._path(id)

166

other_p = other._path(id)

167

try:

168

osutils.link_or_copy(other_p, p)

169

except (IOError, OSError), e:

170

if e.errno == errno.ENOENT:

171

if not permit_failure:

172

osutils.link_or_copy(other_p+".gz", p+".gz")

173

else:

174

try:

175

osutils.link_or_copy(other_p+".gz", p+".gz")

176

except IOError, e:

177

if e.errno == errno.ENOENT:

178

failed.add(id)

179

else:

180

raise

181

else:

182

raise

183

184

count += 1

185

pb.update('copy', count, len(to_copy))

186

assert count == len(to_copy)

187

pb.clear()

188

return count, failed

189

190

191

def __contains__(self, fileid):

192

""""""

193

p = self._path(fileid)

194

return (os.access(p, os.R_OK)

195

or os.access(p + '.gz', os.R_OK))

196

197

# TODO: Guard against the same thing being stored twice,

198

# compressed and uncompressed

199

200

def __iter__(self):

201

for f in os.listdir(self._basedir):

202

if f[-3:] == '.gz':

203

# TODO: case-insensitive?

204

yield f[:-3]

205

else:

206

yield f

207

208

def __len__(self):

209

return len(os.listdir(self._basedir))

210

211

212

def __getitem__(self, fileid):

213

"""Returns a file reading from a particular entry."""

214

p = self._path(fileid)

215

try:

216

return gzip.GzipFile(p + '.gz', 'rb')

217

except IOError, e:

218

if e.errno != errno.ENOENT:

219

raise

220

221

try:

222

return file(p, 'rb')

223

except IOError, e:

224

if e.errno != errno.ENOENT:

225

raise

226

227

raise KeyError(fileid)

228

229

230

def total_size(self):

231

"""Return (count, bytes)

232

233

This is the (compressed) size stored on disk, not the size of

234

the content."""

235

total = 0

236

count = 0

237

for fid in self:

238

count += 1

239

p = self._path(fid)

240

try:

241

total += os.stat(p)[ST_SIZE]

242

except OSError:

243

total += os.stat(p + '.gz')[ST_SIZE]

244

245

return count, total

246

247

248

249

250

class ImmutableScratchStore(ImmutableStore):

251

"""Self-destructing test subclass of ImmutableStore.

252

253

The Store only exists for the lifetime of the Python object.

254

Obviously you should not put anything precious in it.

255

"""

256

def __init__(self):

257

ImmutableStore.__init__(self, tempfile.mkdtemp())

258

259

def __del__(self):

260

for f in os.listdir(self._basedir):

261

fpath = os.path.join(self._basedir, f)

262

# needed on windows, and maybe some other filesystems

263

os.chmod(fpath, 0600)

264

os.remove(fpath)

265

os.rmdir(self._basedir)

266

mutter("%r destroyed" % self)

184

185

self.add_multi(buffer_requests())

186

187

pb.clear()

188

return len(to_copy), failed

267

189

268

190

def copy_all(store_from, store_to):

269

191

"""Copy all ids from one store to another."""

192

# TODO: Optional progress indicator

270

193

if not hasattr(store_from, "__iter__"):

271

194

raise UnlistableStore(store_from)

272

ids = [f for f in store_from]

195

try:

196

ids = [f for f in store_from]

197

except (NotImplementedError, TransportNotPossible):

198

raise UnlistableStore(store_from)

273

199

store_to.copy_multi(store_from, ids)

200

Older »