~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store.py

Committer: Martin Pool
Date: 2005-09-06 23:19:08 UTC
mto: (1185.41.1 bzr.sftp) (1393.2.1) (1092.3.4) (1417.1.4) (1464.1.1) (1534.1.1 integration) (1495.1.4) (1505.1.16 bzr-bound-branch) (1508.1.15) (1526.1.3 run_tests_twice_for_i18n) (1540.1.3 bzr.dev (Main development branch)) (1685.1.1 bzr-encoding) (1553.5.1 bzr.dev (Main development branch)) (1608.2.1 bzr.mbp.escape-stores) (974.1.76)
mto: This revision was merged to the branch mainline in revision 1390.
Revision ID: mbp@sourcefrog.net-20050906231908-f3fc1093279cedba

- cleanup Store._path

files removed:
bzrlib/graph.py

bzrlib/selftest/HTTPTestUtil.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testmerge.py

bzrlib/selftest/testremotebranch.py

bzrlib/selftest/testtransport.py

bzrlib/store

bzrlib/store/compressed_text.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

files renamed:
bzrlib/store/__init__.py => bzrlib/store.py

files modified:
Makefile

NEWS

TODO

bzr-man.py

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/intset.py

bzrlib/merge.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/remotebranch.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/weavefile.py

bzrlib/xml.py

testsweet.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/store.py

unique ID.

"""

import bzrlib

import os, tempfile, types, osutils, gzip, errno

from stat import ST_SIZE

from StringIO import StringIO

from bzrlib.errors import BzrError

from bzrlib.trace import mutter

import bzrlib.ui

import bzrlib.transport

######################################################################

# stores

class StoreError(Exception):

pass

class Store(object):

"""This class represents the abstract storage layout for saving information.

class ImmutableStore(object):

"""Store that holds files indexed by unique names.

Files can be added, but not modified once they are in. Typically

the hash is used as the name, or something else known to be unique,

such as a UUID.

>>> st = ImmutableScratchStore()

>>> st.add(StringIO('hello'), 'aa')

>>> 'aa' in st

True

>>> 'foo' in st

False

You are not allowed to add an id that is already present.

Entries can be retrieved as files, which may then be read.

>>> st.add(StringIO('goodbye'), '123123')

>>> st['123123'].read()

'goodbye'

TODO: Atomic add by writing to a temporary file and renaming.

In bzr 0.0.5 and earlier, files within the store were marked

readonly on disk. This is no longer done but existing stores need

to be accommodated.

"""

_transport = None

_max_buffered_requests = 10

def __init__(self, transport):

assert isinstance(transport, bzrlib.transport.Transport)

self._transport = transport

def __init__(self, basedir):

self._basedir = basedir

def _path(self, entry_id):

if not isinstance(entry_id, basestring):

raise TypeError(type(entry_id))

if '\\' in entry_id or '/' in entry_id:

raise ValueError("invalid store id %r" % entry_id)

return os.path.join(self._basedir, entry_id)

def __repr__(self):

if self._transport is None:

return "%s(None)" % (self.__class__.__name__)

return "%s(%r)" % (self.__class__.__name__, self._basedir)

def add(self, f, fileid, compressed=True):

"""Add contents of a file into the store.

f -- An open file, or file-like object."""

# FIXME: Only works on files that will fit in memory

from bzrlib.atomicfile import AtomicFile

mutter("add store entry %r" % (fileid))

if isinstance(f, types.StringTypes):

content = f

else:

return "%s(%r)" % (self.__class__.__name__, self._transport.base)

__str__ = __repr__

def __len__(self):

raise NotImplementedError('Children should define their length')

def __getitem__(self, fileid):

"""Returns a file reading from a particular entry."""

raise NotImplementedError

def __contains__(self, fileid):

""""""

raise NotImplementedError

def __iter__(self):

raise NotImplementedError

def add(self, f, fileid):

"""Add a file object f to the store accessible from the given fileid"""

raise NotImplementedError('Children of Store must define their method of adding entries.')

def add_multi(self, entries):

"""Add a series of file-like or string objects to the store with the given

identities.

:param entries: A list of tuples of file,id pairs [(file1, id1), (file2, id2), ...]

This could also be a generator yielding (file,id) pairs.

"""

for f, fileid in entries:

self.add(f, fileid)

def has(self, fileids):

"""Return True/False for each entry in fileids.

:param fileids: A List or generator yielding file ids.

:return: A generator or list returning True/False for each entry.

"""

for fileid in fileids:

if fileid in self:

yield True

content = f.read()

p = self._path(fileid)

if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):

raise BzrError("store %r already contains id %r" % (self._basedir, fileid))

fn = p

100

if compressed:

101

fn = fn + '.gz'

102

103

af = AtomicFile(fn, 'wb')

104

try:

105

if compressed:

106

gf = gzip.GzipFile(mode='wb', fileobj=af)

107

gf.write(content)

108

gf.close()

109

else:

yield False

def get(self, fileids, ignore_missing=False, pb=None):

"""Return a set of files, one for each requested entry.

:param ignore_missing: If true, return None for entries which do not

exist.

:return: A list or generator of file-like objects, one for each id.

"""

100

for fileid in fileids:

101

try:

102

yield self[fileid]

103

except KeyError:

104

if ignore_missing:

105

yield None

106

else:

107

raise

108

109

def copy_multi(self, other, ids, pb=None, permit_failure=False):

110

af.write(content)

111

af.commit()

112

finally:

113

af.close()

114

115

116

def copy_multi(self, other, ids, permit_failure=False):

110

117

"""Copy texts for ids from other into self.

111

118

112

If an id is present in self, it is skipped. A count of copied

113

ids is returned, which may be less than len(ids).

119

If an id is present in self, it is skipped.

114

120

115

:param other: Another Store object

116

:param ids: A list of entry ids to be copied

117

:param pb: A ProgressBar object; if None is given, the default will be created.

118

:param permit_failure: Allow missing entries to be ignored

119

:return: (n_copied, [failed]) The number of entries copied successfully,

120

followed by a list of entries which could not be copied (because they

121

were missing)

121

Returns (count_copied, failed), where failed is a collection of ids

122

that could not be copied.

122

123

"""

123

if pb is None:

124

pb = bzrlib.ui.ui_factory.progress_bar()

125

126

ids = list(ids) # Make sure we don't have a generator, since we iterate 2 times

124

pb = bzrlib.ui.ui_factory.progress_bar()

125

127

126

pb.update('preparing to copy')

128

to_copy = []

129

for file_id, has in zip(ids, self.has(ids)):

130

if not has:

131

to_copy.append(file_id)

132

return self._do_copy(other, to_copy, pb, permit_failure=permit_failure)

133

134

def _do_copy(self, other, to_copy, pb, permit_failure=False):

135

"""This is the standard copying mechanism, just get them one at

136

a time from remote, and store them locally.

137

138

:param other: Another Store object

139

:param to_copy: A list of entry ids to copy

140

:param pb: A ProgressBar object to display completion status.

141

:param permit_failure: Allow missing entries to be ignored

142

:return: (n_copied, [failed])

143

The number of entries copied, and a list of failed entries.

144

"""

145

# This should be updated to use add_multi() rather than

146

# the current methods of buffering requests.

147

# One question, is it faster to queue up 1-10 and then copy 1-10

148

# then queue up 11-20, copy 11-20

149

# or to queue up 1-10, copy 1, queue 11, copy 2, etc?

150

# sort of pipeline versus batch.

151

152

# We can't use self._transport.copy_to because we don't know

153

# whether the local tree is in the same format as other

127

to_copy = [id for id in ids if id not in self]

128

if isinstance(other, ImmutableStore):

129

return self.copy_multi_immutable(other, to_copy, pb)

130

count = 0

154

131

failed = set()

155

def buffer_requests():

156

count = 0

157

buffered_requests = []

158

for fileid in to_copy:

132

for id in to_copy:

133

count += 1

134

pb.update('copy', count, len(to_copy))

135

if not permit_failure:

136

self.add(other[id], id)

137

else:

159

138

try:

160

f = other[fileid]

161

except KeyError:

162

if permit_failure:

163

failed.add(fileid)

164

continue

139

entry = other[id]

140

except IndexError:

141

failed.add(id)

142

continue

143

self.add(entry, id)

144

145

if not permit_failure:

146

assert count == len(to_copy)

147

pb.clear()

148

return count, failed

149

150

def copy_multi_immutable(self, other, to_copy, pb, permit_failure=False):

151

from shutil import copyfile

152

count = 0

153

failed = set()

154

for id in to_copy:

155

p = self._path(id)

156

other_p = other._path(id)

157

try:

158

copyfile(other_p, p)

159

except IOError, e:

160

if e.errno == errno.ENOENT:

161

if not permit_failure:

162

copyfile(other_p+".gz", p+".gz")

165

163

else:

166

raise

167

168

buffered_requests.append((f, fileid))

169

if len(buffered_requests) > self._max_buffered_requests:

170

yield buffered_requests.pop(0)

171

count += 1

172

pb.update('copy', count, len(to_copy))

173

174

for req in buffered_requests:

175

yield req

176

count += 1

177

pb.update('copy', count, len(to_copy))

178

179

assert count == len(to_copy)

180

181

self.add_multi(buffer_requests())

182

164

try:

165

copyfile(other_p+".gz", p+".gz")

166

except IOError, e:

167

if e.errno == errno.ENOENT:

168

failed.add(id)

169

else:

170

raise

171

else:

172

raise

173

174

count += 1

175

pb.update('copy', count, len(to_copy))

176

assert count == len(to_copy)

183

177

pb.clear()

184

return len(to_copy), failed

185

178

return count, failed

179

180

181

def __contains__(self, fileid):

182

""""""

183

p = self._path(fileid)

184

return (os.access(p, os.R_OK)

185

or os.access(p + '.gz', os.R_OK))

186

187

# TODO: Guard against the same thing being stored twice, compressed and uncompressed

188

189

def __iter__(self):

190

for f in os.listdir(self._basedir):

191

if f[-3:] == '.gz':

192

# TODO: case-insensitive?

193

yield f[:-3]

194

else:

195

yield f

196

197

def __len__(self):

198

return len(os.listdir(self._basedir))

199

200

201

def __getitem__(self, fileid):

202

"""Returns a file reading from a particular entry."""

203

p = self._path(fileid)

204

try:

205

return gzip.GzipFile(p + '.gz', 'rb')

206

except IOError, e:

207

if e.errno != errno.ENOENT:

208

raise

209

210

try:

211

return file(p, 'rb')

212

except IOError, e:

213

if e.errno != errno.ENOENT:

214

raise

215

216

raise IndexError(fileid)

217

218

219

def total_size(self):

220

"""Return (count, bytes)

221

222

This is the (compressed) size stored on disk, not the size of

223

the content."""

224

total = 0

225

count = 0

226

for fid in self:

227

count += 1

228

p = self._path(fid)

229

try:

230

total += os.stat(p)[ST_SIZE]

231

except OSError:

232

total += os.stat(p + '.gz')[ST_SIZE]

233

234

return count, total

235

236

237

238

239

class ImmutableScratchStore(ImmutableStore):

240

"""Self-destructing test subclass of ImmutableStore.

241

242

The Store only exists for the lifetime of the Python object.

243

Obviously you should not put anything precious in it.

244

"""

245

def __init__(self):

246

ImmutableStore.__init__(self, tempfile.mkdtemp())

247

248

def __del__(self):

249

for f in os.listdir(self._basedir):

250

fpath = os.path.join(self._basedir, f)

251

# needed on windows, and maybe some other filesystems

252

os.chmod(fpath, 0600)

253

os.remove(fpath)

254

os.rmdir(self._basedir)

255

mutter("%r destroyed" % self)

Older »