~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store.py

Committer: John Arbash Meinel
Date: 2005-07-11 18:53:10 UTC
mto: (1185.11.1)
mto: This revision was merged to the branch mainline in revision 1396.
Revision ID: john@arbash-meinel.com-20050711185310-3ed8748ad27e9baf

Working on making Branch() do all of its work over a Transport.

files modified:
bzrlib/branch.py

bzrlib/local_transport.py

bzrlib/store.py

bzrlib/transport.py

Show diffs side-by-side

added added

removed removed

bzrlib/store.py

def __iter__(self):

raise NotImplementedError

def add(self, f, fileid):

def add(self, fileid, f):

"""Add a file object f to the store accessible from the given fileid"""

raise NotImplementedError('Children of Storage must define their method of adding entries.')

def add_multi(self, entries):

"""Add a series of file-like or string objects to the store with the given

identities.

:param entries: A list of tuples of id,file pairs [(id1, file1), (id2, file2), ...]

This could also be a generator yielding (id,file) pairs.

"""

for fileid, f in entries:

self.add(fileid, f)

def copy_multi(self, other, ids):

"""Copy texts for ids from other into self.

If an id is present in self, it is skipped. A count of copied

ids is returned, which may be less than len(ids).

:param other: Another Storage object

:param ids: A list of entry ids to be copied

:return: The number of entries copied

"""

from bzrlib.progress import ProgressBar

pb = ProgressBar()

pb.update('preparing to copy')

to_copy = [fileid for fileid in ids if fileid not in self]

to_copy = [fileid for fileid in text_ids if fileid not in self]

return self._do_copy(other, to_copy, pb)

def _do_copy(self, other, to_copy, pb):

"""This is the standard copying mechanism, just get them one at

a time from remote, and store them locally.

:param other: Another Storage object

:param to_copy: A list of entry ids to copy

:param pb: A ProgressBar object to display completion status.

:return: The number of entries copied.

"""

100

# This should be updated to use add_multi() rather than

101

# the current methods of buffering requests.

102

# One question, is it faster to queue up 1-10 and then copy 1-10

103

# then queue up 11-20, copy 11-20

104

# or to queue up 1-10, copy 1, queue 11, copy 2, etc?

105

# sort of pipeline versus batch.

106

count = 0

buffered_requests = []

for fileid in to_copy:

buffered_requests.append((other[fileid], fileid))

if len(buffered_requests) > self._max_buffered_requests:

self.add(*buffered_requests.pop(0))

107

def buffer_requests():

108

buffered_requests = []

109

for fileid in to_copy:

110

buffered_requests.append((fileid, other[fileid]))

111

if len(buffered_requests) > self._max_buffered_requests:

112

yield buffered_requests.pop(0)

113

count += 1

114

pb.update('copy', count, len(to_copy))

115

116

for req in buffered_requests:

117

yield req

118

count += 1

119

pb.update('copy', count, len(to_copy))

120

for req in buffered_requests:

self.add(*req)

count += 1

pb.update('copy', count, len(to_copy))

121

self.add_multi(buffer_requests())

122

123

assert count == len(to_copy)

124

pb.clear()

125

return count

126

100

101

127

class CompressedTextStore(Storage):

102

128

"""Store that holds files indexed by unique names.

103

129

107

133

108

134

Files are stored gzip compressed, with no delta compression.

109

135

110

>>> st = ScratchFlatTextStore()

136

>>> st = ScratchCompressedTextStore()

111

137

112

>>> st.add(StringIO('hello'), 'aa')

138

>>> st.add('aa', StringIO('hello'))

113

139

>>> 'aa' in st

114

140

True

115

141

>>> 'foo' in st

119

145

120

146

Entries can be retrieved as files, which may then be read.

121

147

122

>>> st.add(StringIO('goodbye'), '123123')

148

>>> st.add('123123', StringIO('goodbye'))

123

149

>>> st['123123'].read()

124

150

'goodbye'

125

151

133

159

def __init__(self, basedir):

134

160

super(CompressedTextStore, self).__init__(basedir)

135

161

136

def _path(self, fileid):

162

def _check_fileid(self, fileid):

137

163

if '\\' in fileid or '/' in fileid:

138

164

raise ValueError("invalid store id %r" % fileid)

139

return self._transport.get_filename(fileid)

165

166

def _relpath(self, fileid):

167

self._check_fileid(fileid)

168

return fileid + '.gz'

140

169

141

170

def __repr__(self):

142

171

return "%s(%r)" % (self.__class__.__name__, self._location)

143

172

144

def add(self, f, fileid, compressed=True):

173

def add(self, fileid, f):

145

174

"""Add contents of a file into the store.

146

175

147

176

f -- An open file, or file-like object."""

148

# FIXME: Only works on files that will fit in memory

149

177

# TODO: implement an add_multi which can do some of its

178

# own pipelining, and possibly take advantage of

179

# transport.put_multi(). The problem is that

180

# entries potentially need to be compressed as they

181

# are received, which implies translation, which

182

# means it isn't as straightforward as we would like.

150

183

from cStringIO import StringIO

184

from bzrlib.osutils import pumpfile

151

185

152

186

mutter("add store entry %r" % (fileid))

153

187

if isinstance(f, basestring):

154

content = f

155

else:

156

content = f.read()

157

158

if self._transport.has(fileid) or self._transport.has(fileid + '.gz'):

159

raise BzrError("store %r already contains id %r" % (self._location, fileid))

160

161

fn = fileid

162

if compressed:

163

fn = fn + '.gz'

164

188

f = StringIO(f)

189

190

fn = self._relpath(fileid)

191

if self._transport.has(fn):

192

raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

193

194

165

195

sio = StringIO()

166

if compressed:

167

gf = gzip.GzipFile(mode='wb', fileobj=sio)

168

gf.write(content)

169

gf.close()

170

else:

171

sio.write(content)

196

gf = gzip.GzipFile(mode='wb', fileobj=sio)

197

# if pumpfile handles files that don't fit in ram,

198

# so will this function

199

pumpfile(f, gf)

200

gf.close()

172

201

sio.seek(0)

173

202

self._transport.put(fn, sio)

174

203

177

206

return self._copy_multi_text(other, to_copy, pb)

178

207

return super(CompressedTextStore, self)._do_copy(other, to_copy, pb)

179

208

180

181

209

def _copy_multi_text(self, other, to_copy, pb):

182

from shutil import copyfile

183

count = 0

184

for id in to_copy:

185

p = self._path(id)

186

other_p = other._path(id)

187

try:

188

copyfile(other_p, p)

189

except IOError, e:

190

if e.errno == errno.ENOENT:

191

copyfile(other_p+".gz", p+".gz")

192

else:

193

raise

194

195

count += 1

196

pb.update('copy', count, len(to_copy))

210

# Because of _transport, we can no longer assume

211

# that they are on the same filesystem, we can, however

212

# assume that we only need to copy the exact bytes,

213

# we don't need to process the files.

214

215

paths = [self._relpath(fileid) for fileid in to_copy]

216

count = self._transport.put_multi(

217

zip(paths, other._transport.get_multi(paths, pb=pb)))

197

218

assert count == len(to_copy)

198

219

pb.clear()

199

220

return count

200

201

221

202

222

def __contains__(self, fileid):

203

223

""""""

204

p = self._path(fileid)

205

return (os.access(p, os.R_OK)

206

or os.access(p + '.gz', os.R_OK))

224

fn = self._relpath(fileid)

225

return self._transport.has(fn)

207

226

208

227

# TODO: Guard against the same thing being stored twice, compressed and uncompressed

209

228

210

229

def __iter__(self):

211

for f in os.listdir(self._location):

230

# TODO: case-insensitive?

231

for f in self._transport.list_dir('.'):

212

232

if f[-3:] == '.gz':

213

# TODO: case-insensitive?

214

233

yield f[:-3]

215

234

else:

216

235

yield f

217

236

218

237

def __len__(self):

219

return len(os.listdir(self._location))

238

return len([f for f in self._transport.list_dir('.')])

220

239

221

240

def __getitem__(self, fileid):

222

241

"""Returns a file reading from a particular entry."""

223

p = self._path(fileid)

224

try:

225

return gzip.GzipFile(p + '.gz', 'rb')

226

except IOError, e:

227

if e.errno == errno.ENOENT:

228

return file(p, 'rb')

229

else:

230

raise e

242

fn = self._relpath(fileid)

243

f = self._transport.get(fn)

244

return gzip.GzipFile(mode='rb', fileobj=f)

231

245

232

246

def total_size(self):

233

247

"""Return (count, bytes)

236

250

the content."""

237

251

total = 0

238

252

count = 0

239

for fid in self:

253

relpaths = [self._relpath(fid) for fid in self]

254

stats =

255

for st in self._transport.stat_multi(relpaths):

240

256

count += 1

241

p = self._path(fid)

242

try:

243

total += os.stat(p)[ST_SIZE]

244

except OSError:

245

total += os.stat(p + '.gz')[ST_SIZE]

257

total += st[ST_SIZE]

246

258

247

259

return count, total

248

260

249

250

251

252

class ScratchFlatTextStore(CompressedTextStore):

261

class ScratchCompressedTextStore(CompressedTextStore):

253

262

"""Self-destructing test subclass of ImmutableStore.

254

263

255

264

The Store only exists for the lifetime of the Python object.

256

265

Obviously you should not put anything precious in it.

257

266

"""

258

267

def __init__(self):

259

super(ScratchFlatTextStore, self).__init__(tempfile.mkdtemp())

268

from transport import transport

269

super(ScratchCompressedTextStore, self).__init__(transport(tempfile.mkdtemp()))

260

270

261

271

def __del__(self):

262

for f in os.listdir(self._location):

263

fpath = os.path.join(self._location, f)

264

# needed on windows, and maybe some other filesystems

265

os.chmod(fpath, 0600)

266

os.remove(fpath)

267

os.rmdir(self._location)

272

self._transport.delete_multi(self._transport.list_dir('.'))

273

os.rmdir(self._transport.base)

268

274

mutter("%r destroyed" % self)

275

Older »