~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/btree_index.py

Committer: John Arbash Meinel
Date: 2008-08-28 20:05:52 UTC
mto: This revision was merged to the branch mainline in revision 3688.
Revision ID: john@arbash-meinel.com-20080828200552-sw5lzds9mmi3qnnb

Refactor some code.
Move the key, reference, value checking into a helper function.
This func also finds absent references, but that should have
minimal overhead either way.
Also use the _update_nodes_by_key functionality for both
indexes, as _nodes_by_key has the same signature.
Move _spill_mem_keys_to_disk into a separate function
not necessary, but it makes add_node() easier to understand.

files modified:
bzrlib/btree_index.py

bzrlib/index.py

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.osutils import basename, dirname

from bzrlib.transport import get_transport

161

160

:param value: The value to associate with the key. It may be any

162

161

bytes as long as it does not contain \0 or \n.

163

162

"""

164

self._check_key(key)

165

if index._newline_null_re.search(value) is not None:

166

raise errors.BadIndexValue(value)

167

if len(references) != self.reference_lists:

168

raise errors.BadIndexValue(references)

169

node_refs = []

170

for reference_list in references:

171

for reference in reference_list:

172

# If reference is in self._nodes, then we have already checked

173

# it

174

if reference not in self._nodes:

175

self._check_key(reference)

176

node_refs.append(tuple(reference_list))

163

# we don't care about absent_references

164

node_refs, _ = self._check_key_ref_value(key, references, value)

177

165

if key in self._nodes:

178

166

raise errors.BadIndexDuplicateKey(key, self)

179

167

self._nodes[key] = (tuple(node_refs), value)

180

168

self._keys.add(key)

181

if self._key_length > 1 and self._nodes_by_key is not None:

182

key_dict = self._nodes_by_key

183

if self.reference_lists:

184

key_value = key, value, tuple(node_refs)

185

else:

186

key_value = key, value

187

# possibly should do this on-demand, but it seems likely it is

188

# always wanted

189

# For a key of (foo, bar, baz) create

190

# _nodes_by_key[foo][bar][baz] = key_value

191

for subkey in key[:-1]:

192

key_dict = key_dict.setdefault(subkey, {})

193

key_dict[key[-1]] = key_value

169

if self._nodes_by_key is not None and self._key_length > 1:

170

self._update_nodes_by_key(key, value, node_refs)

194

171

if len(self._keys) < self._spill_at:

195

172

return

173

self._spill_mem_keys_to_disk()

174

175

def _spill_mem_keys_to_disk(self):

176

"""Write the in memory keys down to disk to cap memory consumption.

177

178

If we already have some keys written to disk, we will combine them so

179

as to preserve the sorted order. The algorithm for combining uses

180

powers of two. So on the first spill, write all mem nodes into a

181

single index. On the second spill, combine the mem nodes with the nodes

182

on disk to create a 2x sized disk index and get rid of the first index.

183

On the third spill, create a single new disk index, which will contain

184

the mem nodes, and preserve the existing 2x sized index. On the fourth,

185

combine mem with the first and second indexes, creating a new one of

186

size 4x. On the fifth create a single new one, etc.

187

"""

196

188

iterators_to_combine = [self._iter_mem_nodes()]

197

189

pos = -1

198

190

for pos, backing in enumerate(self._backing_indices):

203

195

backing_pos = pos + 1

204

196

new_backing_file, size = \

205

197

self._write_nodes(self._iter_smallest(iterators_to_combine))

206

new_backing = BTreeGraphIndex(

207

get_transport(dirname(new_backing_file.name)),

208

basename(new_backing_file.name), size)

198

dir_path, base_name = osutils.split(new_backing_file.name)

199

# Note: The transport here isn't strictly needed, because we will use

200

# direct access to the new_backing._file object

201

new_backing = BTreeGraphIndex(get_transport(dir_path),

202

base_name, size)

209

203

# GC will clean up the file

210

204

new_backing._file = new_backing_file

211

205

if len(self._backing_indices) == backing_pos:

Older »