~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2008-08-28 20:05:52 UTC
mto: This revision was merged to the branch mainline in revision 3688.
Revision ID: john@arbash-meinel.com-20080828200552-sw5lzds9mmi3qnnb

Refactor some code.
Move the key, reference, value checking into a helper function.
This func also finds absent references, but that should have
minimal overhead either way.
Also use the _update_nodes_by_key functionality for both
indexes, as _nodes_by_key has the same signature.
Move _spill_mem_keys_to_disk into a separate function
not necessary, but it makes add_node() easier to understand.

files modified:
bzrlib/btree_index.py

bzrlib/index.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

134

key_dict = key_dict.setdefault(subkey, {})

135

key_dict[key[-1]] = key_value

136

137

def add_node(self, key, value, references=()):

138

"""Add a node to the index.

137

def _check_key_ref_value(self, key, references, value):

138

"""Check that 'key' and 'references' are all valid.

139

140

:param key: The key. keys are non-empty tuples containing

141

as many whitespace-free utf8 bytestrings as the key length

142

defined for this index.

143

:param references: An iterable of iterables of keys. Each is a

144

reference to another key.

145

:param value: The value to associate with the key. It may be any

146

bytes as long as it does not contain \0 or \n.

140

:param key: A key tuple. Must conform to the key interface (be a tuple,

141

be of the right length, not have any whitespace or nulls in any key

142

element.)

143

:param references: An iterable of reference lists. Something like

144

[[(ref, key)], [(ref, key), (other, key)]]

145

:param value: The value associate with this key. Must not contain

146

newlines or null characters.

147

:return: (node_refs, absent_references)

148

node_refs basically a packed form of 'references' where all

149

iterables are tuples

150

absent_references reference keys that are not in self._nodes.

151

This may contain duplicates if the same key is

152

referenced in multiple lists.

147

153

"""

148

154

self._check_key(key)

149

155

if _newline_null_re.search(value) is not None:

151

157

if len(references) != self.reference_lists:

152

158

raise errors.BadIndexValue(references)

153

159

node_refs = []

160

absent_references = []

154

161

for reference_list in references:

155

162

for reference in reference_list:

156

self._check_key(reference)

163

# If reference *is* in self._nodes, then we know it has already

164

# been checked.

157

165

if reference not in self._nodes:

158

self._nodes[reference] = ('a', (), '')

166

self._check_key(reference)

167

absent_references.append(reference)

159

168

node_refs.append(tuple(reference_list))

160

if key in self._nodes and self._nodes[key][0] == '':

169

return tuple(node_refs), absent_references

170

171

def add_node(self, key, value, references=()):

172

"""Add a node to the index.

173

174

:param key: The key. keys are non-empty tuples containing

175

as many whitespace-free utf8 bytestrings as the key length

176

defined for this index.

177

:param references: An iterable of iterables of keys. Each is a

178

reference to another key.

179

:param value: The value to associate with the key. It may be any

180

bytes as long as it does not contain \0 or \n.

181

"""

182

(node_refs,

183

absent_references) = self._check_key_ref_value(key, references, value)

184

if key in self._nodes and self._nodes[key][0] != 'a':

161

185

raise errors.BadIndexDuplicateKey(key, self)

162

node_refs = tuple(node_refs)

186

for reference in absent_references:

187

# There may be duplicates, but I don't think it is worth worrying

188

# about

189

self._nodes[reference] = ('a', (), '')

163

190

self._nodes[key] = ('', node_refs, value)

164

191

self._keys.add(key)

165

if self._key_length > 1 and self._nodes_by_key is not None:

192

if self._nodes_by_key is not None and self._key_length > 1:

166

193

self._update_nodes_by_key(key, value, node_refs)

167

194

168

195

def finish(self):

172

199

lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')

173

200

prefix_length = sum(len(x) for x in lines)

174

201

# references are byte offsets. To avoid having to do nasty

175

# polynomial work to resolve offsets (references to later in the

202

# polynomial work to resolve offsets (references to later in the

176

203

# file cannot be determined until all the inbetween references have

177

204

# been calculated too) we pad the offsets with 0's to make them be

178

205

# of consistent length. Using binary offsets would break the trivial

Older »