549
548
self._ensure_block(block_index, entry_index, utf8path)
550
549
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
551
550
if self._id_index:
552
self._add_to_id_index(self._id_index, entry_key)
551
self._id_index.setdefault(entry_key[2], set()).add(entry_key)
554
553
def _bisect(self, paths):
555
554
"""Bisect through the disk structure for specific rows.
1568
1567
id_index = self._get_id_index()
1569
1568
for file_id in new_ids:
1570
for key in id_index.get(file_id, ()):
1569
for key in id_index.get(file_id, []):
1571
1570
block_i, entry_i, d_present, f_present = \
1572
1571
self._get_block_entry_index(key[0], key[1], tree_index)
1573
1572
if not f_present:
1981
1980
' tree_index, file_id and path')
1984
possible_keys = self._get_id_index().get(fileid_utf8, ())
1983
possible_keys = self._get_id_index().get(fileid_utf8, None)
1985
1984
if not possible_keys:
1986
1985
return None, None
1987
1986
for key in possible_keys:
2146
2145
def _get_id_index(self):
2147
"""Get an id index of self._dirblocks.
2149
This maps from file_id => [(directory, name, file_id)] entries where
2150
that file_id appears in one of the trees.
2146
"""Get an id index of self._dirblocks."""
2152
2147
if self._id_index is None:
2154
2149
for key, tree_details in self._iter_entries():
2155
self._add_to_id_index(id_index, key)
2150
id_index.setdefault(key[2], set()).add(key)
2156
2151
self._id_index = id_index
2157
2152
return self._id_index
2159
def _add_to_id_index(self, id_index, entry_key):
2160
"""Add this entry to the _id_index mapping."""
2161
# This code used to use a set for every entry in the id_index. However,
2162
# it is *rare* to have more than one entry. So a set is a large
2163
# overkill. And even when we do, we won't ever have more than the
2164
# number of parent trees. Which is still a small number (rarely >2). As
2165
# such, we use a simple tuple, and do our own uniqueness checks. While
2166
# the 'in' check is O(N) since N is nicely bounded it shouldn't ever
2167
# cause quadratic failure.
2168
# TODO: This should use StaticTuple
2169
file_id = entry_key[2]
2170
entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2171
if file_id not in id_index:
2172
id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2174
entry_keys = id_index[file_id]
2175
if entry_key not in entry_keys:
2176
id_index[file_id] = entry_keys + (entry_key,)
2178
def _remove_from_id_index(self, id_index, entry_key):
2179
"""Remove this entry from the _id_index mapping.
2181
It is an programming error to call this when the entry_key is not
2184
file_id = entry_key[2]
2185
entry_keys = list(id_index[file_id])
2186
entry_keys.remove(entry_key)
2187
id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
2189
2154
def _get_output_lines(self, lines):
2190
2155
"""Format lines for final output.
2450
2415
by_path[entry[0]] = [entry[1][0]] + \
2451
2416
[DirState.NULL_PARENT_DETAILS] * parent_count
2452
# TODO: Possibly inline this, since we know it isn't present yet
2453
# id_index[entry[0][2]] = (entry[0],)
2454
self._add_to_id_index(id_index, entry[0])
2417
id_index[entry[0][2]] = set([entry[0]])
2456
2419
# now the parent trees:
2457
2420
for tree_index, tree in enumerate(parent_trees):
2479
2442
new_entry_key = (dirname, basename, file_id)
2480
2443
# tree index consistency: All other paths for this id in this tree
2481
2444
# index must point to the correct path.
2482
for entry_key in id_index.get(file_id, ()):
2445
for entry_key in id_index.setdefault(file_id, set()):
2483
2446
# TODO:PROFILING: It might be faster to just update
2484
2447
# rather than checking if we need to, and then overwrite
2485
2448
# the one we are located at.
2491
2454
by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2492
2455
# by path consistency: Insert into an existing path record (trivial), or
2493
2456
# add a new one with relocation pointers for the other tree indexes.
2494
entry_keys = id_index.get(file_id, ())
2495
if new_entry_key in entry_keys:
2457
if new_entry_key in id_index[file_id]:
2496
2458
# there is already an entry where this data belongs, just insert it.
2497
2459
by_path[new_entry_key][tree_index] = \
2498
2460
self._inv_entry_to_details(entry)
2503
2465
new_details = []
2504
2466
for lookup_index in xrange(tree_index):
2505
2467
# boundary case: this is the first occurence of file_id
2506
# so there are no id_indexes, possibly take this out of
2468
# so there are no id_indexs, possibly take this out of
2508
if not len(entry_keys):
2470
if not len(id_index[file_id]):
2509
2471
new_details.append(DirState.NULL_PARENT_DETAILS)
2511
2473
# grab any one entry, use it to find the right path.
2512
2474
# TODO: optimise this to reduce memory use in highly
2513
2475
# fragmented situations by reusing the relocation
2515
a_key = iter(entry_keys).next()
2477
a_key = iter(id_index[file_id]).next()
2516
2478
if by_path[a_key][lookup_index][0] in ('r', 'a'):
2517
2479
# its a pointer or missing statement, use it as is.
2518
2480
new_details.append(by_path[a_key][lookup_index])
2523
2485
new_details.append(self._inv_entry_to_details(entry))
2524
2486
new_details.extend(new_location_suffix)
2525
2487
by_path[new_entry_key] = new_details
2526
self._add_to_id_index(id_index, new_entry_key)
2488
id_index[file_id].add(new_entry_key)
2527
2489
# --- end generation of full tree mappings
2529
2491
# sort and output all the entries
2711
2673
block[1].pop(entry_index)
2712
2674
# if we have an id_index in use, remove this key from it for this id.
2713
2675
if self._id_index is not None:
2714
self._remove_from_id_index(self._id_index, current_old[0])
2676
self._id_index[current_old[0][2]].remove(current_old[0])
2715
2677
# update all remaining keys for this id to record it as absent. The
2716
2678
# existing details may either be the record we are marking as deleted
2717
2679
# (if there were other trees with the id present at this path), or may
2788
2750
# new entry, synthesis cross reference here,
2789
existing_keys = id_index.get(key[2], ())
2751
existing_keys = id_index.setdefault(key[2], set())
2790
2752
if not existing_keys:
2791
2753
# not currently in the state, simplest case
2792
2754
new_entry = key, [new_details] + self._empty_parent_info()
2824
2786
other_entry = other_block[other_entry_index]
2825
2787
other_entry[1][0] = ('r', path_utf8, 0, False, '')
2826
if self._maybe_remove_row(other_block, other_entry_index,
2828
# If the row holding this was removed, we need to
2829
# recompute where this entry goes
2830
entry_index, _ = self._find_entry_index(key, block)
2788
self._maybe_remove_row(other_block, other_entry_index,
2833
2792
# adds a tuple to the new details for each column
2835
2794
# - or by creating a new pointer to the right row inside that column
2836
2795
num_present_parents = self._num_present_parents()
2837
2796
if num_present_parents:
2838
# TODO: This re-evaluates the existing_keys set, do we need
2839
# to do that ourselves?
2840
2797
other_key = list(existing_keys)[0]
2841
2798
for lookup_index in xrange(1, num_present_parents + 1):
2842
2799
# grab any one entry, use it to find the right path.
2861
2818
pointer_path = osutils.pathjoin(*other_key[0:2])
2862
2819
new_entry[1].append(('r', pointer_path, 0, False, ''))
2863
2820
block.insert(entry_index, new_entry)
2864
self._add_to_id_index(id_index, key)
2821
existing_keys.add(key)
2866
2823
# Does the new state matter?
2867
2824
block[entry_index][1][0] = new_details
2876
2833
# converted to relocated.
2877
2834
if path_utf8 is None:
2878
2835
raise AssertionError('no path')
2879
existing_keys = id_index.get(key[2], ())
2880
if key not in existing_keys:
2881
raise AssertionError('We found the entry in the blocks, but'
2882
' the key is not in the id_index.'
2883
' key: %s, existing_keys: %s' % (key, existing_keys))
2884
for entry_key in existing_keys:
2836
for entry_key in id_index.setdefault(key[2], set()):
2885
2837
# TODO:PROFILING: It might be faster to just update
2886
2838
# rather than checking if we need to, and then overwrite
2887
2839
# the one we are located at.