~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

545ms, 600ms: Switch memory model from storing kind to using minikind
Need to profile effect on _generate_inventory, but makes a significant improvement for
both fast path and slow path, and should have minimal effect elsewhere.

Show diffs side-by-side

added added

removed removed

Lines of Context:
57
57
entry[1][1]: The second tree
58
58
 
59
59
For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:
60
 
entry[1][0][0]: kind
 
60
entry[1][0][0]: minikind
61
61
entry[1][0][1]: fingerprint
62
62
entry[1][0][2]: size
63
63
entry[1][0][3]: executable
240
240
    # A pack_stat (the x's) that is just noise and will never match the output
241
241
    # of base64 encode.
242
242
    NULLSTAT = 'x' * 32
243
 
    NULL_PARENT_DETAILS = ('absent', '', 0, False, '')
 
243
    NULL_PARENT_DETAILS = ('a', '', 0, False, '')
244
244
 
245
245
    def __init__(self):
246
246
        """Create a  DirState object.
325
325
            size = stat.st_size
326
326
            packed_stat = pack_stat(stat)
327
327
        parent_info = self._empty_parent_info()
 
328
        minikind = DirState._kind_to_minikind[kind]
328
329
        if kind == 'file':
329
330
            entry_data = entry_key, [
330
 
                (kind, link_or_sha1, size, False, packed_stat),
 
331
                (minikind, link_or_sha1, size, False, packed_stat),
331
332
                ] + parent_info
332
333
        elif kind == 'directory':
333
334
            entry_data = entry_key, [
334
 
                (kind, '', 0, False, packed_stat),
 
335
                (minikind, '', 0, False, packed_stat),
335
336
                ] + parent_info
336
337
        elif kind == 'symlink':
337
338
            entry_data = entry_key, [
338
 
                (kind, link_or_sha1, size, False, packed_stat),
 
339
                (minikind, link_or_sha1, size, False, packed_stat),
339
340
                ] + parent_info
340
341
        else:
341
342
            raise errors.BzrError('unknown kind %r' % kind)
442
443
        """
443
444
        entire_entry = list(entry[0])
444
445
        for tree_number, tree_data in enumerate(entry[1]):
445
 
            # (kind, fingerprint, size, executable, tree_specific_string)
 
446
            # (minikind, fingerprint, size, executable, tree_specific_string)
446
447
            entire_entry.extend(tree_data)
447
448
            # 3 for the key, 5 for the fields per tree.
448
449
            tree_offset = 3 + tree_number * 5
449
 
            # kind
450
 
            entire_entry[tree_offset + 0] = DirState._kind_to_minikind[tree_data[0]]
 
450
            # minikind
 
451
            entire_entry[tree_offset + 0] = tree_data[0]
451
452
            # size
452
453
            entire_entry[tree_offset + 2] = str(tree_data[2])
453
454
            # executable
570
571
    def _get_fields_to_entry(self):
571
572
        """Get a function which converts entry fields into a entry record.
572
573
 
573
 
        This handles kind, size, and executable, as well as parent records.
 
574
        This handles size and executable, as well as parent records.
574
575
 
575
576
        :return: A function which takes a list of fields, and returns an
576
577
            appropriate record for storing in memory.
578
579
        # This is intentionally unrolled for performance
579
580
        num_present_parents = self._num_present_parents()
580
581
        if num_present_parents == 0:
581
 
            def fields_to_entry_0_parents(fields, _int=int, _tuple=tuple,
582
 
                                          _mini_to_kind=self._minikind_to_kind):
 
582
            def fields_to_entry_0_parents(fields, _int=int):
583
583
                path_name_file_id_key = (fields[0], fields[1], fields[2])
584
584
                return (path_name_file_id_key, [
585
585
                    ( # Current tree
586
 
                        _mini_to_kind[fields[3]], # kind
 
586
                        fields[3],                # minikind
587
587
                        fields[4],                # fingerprint
588
588
                        _int(fields[5]),          # size
589
589
                        fields[6] == 'y',         # executable
591
591
                    )])
592
592
            return fields_to_entry_0_parents
593
593
        elif num_present_parents == 1:
594
 
            def fields_to_entry_1_parent(fields, _int=int, _tuple=tuple,
595
 
                                         _mini_to_kind=self._minikind_to_kind):
 
594
            def fields_to_entry_1_parent(fields, _int=int):
596
595
                path_name_file_id_key = (fields[0], fields[1], fields[2])
597
596
                return (path_name_file_id_key, [
598
597
                    ( # Current tree
599
 
                        _mini_to_kind[fields[3]], # kind
 
598
                        fields[3],                # minikind
600
599
                        fields[4],                # fingerprint
601
600
                        _int(fields[5]),          # size
602
601
                        fields[6] == 'y',         # executable
603
602
                        fields[7],                # packed_stat or revision_id
604
603
                    ),
605
604
                    ( # Parent 1
606
 
                        _mini_to_kind[fields[8]], # kind
 
605
                        fields[8],                # minikind
607
606
                        fields[9],                # fingerprint
608
607
                        _int(fields[10]),         # size
609
608
                        fields[11] == 'y',        # executable
612
611
                    ])
613
612
            return fields_to_entry_1_parent
614
613
        elif num_present_parents == 2:
615
 
            def fields_to_entry_2_parents(fields, _int=int, _tuple=tuple,
616
 
                                          _mini_to_kind=self._minikind_to_kind):
 
614
            def fields_to_entry_2_parents(fields, _int=int):
617
615
                path_name_file_id_key = (fields[0], fields[1], fields[2])
618
616
                return (path_name_file_id_key, [
619
617
                    ( # Current tree
620
 
                        _mini_to_kind[fields[3]], # kind
 
618
                        fields[3],                # minikind
621
619
                        fields[4],                # fingerprint
622
620
                        _int(fields[5]),          # size
623
621
                        fields[6] == 'y',         # executable
624
622
                        fields[7],                # packed_stat or revision_id
625
623
                    ),
626
624
                    ( # Parent 1
627
 
                        _mini_to_kind[fields[8]], # kind
 
625
                        fields[8],                # minikind
628
626
                        fields[9],                # fingerprint
629
627
                        _int(fields[10]),         # size
630
628
                        fields[11] == 'y',        # executable
631
629
                        fields[12],               # packed_stat or revision_id
632
630
                    ),
633
631
                    ( # Parent 2
634
 
                        _mini_to_kind[fields[13]],# kind
 
632
                        fields[13],               # minikind
635
633
                        fields[14],               # fingerprint
636
634
                        _int(fields[15]),         # size
637
635
                        fields[16] == 'y',        # executable
640
638
                    ])
641
639
            return fields_to_entry_2_parents
642
640
        else:
643
 
            def fields_to_entry_n_parents(fields, _int=int, _tuple=tuple,
644
 
                                          _mini_to_kind=self._minikind_to_kind):
 
641
            def fields_to_entry_n_parents(fields, _int=int):
645
642
                path_name_file_id_key = (fields[0], fields[1], fields[2])
646
 
                trees = [(_mini_to_kind[fields[cur]], # kind
 
643
                trees = [(fields[cur],                # minikind
647
644
                          fields[cur+1],              # fingerprint
648
645
                          _int(fields[cur+2]),        # size
649
646
                          fields[cur+3] == 'y',       # executable
689
686
        # requested.
690
687
        while entry_index < len(block) and block[entry_index][0][1] == basename:
691
688
            if block[entry_index][1][tree_index][0] not in \
692
 
                       ('absent', 'relocated'):
 
689
                       ('a', 'r'): # absent, relocated
693
690
                return block_index, entry_index, True, True
694
691
            entry_index += 1
695
692
        return block_index, entry_index, True, False
718
715
            if not file_present:
719
716
                return None, None
720
717
            entry = self._dirblocks[block_index][1][entry_index]
721
 
            assert entry[0][2] and entry[1][tree_index][0] not in ('absent', 'relocated'), 'unversioned entry?!?!'
 
718
            assert entry[0][2] and entry[1][tree_index][0] not in ('a', 'r'), 'unversioned entry?!?!'
722
719
            if fileid_utf8:
723
720
                if entry[0][2] != fileid_utf8:
724
721
                    raise BzrError('integrity error ? : mismatching tree_index, file_id and path')
726
723
        else:
727
724
            for entry in self._iter_entries():
728
725
                if entry[0][2] == fileid_utf8:
729
 
                    if entry[1][tree_index][0] == 'relocated':
 
726
                    if entry[1][tree_index][0] == 'r': # relocated
730
727
                        # look up the real location directly by path
731
728
                        return self._get_entry(tree_index,
732
729
                            fileid_utf8=fileid_utf8,
733
730
                            path_utf8=entry[1][tree_index][1])
734
 
                    if entry[1][tree_index][0] == 'absent':
 
731
                    if entry[1][tree_index][0] == 'a': # absent
735
732
                        # not in the tree at all.
736
733
                        return None, None
737
734
                    return entry
759
756
        # a new root directory, with a NULLSTAT.
760
757
        empty_tree_dirblocks[0][1].append(
761
758
            (('', '', bzrlib.inventory.ROOT_ID), [
762
 
                ('directory', '', 0, False, DirState.NULLSTAT),
 
759
                ('d', '', 0, False, DirState.NULLSTAT),
763
760
            ]))
764
761
        result._set_data([], empty_tree_dirblocks)
765
762
        try:
778
775
            id.
779
776
        """
780
777
        kind = inv_entry.kind
 
778
        minikind = DirState._kind_to_minikind[kind]
781
779
        tree_data = inv_entry.revision
782
780
        assert len(tree_data) > 0, 'empty revision for the inv_entry.'
783
781
        if kind == 'directory':
794
792
            executable = inv_entry.executable
795
793
        else:
796
794
            raise Exception
797
 
        return (kind, fingerprint, size, executable, tree_data)
 
795
        return (minikind, fingerprint, size, executable, tree_data)
798
796
 
799
797
    def _iter_entries(self):
800
798
        """Iterate over all the entries in the dirstate.
892
890
 
893
891
            if num_present_parents == 1:
894
892
                # Bind external functions to local names
895
 
                _mini_to_kind = DirState._minikind_to_kind
896
893
                _int = int
897
894
                # We access all fields in order, so we can just iterate over
898
895
                # them. Grab an straight iterator over the fields. (We use an
922
919
                    # creating new strings
923
920
                    entry = ((current_dirname, name, file_id),
924
921
                             [(# Current Tree
925
 
                                 _mini_to_kind[next()], # kind
 
922
                                 next(),                # minikind
926
923
                                 next(),                # fingerprint
927
924
                                 _int(next()),          # size
928
925
                                 next() == 'y',         # executable
929
926
                                 next(),                # packed_stat or revision_id
930
927
                             ),
931
928
                             ( # Parent 1
932
 
                                 _mini_to_kind[next()], # kind
 
929
                                 next(),                # minikind
933
930
                                 next(),                # fingerprint
934
931
                                 _int(next()),          # size
935
932
                                 next() == 'y',         # executable
1113
1110
        # one: the current tree
1114
1111
        for entry in self._iter_entries():
1115
1112
            # skip entries not in the current tree
1116
 
            if entry[1][0][0] in ('absent', 'relocated'):
 
1113
            if entry[1][0][0] in ('a', 'r'): # absent, relocated
1117
1114
                continue
1118
1115
            by_path[entry[0]] = [entry[1][0]] + \
1119
1116
                [DirState.NULL_PARENT_DETAILS] * parent_count
1154
1151
                        # other trees, so put absent pointers there
1155
1152
                        # This is the vertical axis in the matrix, all pointing
1156
1153
                        # tot he real path.
1157
 
                        by_path[entry_key][tree_index] = ('relocated', path_utf8, 0, False, '')
 
1154
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
1158
1155
                # by path consistency: Insert into an existing path record (trivial), or 
1159
1156
                # add a new one with relocation pointers for the other tree indexes.
1160
1157
                if new_entry_key in id_index[file_id]:
1178
1175
                            # fragmented situations by reusing the relocation
1179
1176
                            # records.
1180
1177
                            a_key = iter(id_index[file_id]).next()
1181
 
                            if by_path[a_key][lookup_index][0] in ('relocated', 'absent'):
 
1178
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
1182
1179
                                # its a pointer or missing statement, use it as is.
1183
1180
                                new_details.append(by_path[a_key][lookup_index])
1184
1181
                            else:
1185
1182
                                # we have the right key, make a pointer to it.
1186
1183
                                real_path = ('/'.join(a_key[0:2])).strip('/')
1187
 
                                new_details.append(('relocated', real_path, 0, False, ''))
 
1184
                                new_details.append(('r', real_path, 0, False, ''))
1188
1185
                    new_details.append(self._inv_entry_to_details(entry))
1189
1186
                    new_details.extend(new_location_suffix)
1190
1187
                    by_path[new_entry_key] = new_details
1230
1227
                return None
1231
1228
        while current_new or current_old:
1232
1229
            # skip entries in old that are not really there
1233
 
            if current_old and current_old[1][0][0] in ('relocated', 'absent'):
 
1230
            if current_old and current_old[1][0][0] in ('r', 'a'):
 
1231
                # relocated or absent
1234
1232
                current_old = advance(old_iterator)
1235
1233
                continue
1236
1234
            if current_new:
1259
1257
                # TODO: update the record if anything significant has changed.
1260
1258
                # the minimal required trigger is if the execute bit or cached
1261
1259
                # kind has changed.
 
1260
                kind = DirState._minikind_to_kind[current_old[1][0][0]]
1262
1261
                if (current_old[1][0][3] != current_new[1].executable or
1263
 
                    current_old[1][0][0] != current_new[1].kind):
 
1262
                    kind != current_new[1].kind):
1264
1263
                    self.update_minimal(current_old[0], current_new[1].kind,
1265
1264
                        num_present_parents,
1266
1265
                        executable=current_new[1].executable,
1295
1294
        all_remaining_keys = set()
1296
1295
        # Dont check the working tree, because its going.
1297
1296
        for details in current_old[1][1:]:
1298
 
            if details[0] not in ('absent', 'relocated'):
 
1297
            if details[0] not in ('a', 'r'): # absent, relocated
1299
1298
                all_remaining_keys.add(current_old[0])
1300
 
            elif details[0] == 'relocated':
 
1299
            elif details[0] == 'r': # relocated
1301
1300
                # record the key for the real path.
1302
1301
                all_remaining_keys.add(tuple(os.path.split(details[1])) + (current_old[0][2],))
1303
1302
            # absent rows are not present at any path.
1326
1325
            assert present
1327
1326
            update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
1328
1327
            # it must not be absent at the moment
1329
 
            assert update_tree_details[0][0] != 'absent'
 
1328
            assert update_tree_details[0][0] != 'a' # absent
1330
1329
            update_tree_details[0] = DirState.NULL_PARENT_DETAILS
1331
1330
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1332
1331
        return last_reference
1339
1338
        if packed_stat is None:
1340
1339
            packed_stat = DirState.NULLSTAT
1341
1340
        entry_index, present = self._find_entry_index(key, block)
1342
 
        new_details = (kind, fingerprint, size, executable, packed_stat)
 
1341
        minikind = DirState._kind_to_minikind[kind]
 
1342
        new_details = (minikind, fingerprint, size, executable, packed_stat)
1343
1343
        assert id_index is not None, 'need an id index to do updates for now !'
1344
1344
        if not present:
1345
1345
            # new entry, synthesis cross reference here,
1364
1364
                    assert present
1365
1365
                    assert path_utf8 is not None
1366
1366
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
1367
 
                        ('relocated', path_utf8, 0, False, '')
 
1367
                        ('r', path_utf8, 0, False, '')
1368
1368
 
1369
1369
                for lookup_index in xrange(1, num_present_parents + 1):
1370
1370
                    # grab any one entry, use it to find the right path.
1378
1378
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
1379
1379
                    assert present
1380
1380
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
1381
 
                    if update_details[0] in ('relocated', 'absent'):
 
1381
                    if update_details[0] in ('r', 'a'): # relocated, absent
1382
1382
                        # its a pointer or absent in lookup_index's tree, use
1383
1383
                        # it as is.
1384
1384
                        new_entry[1].append(update_details)
1385
1385
                    else:
1386
1386
                        # we have the right key, make a pointer to it.
1387
1387
                        pointer_path = os.path.join(*other_key[0:2])
1388
 
                        new_entry[1].append(('relocated', pointer_path, 0, False, ''))
 
1388
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
1389
1389
            block.insert(entry_index, new_entry)
1390
1390
            existing_keys.add(key)
1391
1391
        else:
1415
1415
                    entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
1416
1416
                    assert present
1417
1417
                    self._dirblocks[block_index][1][entry_index][1][0] = \
1418
 
                        ('relocated', path_utf8, 0, False, '')
 
1418
                        ('r', path_utf8, 0, False, '')
1419
1419
        # add a containing dirblock if needed.
1420
 
        if new_details[0] == 'directory':
 
1420
        if new_details[0] == 'd':
1421
1421
            subdir_key = (os.path.join(*key[0:2]), '', '')
1422
1422
            block_index, present = self._find_block_index_from_key(subdir_key)
1423
1423
            if not present: