~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: INADA Naoki
  • Date: 2011-05-17 00:45:09 UTC
  • mfrom: (5875 +trunk)
  • mto: This revision was merged to the branch mainline in revision 5891.
  • Revision ID: songofacandy@gmail.com-20110517004509-q58negjbdjh7t6u1
merge from lp:bzr

Show diffs side-by-side

added added

removed removed

Lines of Context:
366
366
    NOT_IN_MEMORY = 0
367
367
    IN_MEMORY_UNMODIFIED = 1
368
368
    IN_MEMORY_MODIFIED = 2
 
369
    IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates
369
370
 
370
371
    # A pack_stat (the x's) that is just noise and will never match the output
371
372
    # of base64 encode.
375
376
    HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
376
377
    HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
377
378
 
378
 
    def __init__(self, path, sha1_provider):
 
379
    def __init__(self, path, sha1_provider, worth_saving_limit=0):
379
380
        """Create a  DirState object.
380
381
 
381
382
        :param path: The path at which the dirstate file on disk should live.
382
383
        :param sha1_provider: an object meeting the SHA1Provider interface.
 
384
        :param worth_saving_limit: when the exact number of hash changed
 
385
            entries is known, only bother saving the dirstate if more than
 
386
            this count of entries have changed.
 
387
            -1 means never save hash changes, 0 means always save hash changes.
383
388
        """
384
389
        # _header_state and _dirblock_state represent the current state
385
390
        # of the dirstate metadata and the per-row data respectively.
422
427
        # during commit.
423
428
        self._last_block_index = None
424
429
        self._last_entry_index = None
 
430
        # The set of known hash changes
 
431
        self._known_hash_changes = set()
 
432
        # How many hash changed entries can we have without saving
 
433
        self._worth_saving_limit = worth_saving_limit
425
434
 
426
435
    def __repr__(self):
427
436
        return "%s(%r)" % \
428
437
            (self.__class__.__name__, self._filename)
429
438
 
 
439
    def _mark_modified(self, hash_changed_entries=None, header_modified=False):
 
440
        """Mark this dirstate as modified.
 
441
 
 
442
        :param hash_changed_entries: if non-None, mark just these entries as
 
443
            having their hash modified.
 
444
        :param header_modified: mark the header modified as well, not just the
 
445
            dirblocks.
 
446
        """
 
447
        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
 
448
        if hash_changed_entries:
 
449
            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
 
450
            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
 
451
                                        DirState.IN_MEMORY_UNMODIFIED):
 
452
                # If the dirstate is already marked a IN_MEMORY_MODIFIED, then
 
453
                # that takes precedence.
 
454
                self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
 
455
        else:
 
456
            # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we
 
457
            #       should fail noisily if someone tries to set
 
458
            #       IN_MEMORY_MODIFIED but we don't have a write-lock!
 
459
            # We don't know exactly what changed so disable smart saving
 
460
            self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
461
        if header_modified:
 
462
            self._header_state = DirState.IN_MEMORY_MODIFIED
 
463
 
 
464
    def _mark_unmodified(self):
 
465
        """Mark this dirstate as unmodified."""
 
466
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
 
467
        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
468
        self._known_hash_changes = set()
 
469
 
430
470
    def add(self, path, file_id, kind, stat, fingerprint):
431
471
        """Add a path to be tracked.
432
472
 
558
598
        if kind == 'directory':
559
599
           # insert a new dirblock
560
600
           self._ensure_block(block_index, entry_index, utf8path)
561
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
601
        self._mark_modified()
562
602
        if self._id_index:
563
603
            self._add_to_id_index(self._id_index, entry_key)
564
604
 
1030
1070
 
1031
1071
        self._ghosts = []
1032
1072
        self._parents = [parents[0]]
1033
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1034
 
        self._header_state = DirState.IN_MEMORY_MODIFIED
 
1073
        self._mark_modified(header_modified=True)
1035
1074
 
1036
1075
    def _empty_parent_info(self):
1037
1076
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
1567
1606
            # the active tree.
1568
1607
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1569
1608
 
1570
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1571
 
        self._header_state = DirState.IN_MEMORY_MODIFIED
 
1609
        self._mark_modified(header_modified=True)
1572
1610
        self._id_index = None
1573
1611
        return
1574
1612
 
1747
1785
                and stat_value.st_ctime < self._cutoff_time):
1748
1786
                entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
1749
1787
                               packed_stat)
1750
 
                self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
1788
                self._mark_modified([entry])
1751
1789
 
1752
1790
    def _sha_cutoff_time(self):
1753
1791
        """Return cutoff time.
1811
1849
        """Serialise the entire dirstate to a sequence of lines."""
1812
1850
        if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
1813
1851
            self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
1814
 
            # read whats on disk.
 
1852
            # read what's on disk.
1815
1853
            self._state_file.seek(0)
1816
1854
            return self._state_file.readlines()
1817
1855
        lines = []
1818
1856
        lines.append(self._get_parents_line(self.get_parent_ids()))
1819
1857
        lines.append(self._get_ghosts_line(self._ghosts))
1820
 
        # append the root line which is special cased
1821
 
        lines.extend(map(self._entry_to_line, self._iter_entries()))
 
1858
        lines.extend(self._get_entry_lines())
1822
1859
        return self._get_output_lines(lines)
1823
1860
 
1824
1861
    def _get_ghosts_line(self, ghost_ids):
1829
1866
        """Create a line for the state file for parents information."""
1830
1867
        return '\0'.join([str(len(parent_ids))] + parent_ids)
1831
1868
 
 
1869
    def _get_entry_lines(self):
 
1870
        """Create lines for entries."""
 
1871
        return map(self._entry_to_line, self._iter_entries())
 
1872
 
1832
1873
    def _get_fields_to_entry(self):
1833
1874
        """Get a function which converts entry fields into a entry record.
1834
1875
 
2100
2141
            executable = False
2101
2142
        else:
2102
2143
            raise Exception("can't pack %s" % inv_entry)
2103
 
        return (minikind, fingerprint, size, executable, tree_data)
 
2144
        return static_tuple.StaticTuple(minikind, fingerprint, size,
 
2145
                                        executable, tree_data)
2104
2146
 
2105
2147
    def _iter_child_entries(self, tree_index, path_utf8):
2106
2148
        """Iterate over all the entries that are children of path_utf.
2222
2264
        """The number of parent entries in each record row."""
2223
2265
        return len(self._parents) - len(self._ghosts)
2224
2266
 
2225
 
    @staticmethod
2226
 
    def on_file(path, sha1_provider=None):
 
2267
    @classmethod
 
2268
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
2227
2269
        """Construct a DirState on the file at path "path".
2228
2270
 
2229
2271
        :param path: The path at which the dirstate file on disk should live.
2230
2272
        :param sha1_provider: an object meeting the SHA1Provider interface.
2231
2273
            If None, a DefaultSHA1Provider is used.
 
2274
        :param worth_saving_limit: when the exact number of hash changed
 
2275
            entries is known, only bother saving the dirstate if more than
 
2276
            this count of entries have changed. -1 means never save.
2232
2277
        :return: An unlocked DirState object, associated with the given path.
2233
2278
        """
2234
2279
        if sha1_provider is None:
2235
2280
            sha1_provider = DefaultSHA1Provider()
2236
 
        result = DirState(path, sha1_provider)
 
2281
        result = cls(path, sha1_provider,
 
2282
                     worth_saving_limit=worth_saving_limit)
2237
2283
        return result
2238
2284
 
2239
2285
    def _read_dirblocks_if_needed(self):
2331
2377
            trace.mutter('Not saving DirState because '
2332
2378
                    '_changes_aborted is set.')
2333
2379
            return
2334
 
        if (self._header_state == DirState.IN_MEMORY_MODIFIED or
2335
 
            self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
2336
 
 
 
2380
        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
 
2381
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
 
2382
        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
 
2383
        #       fail to save IN_MEMORY_MODIFIED
 
2384
        if self._worth_saving():
2337
2385
            grabbed_write_lock = False
2338
2386
            if self._lock_state != 'w':
2339
2387
                grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
2347
2395
                    # We couldn't grab a write lock, so we switch back to a read one
2348
2396
                    return
2349
2397
            try:
 
2398
                lines = self.get_lines()
2350
2399
                self._state_file.seek(0)
2351
 
                self._state_file.writelines(self.get_lines())
 
2400
                self._state_file.writelines(lines)
2352
2401
                self._state_file.truncate()
2353
2402
                self._state_file.flush()
2354
 
                self._header_state = DirState.IN_MEMORY_UNMODIFIED
2355
 
                self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
2403
                self._mark_unmodified()
2356
2404
            finally:
2357
2405
                if grabbed_write_lock:
2358
2406
                    self._lock_token = self._lock_token.restore_read_lock()
2361
2409
                    #       not changed contents. Since restore_read_lock may
2362
2410
                    #       not be an atomic operation.
2363
2411
 
 
2412
    def _worth_saving(self):
 
2413
        """Is it worth saving the dirstate or not?"""
 
2414
        if (self._header_state == DirState.IN_MEMORY_MODIFIED
 
2415
            or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
 
2416
            return True
 
2417
        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
 
2418
            if self._worth_saving_limit == -1:
 
2419
                # We never save hash changes when the limit is -1
 
2420
                return False
 
2421
            # If we're using smart saving and only a small number of
 
2422
            # entries have changed their hash, don't bother saving. John has
 
2423
            # suggested using a heuristic here based on the size of the
 
2424
            # changed files and/or tree. For now, we go with a configurable
 
2425
            # number of changes, keeping the calculation time
 
2426
            # as low overhead as possible. (This also keeps all existing
 
2427
            # tests passing as the default is 0, i.e. always save.)
 
2428
            if len(self._known_hash_changes) >= self._worth_saving_limit:
 
2429
                return True
 
2430
        return False
 
2431
 
2364
2432
    def _set_data(self, parent_ids, dirblocks):
2365
2433
        """Set the full dirstate data in memory.
2366
2434
 
2374
2442
        """
2375
2443
        # our memory copy is now authoritative.
2376
2444
        self._dirblocks = dirblocks
2377
 
        self._header_state = DirState.IN_MEMORY_MODIFIED
2378
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2445
        self._mark_modified(header_modified=True)
2379
2446
        self._parents = list(parent_ids)
2380
2447
        self._id_index = None
2381
2448
        self._packed_stat_index = None
2401
2468
        self._make_absent(entry)
2402
2469
        self.update_minimal(('', '', new_id), 'd',
2403
2470
            path_utf8='', packed_stat=entry[1][0][4])
2404
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2471
        self._mark_modified()
 
2472
        # XXX: This was added by Ian, we need to make sure there
 
2473
        #      are tests for it, because it isn't in bzr.dev TRUNK
 
2474
        #      It looks like the only place it is called is in setting the root
 
2475
        #      id of the tree. So probably we never had an _id_index when we
 
2476
        #      don't even have a root yet.
 
2477
        if self._id_index is not None:
 
2478
            self._add_to_id_index(self._id_index, entry[0])
2405
2479
 
2406
2480
    def set_parent_trees(self, trees, ghosts):
2407
2481
        """Set the parent trees for the dirstate.
2452
2526
        parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
2453
2527
        # how many trees do we end up with
2454
2528
        parent_count = len(parent_trees)
 
2529
        st = static_tuple.StaticTuple
2455
2530
 
2456
2531
        # one: the current tree
2457
2532
        for entry in self._iter_entries():
2474
2549
            # the suffix is from tree_index+1:parent_count+1.
2475
2550
            new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
2476
2551
            # now stitch in all the entries from this tree
 
2552
            last_dirname = None
2477
2553
            for path, entry in tree.iter_entries_by_dir():
2478
2554
                # here we process each trees details for each item in the tree.
2479
2555
                # we first update any existing entries for the id at other paths,
2487
2563
                file_id = entry.file_id
2488
2564
                path_utf8 = path.encode('utf8')
2489
2565
                dirname, basename = osutils.split(path_utf8)
2490
 
                new_entry_key = (dirname, basename, file_id)
 
2566
                if dirname == last_dirname:
 
2567
                    # Try to re-use objects as much as possible
 
2568
                    dirname = last_dirname
 
2569
                else:
 
2570
                    last_dirname = dirname
 
2571
                new_entry_key = st(dirname, basename, file_id)
2491
2572
                # tree index consistency: All other paths for this id in this tree
2492
2573
                # index must point to the correct path.
2493
 
                for entry_key in id_index.get(file_id, ()):
 
2574
                entry_keys = id_index.get(file_id, ())
 
2575
                for entry_key in entry_keys:
2494
2576
                    # TODO:PROFILING: It might be faster to just update
2495
2577
                    # rather than checking if we need to, and then overwrite
2496
2578
                    # the one we are located at.
2499
2581
                        # other trees, so put absent pointers there
2500
2582
                        # This is the vertical axis in the matrix, all pointing
2501
2583
                        # to the real path.
2502
 
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2503
 
                # by path consistency: Insert into an existing path record (trivial), or
2504
 
                # add a new one with relocation pointers for the other tree indexes.
2505
 
                entry_keys = id_index.get(file_id, ())
 
2584
                        by_path[entry_key][tree_index] = st('r', path_utf8, 0,
 
2585
                                                            False, '')
 
2586
                # by path consistency: Insert into an existing path record
 
2587
                # (trivial), or add a new one with relocation pointers for the
 
2588
                # other tree indexes.
2506
2589
                if new_entry_key in entry_keys:
2507
 
                    # there is already an entry where this data belongs, just insert it.
 
2590
                    # there is already an entry where this data belongs, just
 
2591
                    # insert it.
2508
2592
                    by_path[new_entry_key][tree_index] = \
2509
2593
                        self._inv_entry_to_details(entry)
2510
2594
                else:
2520
2604
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2521
2605
                        else:
2522
2606
                            # grab any one entry, use it to find the right path.
2523
 
                            # TODO: optimise this to reduce memory use in highly
2524
 
                            # fragmented situations by reusing the relocation
2525
 
                            # records.
2526
2607
                            a_key = iter(entry_keys).next()
2527
2608
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2528
 
                                # its a pointer or missing statement, use it as is.
 
2609
                                # its a pointer or missing statement, use it as
 
2610
                                # is.
2529
2611
                                new_details.append(by_path[a_key][lookup_index])
2530
2612
                            else:
2531
2613
                                # we have the right key, make a pointer to it.
2532
2614
                                real_path = ('/'.join(a_key[0:2])).strip('/')
2533
 
                                new_details.append(('r', real_path, 0, False, ''))
 
2615
                                new_details.append(st('r', real_path, 0, False,
 
2616
                                                      ''))
2534
2617
                    new_details.append(self._inv_entry_to_details(entry))
2535
2618
                    new_details.extend(new_location_suffix)
2536
2619
                    by_path[new_entry_key] = new_details
2542
2625
        self._entries_to_current_state(new_entries)
2543
2626
        self._parents = [rev_id for rev_id, tree in trees]
2544
2627
        self._ghosts = list(ghosts)
2545
 
        self._header_state = DirState.IN_MEMORY_MODIFIED
2546
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2628
        self._mark_modified(header_modified=True)
2547
2629
        self._id_index = id_index
2548
2630
 
2549
2631
    def _sort_entries(self, entry_list):
2553
2635
        try to keep everything in sorted blocks all the time, but sometimes
2554
2636
        it's easier to sort after the fact.
2555
2637
        """
2556
 
        def _key(entry):
 
2638
        # When sorting, we usually have 10x more entries than directories. (69k
 
2639
        # total entries, 4k directories). So cache the results of splitting.
 
2640
        # Saving time and objects. Also, use StaticTuple to avoid putting all
 
2641
        # of these object into python's garbage collector.
 
2642
        split_dirs = {}
 
2643
        def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
2557
2644
            # sort by: directory parts, file name, file id
2558
 
            return entry[0][0].split('/'), entry[0][1], entry[0][2]
 
2645
            dirpath, fname, file_id = entry[0]
 
2646
            try:
 
2647
                split = _split_dirs[dirpath]
 
2648
            except KeyError:
 
2649
                split = _st.from_sequence(dirpath.split('/'))
 
2650
                _split_dirs[dirpath] = split
 
2651
            return _st(split, fname, file_id)
2559
2652
        return sorted(entry_list, key=_key)
2560
2653
 
2561
2654
    def set_state_from_inventory(self, new_inv):
2686
2779
                        current_old[0][1].decode('utf8'))
2687
2780
                self._make_absent(current_old)
2688
2781
                current_old = advance(old_iterator)
2689
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2782
        self._mark_modified()
2690
2783
        self._id_index = None
2691
2784
        self._packed_stat_index = None
2692
2785
        if tracing:
2758
2851
            if update_tree_details[0][0] == 'a': # absent
2759
2852
                raise AssertionError('bad row %r' % (update_tree_details,))
2760
2853
            update_tree_details[0] = DirState.NULL_PARENT_DETAILS
2761
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2854
        self._mark_modified()
2762
2855
        return last_reference
2763
2856
 
2764
2857
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2933
3026
            if not present:
2934
3027
                self._dirblocks.insert(block_index, (subdir_key[0], []))
2935
3028
 
2936
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
3029
        self._mark_modified()
2937
3030
 
2938
3031
    def _maybe_remove_row(self, block, index, id_index):
2939
3032
        """Remove index if it is absent or relocated across the row.
3242
3335
        else:
3243
3336
            worth_saving = False
3244
3337
    elif minikind == 'l':
 
3338
        if saved_minikind == 'l':
 
3339
            worth_saving = False
3245
3340
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
3246
3341
        if state._cutoff_time is None:
3247
3342
            state._sha_cutoff_time()
3253
3348
            entry[1][0] = ('l', '', stat_value.st_size,
3254
3349
                           False, DirState.NULLSTAT)
3255
3350
    if worth_saving:
3256
 
        state._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
3351
        state._mark_modified([entry])
3257
3352
    return link_or_sha1
3258
3353
 
3259
3354