~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/dirstate.py

  • Committer: Jelmer Vernooij
  • Date: 2011-05-10 07:46:15 UTC
  • mfrom: (5844 +trunk)
  • mto: This revision was merged to the branch mainline in revision 5845.
  • Revision ID: jelmer@samba.org-20110510074615-eptod049ndjxc4i7
Merge bzr.dev.

@@ -265,6 +265,17 @@
         # return '%X.%X' % (int(st.st_mtime), st.st_mode)
 
 
+def _unpack_stat(packed_stat):
+    """Turn a packed_stat back into the stat fields.
+
+    This is meant as a debugging tool, should not be used in real code.
+    """
+    (st_size, st_mtime, st_ctime, st_dev, st_ino,
+     st_mode) = struct.unpack('>LLLLLL', binascii.a2b_base64(packed_stat))
+    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
+                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
+
+
 class SHA1Provider(object):
     """An interface for getting sha1s of a file."""
 
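For reference, a minimal standalone sketch (not part of this change) of the round trip that the new _unpack_stat helper reverses. The '>LLLLLL' layout and the base64 encoding are taken from the unpack call above; the sample field values are arbitrary:

    import binascii
    import struct

    # Field order matches _unpack_stat: size, mtime, ctime, dev, ino, mode.
    fields = (1024, 1305000000, 1305000001, 2049, 12345, 33188)
    packed_stat = binascii.b2a_base64(struct.pack('>LLLLLL', *fields))[:-1]

    (st_size, st_mtime, st_ctime, st_dev, st_ino,
     st_mode) = struct.unpack('>LLLLLL', binascii.a2b_base64(packed_stat))
    assert (st_size, st_mtime, st_ctime, st_dev, st_ino, st_mode) == fields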
 
@@ -355,6 +366,7 @@
     NOT_IN_MEMORY = 0
     IN_MEMORY_UNMODIFIED = 1
     IN_MEMORY_MODIFIED = 2
+    IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates
 
     # A pack_stat (the x's) that is just noise and will never match the output
     # of base64 encode.
@@ -364,11 +376,15 @@
     HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
     HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
 
-    def __init__(self, path, sha1_provider):
+    def __init__(self, path, sha1_provider, worth_saving_limit=0):
         """Create a  DirState object.
 
         :param path: The path at which the dirstate file on disk should live.
         :param sha1_provider: an object meeting the SHA1Provider interface.
+        :param worth_saving_limit: when the exact number of hash changed
+            entries is known, only bother saving the dirstate if more than
+            this count of entries have changed.
+            -1 means never save hash changes, 0 means always save hash changes.
         """
         # _header_state and _dirblock_state represent the current state
         # of the dirstate metadata and the per-row data respectiely.
@@ -411,11 +427,46 @@
         # during commit.
         self._last_block_index = None
         self._last_entry_index = None
+        # The set of known hash changes
+        self._known_hash_changes = set()
+        # How many hash changed entries can we have without saving
+        self._worth_saving_limit = worth_saving_limit
 
     def __repr__(self):
         return "%s(%r)" % \
             (self.__class__.__name__, self._filename)
 
+    def _mark_modified(self, hash_changed_entries=None, header_modified=False):
+        """Mark this dirstate as modified.
+
+        :param hash_changed_entries: if non-None, mark just these entries as
+            having their hash modified.
+        :param header_modified: mark the header modified as well, not just the
+            dirblocks.
+        """
+        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
+        if hash_changed_entries:
+            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
+            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
+                                        DirState.IN_MEMORY_UNMODIFIED):
+                # If the dirstate is already marked a IN_MEMORY_MODIFIED, then
+                # that takes precedence.
+                self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
+        else:
+            # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we
+            #       should fail noisily if someone tries to set
+            #       IN_MEMORY_MODIFIED but we don't have a write-lock!
+            # We don't know exactly what changed so disable smart saving
+            self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        if header_modified:
+            self._header_state = DirState.IN_MEMORY_MODIFIED
+
+    def _mark_unmodified(self):
+        """Mark this dirstate as unmodified."""
+        self._header_state = DirState.IN_MEMORY_UNMODIFIED
+        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+        self._known_hash_changes = set()
+
     def add(self, path, file_id, kind, stat, fingerprint):
         """Add a path to be tracked.
 
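The new IN_MEMORY_HASH_MODIFIED state only applies when nothing else has changed; a full modification always wins. An illustrative sketch of that precedence, not code from this branch (the constants mirror the class attributes above, the helper function is hypothetical):

    NOT_IN_MEMORY = 0
    IN_MEMORY_UNMODIFIED = 1
    IN_MEMORY_MODIFIED = 2
    IN_MEMORY_HASH_MODIFIED = 3

    def next_dirblock_state(current, hash_only_change):
        # Mirrors _mark_modified: hash-only changes never downgrade a full
        # modification, and any other change disables smart saving entirely.
        if hash_only_change:
            if current in (NOT_IN_MEMORY, IN_MEMORY_UNMODIFIED):
                return IN_MEMORY_HASH_MODIFIED
            return current
        return IN_MEMORY_MODIFIED

    assert next_dirblock_state(IN_MEMORY_UNMODIFIED, True) == IN_MEMORY_HASH_MODIFIED
    assert next_dirblock_state(IN_MEMORY_MODIFIED, True) == IN_MEMORY_MODIFIED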
 
@@ -547,7 +598,7 @@
         if kind == 'directory':
            # insert a new dirblock
            self._ensure_block(block_index, entry_index, utf8path)
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified()
         if self._id_index:
             self._add_to_id_index(self._id_index, entry_key)
 
@@ -1019,8 +1070,7 @@
 
         self._ghosts = []
         self._parents = [parents[0]]
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
-        self._header_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified(header_modified=True)
 
     def _empty_parent_info(self):
         return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
@@ -1556,8 +1606,7 @@
             # the active tree.
             raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
-        self._header_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified(header_modified=True)
         self._id_index = None
         return
 
@@ -1734,9 +1783,9 @@
                 self._sha_cutoff_time()
             if (stat_value.st_mtime < self._cutoff_time
                 and stat_value.st_ctime < self._cutoff_time):
-                entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
-                    packed_stat)
-                self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+                entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
+                               packed_stat)
+                self._mark_modified([entry])
 
     def _sha_cutoff_time(self):
         """Return cutoff time.
@@ -1800,14 +1849,13 @@
         """Serialise the entire dirstate to a sequence of lines."""
         if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
             self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
-            # read whats on disk.
+            # read what's on disk.
             self._state_file.seek(0)
             return self._state_file.readlines()
         lines = []
         lines.append(self._get_parents_line(self.get_parent_ids()))
         lines.append(self._get_ghosts_line(self._ghosts))
-        # append the root line which is special cased
-        lines.extend(map(self._entry_to_line, self._iter_entries()))
+        lines.extend(self._get_entry_lines())
         return self._get_output_lines(lines)
 
     def _get_ghosts_line(self, ghost_ids):
@@ -1818,6 +1866,10 @@
         """Create a line for the state file for parents information."""
         return '\0'.join([str(len(parent_ids))] + parent_ids)
 
+    def _get_entry_lines(self):
+        """Create lines for entries."""
+        return map(self._entry_to_line, self._iter_entries())
+
     def _get_fields_to_entry(self):
         """Get a function which converts entry fields into a entry record.
 
@@ -2211,18 +2263,22 @@
         """The number of parent entries in each record row."""
         return len(self._parents) - len(self._ghosts)
 
-    @staticmethod
-    def on_file(path, sha1_provider=None):
+    @classmethod
+    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
         """Construct a DirState on the file at path "path".
 
         :param path: The path at which the dirstate file on disk should live.
         :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
+        :param worth_saving_limit: when the exact number of hash changed
+            entries is known, only bother saving the dirstate if more than
+            this count of entries have changed. -1 means never save.
         :return: An unlocked DirState object, associated with the given path.
         """
         if sha1_provider is None:
             sha1_provider = DefaultSHA1Provider()
-        result = DirState(path, sha1_provider)
+        result = cls(path, sha1_provider,
+                     worth_saving_limit=worth_saving_limit)
         return result
 
     def _read_dirblocks_if_needed(self):
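
A hedged usage sketch of the widened on_file signature (the path and the limit value below are made up for illustration):

    from bzrlib import dirstate

    # Ask save() to skip rewriting the dirstate file unless at least ten
    # entries have a changed hash cache; -1 would mean never save hash-only
    # changes, and the default 0 keeps the old always-save behaviour.
    state = dirstate.DirState.on_file('.bzr/checkout/dirstate',
                                      worth_saving_limit=10)
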
@@ -2320,9 +2376,11 @@
             trace.mutter('Not saving DirState because '
                     '_changes_aborted is set.')
             return
-        if (self._header_state == DirState.IN_MEMORY_MODIFIED or
-            self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
-
+        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
+        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
+        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
+        #       fail to save IN_MEMORY_MODIFIED
+        if self._worth_saving():
             grabbed_write_lock = False
             if self._lock_state != 'w':
                 grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
@@ -2336,12 +2394,12 @@
                     # We couldn't grab a write lock, so we switch back to a read one
                     return
             try:
+                lines = self.get_lines()
                 self._state_file.seek(0)
-                self._state_file.writelines(self.get_lines())
+                self._state_file.writelines(lines)
                 self._state_file.truncate()
                 self._state_file.flush()
-                self._header_state = DirState.IN_MEMORY_UNMODIFIED
-                self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+                self._mark_unmodified()
             finally:
                 if grabbed_write_lock:
                     self._lock_token = self._lock_token.restore_read_lock()
@@ -2350,6 +2408,26 @@
                     #       not changed contents. Since restore_read_lock may
                     #       not be an atomic operation.
 
+    def _worth_saving(self):
+        """Is it worth saving the dirstate or not?"""
+        if (self._header_state == DirState.IN_MEMORY_MODIFIED
+            or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
+            return True
+        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
+            if self._worth_saving_limit == -1:
+                # We never save hash changes when the limit is -1
+                return False
+            # If we're using smart saving and only a small number of
+            # entries have changed their hash, don't bother saving. John has
+            # suggested using a heuristic here based on the size of the
+            # changed files and/or tree. For now, we go with a configurable
+            # number of changes, keeping the calculation time
+            # as low overhead as possible. (This also keeps all existing
+            # tests passing as the default is 0, i.e. always save.)
+            if len(self._known_hash_changes) >= self._worth_saving_limit:
+                return True
+        return False
+
     def _set_data(self, parent_ids, dirblocks):
         """Set the full dirstate data in memory.
 
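Restated outside the class, the hash-only branch of _worth_saving reduces to a simple threshold check. A small illustrative sketch (the standalone function name is hypothetical; the logic mirrors the method above):

    def worth_saving_hash_changes(num_changed, worth_saving_limit):
        # Mirrors the IN_MEMORY_HASH_MODIFIED branch of _worth_saving.
        if worth_saving_limit == -1:
            return False    # never persist hash-only changes
        return num_changed >= worth_saving_limit

    assert worth_saving_hash_changes(1, 0)         # default: always save
    assert not worth_saving_hash_changes(5, 10)    # below the limit: skip the write
    assert worth_saving_hash_changes(12, 10)
    assert not worth_saving_hash_changes(100, -1)  # -1: never save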
 
@@ -2363,8 +2441,7 @@
         """
         # our memory copy is now authoritative.
         self._dirblocks = dirblocks
-        self._header_state = DirState.IN_MEMORY_MODIFIED
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified(header_modified=True)
         self._parents = list(parent_ids)
         self._id_index = None
         self._packed_stat_index = None
@@ -2390,7 +2467,14 @@
         self._make_absent(entry)
         self.update_minimal(('', '', new_id), 'd',
             path_utf8='', packed_stat=entry[1][0][4])
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified()
+        # XXX: This was added by Ian, we need to make sure there
+        #      are tests for it, because it isn't in bzr.dev TRUNK
+        #      It looks like the only place it is called is in setting the root
+        #      id of the tree. So probably we never had an _id_index when we
+        #      don't even have a root yet.
+        if self._id_index is not None:
+            self._add_to_id_index(self._id_index, entry[0])
 
     def set_parent_trees(self, trees, ghosts):
         """Set the parent trees for the dirstate.
@@ -2463,7 +2547,7 @@
             # the suffix is from tree_index+1:parent_count+1.
             new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
             # now stitch in all the entries from this tree
-            for path, entry in tree.inventory.iter_entries_by_dir():
+            for path, entry in tree.iter_entries_by_dir():
                 # here we process each trees details for each item in the tree.
                 # we first update any existing entries for the id at other paths,
                 # then we either create or update the entry for the id at the
@@ -2531,8 +2615,7 @@
         self._entries_to_current_state(new_entries)
         self._parents = [rev_id for rev_id, tree in trees]
         self._ghosts = list(ghosts)
-        self._header_state = DirState.IN_MEMORY_MODIFIED
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified(header_modified=True)
         self._id_index = id_index
 
     def _sort_entries(self, entry_list):
@@ -2675,7 +2758,7 @@
                         current_old[0][1].decode('utf8'))
                 self._make_absent(current_old)
                 current_old = advance(old_iterator)
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified()
         self._id_index = None
         self._packed_stat_index = None
         if tracing:
@@ -2747,7 +2830,7 @@
             if update_tree_details[0][0] == 'a': # absent
                 raise AssertionError('bad row %r' % (update_tree_details,))
             update_tree_details[0] = DirState.NULL_PARENT_DETAILS
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified()
         return last_reference
 
     def update_minimal(self, key, minikind, executable=False, fingerprint='',
@@ -2922,7 +3005,7 @@
             if not present:
                 self._dirblocks.insert(block_index, (subdir_key[0], []))
 
-        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._mark_modified()
 
     def _maybe_remove_row(self, block, index, id_index):
         """Remove index if it is absent or relocated across the row.
@@ -3194,6 +3277,7 @@
     # If we have gotten this far, that means that we need to actually
     # process this entry.
     link_or_sha1 = None
+    worth_saving = True
     if minikind == 'f':
         executable = state._is_executable(stat_value.st_mode,
                                          saved_executable)
@@ -3215,6 +3299,7 @@
         else:
             entry[1][0] = ('f', '', stat_value.st_size,
                            executable, DirState.NULLSTAT)
+            worth_saving = False
     elif minikind == 'd':
         link_or_sha1 = None
         entry[1][0] = ('d', '', 0, False, packed_stat)
@@ -3226,7 +3311,11 @@
                 state._get_block_entry_index(entry[0][0], entry[0][1], 0)
             state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(entry[0][0], entry[0][1]))
+        else:
+            worth_saving = False
     elif minikind == 'l':
+        if saved_minikind == 'l':
+            worth_saving = False
         link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
         if state._cutoff_time is None:
             state._sha_cutoff_time()
@@ -3237,6 +3326,7 @@
         else:
             entry[1][0] = ('l', '', stat_value.st_size,
                            False, DirState.NULLSTAT)
-    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
+    if worth_saving:
+        state._mark_modified([entry])
     return link_or_sha1
 