~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Vincent Ladeuil
  • Date: 2012-03-13 17:25:29 UTC
  • mfrom: (6499 +trunk)
  • mto: This revision was merged to the branch mainline in revision 6501.
  • Revision ID: v.ladeuil+lp@free.fr-20120313172529-i0suyjnepsor25i7
Merge trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
218
218
 
219
219
"""
220
220
 
 
221
from __future__ import absolute_import
 
222
 
221
223
import bisect
222
 
import binascii
223
224
import errno
224
225
import operator
225
226
import os
226
227
from stat import S_IEXEC
227
228
import stat
228
 
import struct
229
229
import sys
230
230
import time
231
231
import zlib
232
232
 
233
233
from bzrlib import (
234
234
    cache_utf8,
 
235
    config,
235
236
    debug,
236
237
    errors,
237
238
    inventory,
239
240
    osutils,
240
241
    static_tuple,
241
242
    trace,
 
243
    urlutils,
242
244
    )
243
245
 
244
246
 
249
251
ERROR_DIRECTORY = 267
250
252
 
251
253
 
252
 
if not getattr(struct, '_compile', None):
253
 
    # Cannot pre-compile the dirstate pack_stat
254
 
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
255
 
        """Convert stat values into a packed representation."""
256
 
        return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),
257
 
            int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,
258
 
            st.st_mode))[:-1]
259
 
else:
260
 
    # compile the struct compiler we need, so as to only do it once
261
 
    from _struct import Struct
262
 
    _compiled_pack = Struct('>LLLLLL').pack
263
 
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
264
 
        """Convert stat values into a packed representation."""
265
 
        # jam 20060614 it isn't really worth removing more entries if we
266
 
        # are going to leave it in packed form.
267
 
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
268
 
        # With all entries, filesize is 5.9M and read time is maybe 280ms
269
 
        # well within the noise margin
270
 
 
271
 
        # base64 encoding always adds a final newline, so strip it off
272
 
        # The current version
273
 
        return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
274
 
            st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
275
 
        # This is 0.060s / 1.520s faster by not encoding as much information
276
 
        # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
277
 
        # This is not strictly faster than _encode(_pack())[:-1]
278
 
        # return '%X.%X.%X.%X.%X.%X' % (
279
 
        #      st.st_size, int(st.st_mtime), int(st.st_ctime),
280
 
        #      st.st_dev, st.st_ino, st.st_mode)
281
 
        # Similar to the _encode(_pack('>LL'))
282
 
        # return '%X.%X' % (int(st.st_mtime), st.st_mode)
283
 
 
284
 
 
285
 
def _unpack_stat(packed_stat):
286
 
    """Turn a packed_stat back into the stat fields.
287
 
 
288
 
    This is meant as a debugging tool, should not be used in real code.
289
 
    """
290
 
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
291
 
     st_mode) = struct.unpack('>LLLLLL', binascii.a2b_base64(packed_stat))
292
 
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
293
 
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)
294
 
 
295
 
 
296
254
class SHA1Provider(object):
297
255
    """An interface for getting sha1s of a file."""
298
256
 
448
406
        self._known_hash_changes = set()
449
407
        # How many hash changed entries can we have without saving
450
408
        self._worth_saving_limit = worth_saving_limit
 
409
        self._config_stack = config.LocationStack(urlutils.local_path_to_url(
 
410
            path))
451
411
 
452
412
    def __repr__(self):
453
413
        return "%s(%r)" % \
1332
1292
                    parent_trees.append((parent_id, parent_tree))
1333
1293
                    parent_tree.lock_read()
1334
1294
                result.set_parent_trees(parent_trees, [])
1335
 
                result.set_state_from_inventory(tree.inventory)
 
1295
                result.set_state_from_inventory(tree.root_inventory)
1336
1296
            finally:
1337
1297
                for revid, parent_tree in parent_trees:
1338
1298
                    parent_tree.unlock()
1601
1561
                    else:
1602
1562
                        source_path = child_basename
1603
1563
                    if new_path_utf8:
1604
 
                        target_path = new_path_utf8 + source_path[len(old_path):]
 
1564
                        target_path = \
 
1565
                            new_path_utf8 + source_path[len(old_path_utf8):]
1605
1566
                    else:
1606
 
                        if old_path == '':
 
1567
                        if old_path_utf8 == '':
1607
1568
                            raise AssertionError("cannot rename directory to"
1608
1569
                                                 " itself")
1609
 
                        target_path = source_path[len(old_path) + 1:]
 
1570
                        target_path = source_path[len(old_path_utf8) + 1:]
1610
1571
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
1611
1572
                    deletes.append(
1612
1573
                        (source_path, target_path, entry[0][2], None, False))
1613
 
                deletes.append((old_path_utf8, new_path, file_id, None, False))
 
1574
                deletes.append(
 
1575
                    (old_path_utf8, new_path_utf8, file_id, None, False))
 
1576
 
1614
1577
        self._check_delta_ids_absent(new_ids, delta, 1)
1615
1578
        try:
1616
1579
            # Finish expunging deletes/first half of renames.
1892
1855
                    file_id, "This parent is not a directory.")
1893
1856
 
1894
1857
    def _observed_sha1(self, entry, sha1, stat_value,
1895
 
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
 
1858
        _stat_to_minikind=_stat_to_minikind):
1896
1859
        """Note the sha1 of a file.
1897
1860
 
1898
1861
        :param entry: The entry the sha1 is for.
1904
1867
        except KeyError:
1905
1868
            # Unhandled kind
1906
1869
            return None
1907
 
        packed_stat = _pack_stat(stat_value)
1908
1870
        if minikind == 'f':
1909
1871
            if self._cutoff_time is None:
1910
1872
                self._sha_cutoff_time()
1911
1873
            if (stat_value.st_mtime < self._cutoff_time
1912
1874
                and stat_value.st_ctime < self._cutoff_time):
1913
1875
                entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
1914
 
                               packed_stat)
 
1876
                               pack_stat(stat_value))
1915
1877
                self._mark_modified([entry])
1916
1878
 
1917
1879
    def _sha_cutoff_time(self):
1962
1924
            # paths are produced by UnicodeDirReader on purpose.
1963
1925
            abspath = abspath.encode(fs_encoding)
1964
1926
        target = os.readlink(abspath)
1965
 
        if fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
 
1927
        if fs_encoding not in ('utf-8', 'ascii'):
1966
1928
            # Change encoding if needed
1967
1929
            target = target.decode(fs_encoding).encode('UTF-8')
1968
1930
        return target
2470
2432
            raise errors.BzrError('missing num_entries line')
2471
2433
        self._num_entries = int(num_entries_line[len('num_entries: '):-1])
2472
2434
 
2473
 
    def sha1_from_stat(self, path, stat_result, _pack_stat=pack_stat):
 
2435
    def sha1_from_stat(self, path, stat_result):
2474
2436
        """Find a sha1 given a stat lookup."""
2475
 
        return self._get_packed_stat_index().get(_pack_stat(stat_result), None)
 
2437
        return self._get_packed_stat_index().get(pack_stat(stat_result), None)
2476
2438
 
2477
2439
    def _get_packed_stat_index(self):
2478
2440
        """Get a packed_stat index of self._dirblocks."""
2508
2470
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
2509
2471
        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
2510
2472
        #       fail to save IN_MEMORY_MODIFIED
2511
 
        if self._worth_saving():
2512
 
            grabbed_write_lock = False
2513
 
            if self._lock_state != 'w':
2514
 
                grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
2515
 
                # Switch over to the new lock, as the old one may be closed.
 
2473
        if not self._worth_saving():
 
2474
            return
 
2475
 
 
2476
        grabbed_write_lock = False
 
2477
        if self._lock_state != 'w':
 
2478
            grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
 
2479
            # Switch over to the new lock, as the old one may be closed.
 
2480
            # TODO: jam 20070315 We should validate the disk file has
 
2481
            #       not changed contents, since temporary_write_lock may
 
2482
            #       not be an atomic operation.
 
2483
            self._lock_token = new_lock
 
2484
            self._state_file = new_lock.f
 
2485
            if not grabbed_write_lock:
 
2486
                # We couldn't grab a write lock, so we switch back to a read one
 
2487
                return
 
2488
        try:
 
2489
            lines = self.get_lines()
 
2490
            self._state_file.seek(0)
 
2491
            self._state_file.writelines(lines)
 
2492
            self._state_file.truncate()
 
2493
            self._state_file.flush()
 
2494
            self._maybe_fdatasync()
 
2495
            self._mark_unmodified()
 
2496
        finally:
 
2497
            if grabbed_write_lock:
 
2498
                self._lock_token = self._lock_token.restore_read_lock()
 
2499
                self._state_file = self._lock_token.f
2516
2500
                # TODO: jam 20070315 We should validate the disk file has
2517
 
                #       not changed contents. Since temporary_write_lock may
2518
 
                #       not be an atomic operation.
2519
 
                self._lock_token = new_lock
2520
 
                self._state_file = new_lock.f
2521
 
                if not grabbed_write_lock:
2522
 
                    # We couldn't grab a write lock, so we switch back to a read one
2523
 
                    return
2524
 
            try:
2525
 
                lines = self.get_lines()
2526
 
                self._state_file.seek(0)
2527
 
                self._state_file.writelines(lines)
2528
 
                self._state_file.truncate()
2529
 
                self._state_file.flush()
2530
 
                self._mark_unmodified()
2531
 
            finally:
2532
 
                if grabbed_write_lock:
2533
 
                    self._lock_token = self._lock_token.restore_read_lock()
2534
 
                    self._state_file = self._lock_token.f
2535
 
                    # TODO: jam 20070315 We should validate the disk file has
2536
 
                    #       not changed contents. Since restore_read_lock may
2537
 
                    #       not be an atomic operation.
 
2501
                #       not changed contents, since restore_read_lock may
 
2502
                #       not be an atomic operation.
 
2503
 
 
2504
    def _maybe_fdatasync(self):
 
2505
        """Flush to disk if possible and if not configured off."""
 
2506
        if self._config_stack.get('dirstate.fdatasync'):
 
2507
            osutils.fdatasync(self._state_file.fileno())
2538
2508
 
2539
2509
    def _worth_saving(self):
2540
2510
        """Is it worth saving the dirstate or not?"""
3385
3355
 
3386
3356
 
3387
3357
def py_update_entry(state, entry, abspath, stat_value,
3388
 
                 _stat_to_minikind=DirState._stat_to_minikind,
3389
 
                 _pack_stat=pack_stat):
 
3358
                 _stat_to_minikind=DirState._stat_to_minikind):
3390
3359
    """Update the entry based on what is actually on disk.
3391
3360
 
3392
3361
    This function only calculates the sha if it needs to - if the entry is
3405
3374
    except KeyError:
3406
3375
        # Unhandled kind
3407
3376
        return None
3408
 
    packed_stat = _pack_stat(stat_value)
 
3377
    packed_stat = pack_stat(stat_value)
3409
3378
    (saved_minikind, saved_link_or_sha1, saved_file_size,
3410
3379
     saved_executable, saved_packed_stat) = entry[1][0]
3411
3380
 
4284
4253
        _bisect_path_left,
4285
4254
        _bisect_path_right,
4286
4255
        cmp_by_dirs,
 
4256
        pack_stat,
4287
4257
        ProcessEntryC as _process_entry,
4288
4258
        update_entry as update_entry,
4289
4259
        )
4295
4265
        _bisect_path_left,
4296
4266
        _bisect_path_right,
4297
4267
        cmp_by_dirs,
 
4268
        pack_stat,
4298
4269
        )
4299
4270
    # FIXME: It would be nice to be able to track moved lines so that the
4300
4271
    # corresponding python code can be moved to the _dirstate_helpers_py