~bzr-pqm/bzr/bzr.dev

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# (C) 2005 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import stat, os, sha, time
from binascii import b2a_qp, a2b_qp

from trace import mutter


# file fingerprints are: (size, mtime, ctime, ino, dev).
#
# if this is the same for this file as in the previous revision, we
# assume the content is the same and the SHA-1 is the same.

# This is stored in a fingerprint file that also contains the file-id
# and the content SHA-1.

# Thus for any given file we can quickly get the SHA-1, either from
# the cache or, if the cache is out of date, by recomputing it.

# At the moment this is stored in a simple textfile; it might be nice
# to use a tdb instead.


# What we need:

# build a new cache from scratch
# load cache, incrementally update it

# TODO: Have a paranoid mode where we always compare the texts and
# always recalculate the digest, to trap modification without stat
# change and SHA collisions.



def fingerprint(path, abspath):
    """Return the stat fingerprint of the file at abspath.

    path is accepted but not consulted; only abspath is stat'ed.

    Returns a tuple (size, mtime, ctime, ino, dev) of integers, or None
    if the file cannot be stat'ed (e.g. it is missing) or is a directory.
    """
    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        return None

    # The timestamps may be reported as floats with a fractional part.
    # write_cache stores them with '%d' and load_cache reads them back as
    # integers, so truncate here; otherwise a freshly computed fingerprint
    # would not compare equal to the one reloaded from the cache.
    return (fs.st_size, int(fs.st_mtime),
            int(fs.st_ctime), fs.st_ino, fs.st_dev)


def write_cache(branch, entry_iter):
    """Write cache entries to the branch's 'work-cache' control file.

    entry_iter yields tuples of the form
    (file_id, sha1, path, size, mtime, ctime, ino, dev).  Each entry is
    written as one space-separated line, with the path quoted-printable
    encoded.

    The entries are first written to 'work-cache.tmp', which is then
    renamed over 'work-cache'.  If writing fails the temporary file is
    still closed and the exception propagates without the rename
    happening, so the existing cache is left untouched.
    """
    outf = branch.controlfile('work-cache.tmp', 'wt')
    try:
        for entry in entry_iter:
            outf.write(entry[0] + ' ' + entry[1] + ' ')
            # quotetabs=True so that spaces and tabs in the path are
            # encoded and cannot be confused with the field separator
            outf.write(b2a_qp(entry[2], True))
            outf.write(' %d %d %d %d %d\n' % entry[3:])
    finally:
        # always release the handle, even if an entry could not be written
        outf.close()

    os.rename(branch.controlfilename('work-cache.tmp'),
              branch.controlfilename('work-cache'))

        
        
def load_cache(branch):
    """Read the branch's 'work-cache' control file into a dictionary.

    Returns a dict mapping file_id to a tuple whose first three items
    are (file_id, sha1, path), followed by the remaining fields of the
    line converted to integers.  If the cache file cannot be opened, an
    empty dict is returned.

    Raises an error if the same file_id occurs on more than one line.
    """
    cache = {}

    try:
        cachefile = branch.controlfile('work-cache', 'rt')
    except IOError:
        # no readable cache file: start from an empty cache
        return cache

    try:
        for line in cachefile:
            fields = line.split(' ')
            file_id = fields[0]
            if file_id in cache:
                # NOTE(review): BzrError is not imported in the visible
                # part of this module -- confirm it is in scope.
                raise BzrError("duplicated file_id in cache: {%s}" % file_id)
            # the path is stored quoted-printable encoded; the rest of
            # the line holds the integer stat fields
            cache[file_id] = ((fields[0], fields[1], a2b_qp(fields[2]))
                              + tuple([long(x) for x in fields[3:]]))
    finally:
        # release the handle whether or not parsing succeeded
        cachefile.close()
    return cache




def _files_from_inventory(inv):
    """Yield (file_id, path) for every inventory entry of kind 'file'.

    Entries of any other kind are skipped.
    """
    for relpath, entry in inv.iter_entries():
        if entry.kind == 'file':
            yield entry.file_id, relpath
    

def build_cache(branch):
    """Build a new cache from scratch for the branch's working inventory.

    Starts from an empty cache rather than the one stored on disk, and
    fills it in for every file in the working inventory.

    Returns the resulting cache dictionary, as update_cache does.
    """
    inv = branch.read_working_inventory()

    cache = {}
    return _update_cache_from_list(branch, cache, _files_from_inventory(inv))
    


def update_cache(branch, inv):
    """Load the branch's stored cache and refresh it for the files in inv.

    Returns the updated cache dictionary.
    """
    # TODO: It's supposed to be faster to stat the files in order by inum.
    # We don't directly know the inum of the files of course but we do
    # know where they were last sighted, so we can sort by that.

    known = load_cache(branch)
    candidates = _files_from_inventory(inv)
    return _update_cache_from_list(branch, known, candidates)



def _update_cache_from_list(branch, cache, to_update):
    """Update the cache to have info on the named files.

    to_update is a sequence of (file_id, path) pairs.

    cache is modified in place and is also returned.  Entries for files
    that can no longer be fingerprinted are removed; files whose stat
    fingerprint differs from the cached one are re-hashed.  If any entry
    was added, replaced or removed, the whole cache is written back with
    write_cache.
    """
    hardcheck = dirty = 0
    for file_id, path in to_update:
        fap = branch.abspath(path)
        # fingerprint() takes (path, abspath) and stats its second
        # argument, so the absolute path must be passed second.
        fp = fingerprint(path, fap)
        cacheentry = cache.get(file_id)

        if fp is None: # not here
            if cacheentry:
                del cache[file_id]
                dirty += 1
            continue

        if cacheentry and (cacheentry[3:] == fp):
            continue                    # all stat fields unchanged

        hardcheck += 1

        # read the text and close the handle promptly rather than
        # leaving it to be garbage collected
        textfile = open(fap, 'rb')
        try:
            dig = sha.new(textfile.read()).hexdigest()
        finally:
            textfile.close()

        if cacheentry is None or dig != cacheentry[1]:
            # if there was no previous entry for this file, or if the
            # SHA has changed, then update the cache
            # NOTE(review): when only the stat fields changed, the stale
            # fingerprint is kept, so the file is re-hashed on every run.
            cacheentry = (file_id, dig, path) + fp
            cache[file_id] = cacheentry
            dirty += 1

    mutter('work cache: read %d files, %d changed' % (hardcheck, dirty))

    if dirty:
        write_cache(branch, cache.itervalues())

    return cache