~bzr-pqm/bzr/bzr.dev

1852.13.6 by Robert Collins
start hooking in the prototype dirstate serialiser.
1
# Copyright (C) 2006 by Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""DirState objects record the state of a directory and its bzr metadata."""
18
19
20
import base64
21
import bisect
22
import cStringIO
23
import os
24
import sha
25
import struct
26
import time
27
import zlib
28
29
import bzrlib.inventory
30
from bzrlib.osutils import pathjoin, sha_file, sha_string, walkdirs
31
32
# TODO:
33
# 1)
34
35
class DirState(object):
    """Record directory and metadata state for fast access.

    A dirstate is a specialised data structure for managing local working
    tree state information. It's not yet well defined whether it is platform
    specific, and if it is how we detect/parameterise that.
    """

    # One-character codes used for entry kinds in the serialised form, and
    # the reverse mapping.
    _kind_to_minikind = {'file':'f', 'directory':'d', 'symlink':'l'}
    _minikind_to_kind = {'f':'file', 'd':'directory', 'l':'symlink'}

    @staticmethod
    def from_tree(tree, base_path):
        """Create a dirstate from a bzr Tree and a local disk path.

        :param tree: The tree which should provide parent information and
            inventory ids.
        :param base_path: The local path to access the local fs data for this tree.
            This is not accessed via the tree object because we want to be able
            to seed DirStates from RevisionTrees during checkout. Possibly
            a better model is to start with an empty dirstate and populate it
            during the checkout operation, but that will require looking at the
            behaviour of set_inventory etc. which is not in scope yet - and is
            potentially very expensive as it requires an entire scan, or an
            inventory diff.
        :return: A DirState whose ``lines`` attribute holds the serialised
            form: a format header, an adler32 line, a num_entries line and
            the NUL-separated inventory text.
        :raises ValueError: If the tree has more than 3 parents.
        """
        result = DirState()

        parent_ids = tree.get_parent_ids()
        num_parents = len(parent_ids)
        if num_parents > 3:
            raise ValueError('Cannot handle more than 3 parents')

        # First record: the parent count followed by the parent ids.
        # FIXME: is this utf8 safe?
        lines = ['\0'.join([str(num_parents)] + parent_ids)]

        to_minikind = DirState._kind_to_minikind

        # Placeholder parent record: 'null:' followed by six empty fields.
        null_parent_info = '\0'.join(('null:', '', '', '', '', '', ''))

        # The root entry carries placeholder parent records because the root
        # inventory entry has no revision attribute set.
        root_st = os.lstat(base_path)
        root_info = [
            '', '', # No path
            'd', tree.inventory.root.file_id.encode('utf8'),
            str(root_st.st_size),
            pack_stat(root_st),
            '', # No sha
            ] + [null_parent_info] * num_parents
        lines.append('\0'.join(root_info))

        # Content hashing is disabled in this prototype; every file records
        # the sha of the empty string instead.
        test_sha = sha.new('').hexdigest()

        for block in walkdirs(base_path):
            to_remove = []
            for relpath, name, kind, st, abspath in block:
                s = None
                dirname, basename = os.path.split(relpath.encode('utf8'))
                if kind == 'file':
                    #s = sha_file(open(abspath, 'rb'))
                    s = test_sha
                elif kind == 'directory':
                    if name in ('.bzr', '.hg', 'CVS', '.svn', '_svn'):
                        # Skip this, and all children
                        to_remove.append((relpath, name, kind, st, abspath))
                        continue
                    s = ''
                elif kind == 'symlink':
                    # For symlinks the link target takes the place of the sha.
                    s = os.readlink(abspath)

                # The first two parents get a record built from the working
                # file's own details, each tagged with its own parent id;
                # any further parent gets the null placeholder.
                parent_info = []
                for parent_id in parent_ids[:2]:
                    parent_info.append('\0'.join((
                        parent_id,
                        to_minikind[kind],
                        dirname, basename,
                        str(st.st_size),
                        'n', # Not executable
                        s,
                        )))
                parent_info.extend([null_parent_info] * (num_parents - 2))

                # NOTE(review): gen_file_id is not imported in the visible
                # part of this module - confirm it is in scope before this
                # path is exercised.
                lines.append('\0'.join([
                    dirname, basename,
                    to_minikind[kind],
                    gen_file_id(name).encode('utf8'),
                    str(st.st_size),
                    pack_stat(st),
                    s,
                    ] + parent_info))

            # It isn't safe to remove entries while we are iterating
            # over the same list, so remove them now
            for entry in to_remove:
                block.remove(entry)

        output_lines = ['#bzr dirstate flat format 1\n']

        lines.append('') # a final newline
        inventory_text = '\0\n\0'.join(lines)
        output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
        # -2, 1 for num parents, 1 for final newline
        num_entries = len(lines) - 2
        output_lines.append('num_entries: %s\n' % (num_entries,))
        output_lines.append(inventory_text)

        result.lines = output_lines
        return result

    def get_lines(self):
        """Serialise the entire dirstate to a sequence of lines.

        :return: The list stored in ``self.lines`` (set by from_tree).
        """
        return self.lines
196
197
def pack_stat(st,
              _encode=getattr(base64, 'encodebytes', None) or base64.encodestring,
              _pack=struct.pack):
    """Convert stat values into a packed representation.

    :param st: An object with st_size, st_mtime, st_ctime, st_dev, st_ino
        and st_mode attributes, such as the result of os.lstat().
    :param _encode: Encoder applied to the packed bytes; it must append
        exactly one trailing newline, which is stripped. Defaults to the
        base64 module's line encoder (encodebytes where available,
        otherwise encodestring).
    :param _pack: The struct packing function.
    :return: The base64 text of six big-endian 32-bit fields, without the
        trailing newline.
    """
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin

    # Timestamps may be floats, which the integer struct codes do not
    # accept, so truncate them. Every field is reduced modulo 2**32 and
    # packed unsigned so that large inode/device/size values cannot
    # overflow; values that fit in 32 bits produce the same bytes as before.
    mask = 0xFFFFFFFF
    packed = _pack('>LLLLLL',
                   st.st_size & mask,
                   int(st.st_mtime) & mask,
                   int(st.st_ctime) & mask,
                   st.st_dev & mask,
                   st.st_ino & mask,
                   st.st_mode & mask)
    # base64.encode always adds a final newline, so strip it off
    return _encode(packed)[:-1]
209