1852.13.6
by Robert Collins
start hooking in the prototype dirstate serialiser. |
1 |
# Copyright (C) 2006 by Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""DirState objects record the state of a directory and its bzr metadata."""
|
|
18 |
||
19 |
||
20 |
import base64
import binascii
import bisect
import cStringIO
import os
import sha
import struct
import time
import zlib

import bzrlib.inventory
from bzrlib.osutils import pathjoin, sha_file, sha_string, walkdirs
|
31 |
||
32 |
# TODO:
|
|
33 |
# 1)
|
|
34 |
||
35 |
class DirState(object):
    """Record directory and metadata state for fast access.

    A dirstate is a specialised data structure for managing local working
    tree state information. It is not yet well defined whether it is
    platform specific, and if it is, how we detect/parameterise that.

    Instances created by from_tree() carry their serialised form in the
    ``lines`` attribute, which get_lines() returns.
    """

    _kind_to_minikind = {'file': 'f', 'directory': 'd', 'symlink': 'l'}
    _minikind_to_kind = {'f': 'file', 'd': 'directory', 'l': 'symlink'}

    # Directory names that are neither recorded nor descended into.
    _control_dir_names = ('.bzr', '.hg', 'CVS', '.svn', '_svn')

    @staticmethod
    def from_tree(tree, base_path):
        """Create a dirstate from a bzr Tree and a local disk path.

        :param tree: The tree which should provide parent information and
            inventory ids.
        :param base_path: The local path to access the local fs data for this
            tree. This is not accessed via the tree object because we want to
            be able to seed DirStates from RevisionTrees during checkout.
            Possibly a better model is to start with an empty dirstate and
            populate it during the checkout operation, but that will require
            looking at the behaviour of set_inventory etc. which is not in
            scope yet - and is potentially very expensive as it requires an
            entire scan, or an inventory diff.
        :return: A DirState whose ``lines`` attribute holds the header lines
            followed by the serialised entries as one string.
        :raises ValueError: If the tree has more than 3 parents.
        """
        parent_ids = tree.get_parent_ids()
        num_parents = len(parent_ids)
        if num_parents > 3:
            raise ValueError('Cannot handle more than 3 parents')

        to_minikind = DirState._kind_to_minikind
        # Placeholder record used wherever no real parent data is written.
        null_parent_info = '\0'.join(('null:', '', '', '', '', '', ''))

        # First record: the parent count followed by the parent ids.
        # FIXME: is this utf8 safe?
        lines = ['\0'.join([str(num_parents)] + list(parent_ids))]

        # Root entry. Its per-parent records are null placeholders: real
        # ones are disabled because the root inventory entry has no revision
        # attribute set, so the parent trees are not consulted at all yet.
        root_stat = os.lstat(base_path)
        root_info = [
            '', '',  # No path
            'd', tree.inventory.root.file_id.encode('utf8'),
            str(root_stat.st_size),
            pack_stat(root_stat),
            '',  # No sha
            ] + [null_parent_info] * num_parents
        lines.append('\0'.join(root_info))

        # Prototype shortcut: every file is recorded with the sha1 of the
        # empty string instead of hashing its real content.
        test_sha = sha.new('').hexdigest()

        for block in walkdirs(base_path):
            to_remove = []
            for entry in block:
                relpath, name, kind, st, abspath = entry
                if kind == 'file':
                    #fingerprint = sha_file(open(abspath, 'rb'))
                    fingerprint = test_sha
                elif kind == 'directory':
                    if name in DirState._control_dir_names:
                        # Skip this, and all children
                        to_remove.append(entry)
                        continue
                    fingerprint = ''
                elif kind == 'symlink':
                    fingerprint = os.readlink(abspath)
                else:
                    # No minikind exists for any other kind, so such an
                    # entry cannot be serialised; leave it out.
                    continue

                minikind = to_minikind[kind]
                dirname, basename = os.path.split(relpath.encode('utf8'))
                size = str(st.st_size)

                # The first two parents get a record that repeats the
                # working tree data under that parent's own id; a third
                # parent gets the null placeholder.
                parent_info = [
                    '\0'.join((
                        parent_id, minikind, dirname, basename, size,
                        'n',  # Not executable
                        fingerprint,
                        ))
                    for parent_id in parent_ids[:2]]
                parent_info.extend(
                    null_parent_info for _unused in parent_ids[2:])

                # NOTE(review): gen_file_id is not imported in the visible
                # part of this module - confirm where it should come from.
                lines.append('\0'.join([
                    dirname, basename,
                    minikind,
                    gen_file_id(name).encode('utf8'),
                    size,
                    pack_stat(st),
                    fingerprint,
                    ] + parent_info))

            # It isn't safe to remove entries while we are iterating
            # over the same list, so remove them now
            for entry in to_remove:
                block.remove(entry)

        lines.append('')  # a final newline
        inventory_text = '\0\n\0'.join(lines)
        # -2, 1 for num parents, 1 for final newline
        num_entries = len(lines) - 2

        result = DirState()
        result.lines = [
            '#bzr dirstate flat format 1\n',
            'adler32: %s\n' % (zlib.adler32(inventory_text),),
            'num_entries: %s\n' % (num_entries,),
            inventory_text,
            ]
        return result

    def get_lines(self):
        """Serialise the entire dirstate to a sequence of lines.

        :return: The list stored in ``self.lines`` (set by from_tree()).
        """
        return self.lines
|
|
196 |
||
197 |
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
    """Convert stat values into a packed representation.

    :param st: A stat result, or any object with st_size, st_mtime,
        st_ctime, st_dev, st_ino and st_mode attributes.
    :param _encode: Base64 encoder that appends exactly one trailing
        newline to its output.
    :param _pack: Packer with the struct.pack signature.
    :return: The base64 encoding, without the trailing newline, of the six
        fields packed as big-endian 32-bit integers.
    """
    # jam 20060614 it isn't really worth removing more entries if we
    #       are going to leave it in packed form.
    #       With only st_mtime and st_mode filesize is 5.5M and read time
    #       is 275ms
    #       With all entries filesize is 5.9M and read time is maybe 280ms
    #       well within the noise margin

    fields = (st.st_size, st.st_mtime, st.st_ctime,
              st.st_dev, st.st_ino, st.st_mode)
    # Timestamps may be floats and sizes, devices or inodes may not fit in
    # 32 bits; truncate to int and keep the low 32 bits so packing cannot
    # fail. Values that fit in a signed 32-bit field pack to the same bytes
    # as the signed '>llllll' format did.
    packed = _pack('>LLLLLL', *[int(value) & 0xFFFFFFFF for value in fields])

    # the encoder always adds a final newline, so strip it off
    return _encode(packed)[:-1]
|
209 |