~abentley/bzrtools/bzrtools.dev

« back to all changes in this revision

Viewing changes to upstream_import.py

  • Committer: Aaron Bentley
  • Date: 2005-11-10 21:04:19 UTC
  • Revision ID: aaron.bentley@utoronto.ca-20051110210419-a402638d94693825
Handled whitespace branch names better

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
"""Import upstream source into a branch"""
2
 
 
3
 
import errno
4
 
import os
5
 
import re
6
 
from StringIO import StringIO
7
 
import stat
8
 
import tarfile
9
 
import zipfile
10
 
 
11
 
from bzrlib import generate_ids
12
 
from bzrlib.bzrdir import BzrDir
13
 
from bzrlib.errors import NoSuchFile, BzrCommandError, NotBranchError
14
 
from bzrlib.osutils import (pathjoin, isdir, file_iterator, basename,
15
 
                            file_kind, splitpath)
16
 
from bzrlib.trace import warning
17
 
from bzrlib.transform import TreeTransform, resolve_conflicts, cook_conflicts
18
 
from bzrlib.workingtree import WorkingTree
19
 
from bzrlib.plugins.bzrtools.bzrtools import open_from_url
20
 
from bzrlib.plugins.bzrtools import errors
21
 
 
22
 
class ZipFileWrapper(object):
    """Adapter giving a zip archive the archive methods the importer calls."""

    def __init__(self, fileobj, mode):
        """Open `fileobj` as a zip archive using the given zipfile `mode`."""
        self.zipfile = zipfile.ZipFile(fileobj, mode)

    def getmembers(self):
        """Yield one ZipInfoWrapper per entry listed in the archive."""
        archive = self.zipfile
        for entry in archive.infolist():
            yield ZipInfoWrapper(archive, entry)

    def extractfile(self, infowrapper):
        """Return a file-like object holding the content of one member."""
        payload = self.zipfile.read(infowrapper.name)
        return StringIO(payload)

    def add(self, filename):
        """Add `filename` from disk; directories become empty 'name/' entries."""
        if not isdir(filename):
            self.zipfile.write(filename)
            return
        self.zipfile.writestr(filename + '/', '')

    def close(self):
        """Close the underlying zip archive."""
        self.zipfile.close()
42
 
 
43
 
 
44
 
class ZipInfoWrapper(object):
    """Adapter exposing a zip entry through the member attributes the importer reads."""

    def __init__(self, zipfile, info):
        """Remember the owning archive and the entry's info record."""
        self.zipfile = zipfile
        self.info = info
        self.name = info.filename
        # There is no type flag for zip entries; callers compare this to 'g'.
        self.type = None
        # Every entry is reported as rw for everyone, never executable.
        self.mode = 0o666

    def isdir(self):
        """Return True when the entry name ends with a slash."""
        # Really? Eeeew!
        return self.name.endswith('/')

    def isreg(self):
        """Return True for every entry that is not a directory."""
        # Really? Eeeew!
        if self.isdir():
            return False
        return True
60
 
 
61
 
 
62
 
class DirWrapper(object):
    """Expose a directory on disk through the archive methods the importer calls."""

    def __init__(self, fileobj, mode='r'):
        """Create a wrapper for the directory whose path `fileobj.read()` returns.

        :param fileobj: file-like object; its entire content is taken as the
            path of the directory to walk.
        :param mode: must be 'r'; only reading is supported.
        """
        assert mode == 'r', mode
        self.root = os.path.realpath(fileobj.read())

    def __repr__(self):
        return 'DirWrapper(%r)' % self.root

    def getmembers(self, subdir=None):
        """Yield a FileInfo for every entry below the root, parents first.

        :param subdir: path relative to the root to list instead of the root
            itself; used for the recursive descent.
        """
        if subdir is not None:
            mydir = pathjoin(self.root, subdir)
        else:
            mydir = self.root
        for child in os.listdir(mydir):
            if subdir is not None:
                child = pathjoin(subdir, child)
            fi = FileInfo(self.root, child)
            yield fi
            if fi.isdir():
                for v in self.getmembers(child):
                    yield v

    def extractfile(self, member):
        """Return an open file object for reading the member's content.

        The file is opened in binary mode so the bytes are returned exactly
        as stored, with no newline translation on any platform.
        """
        return open(member.fullpath, 'rb')
86
 
 
87
 
 
88
 
class FileInfo(object):
    """Describe one filesystem entry using the member attributes the importer reads."""

    def __init__(self, root, filepath):
        """Collect name and mode for `filepath` located under `root`.

        :param root: path of the directory being imported.
        :param filepath: path of the entry relative to `root`; '' means the
            root directory itself.
        """
        self.fullpath = pathjoin(root, filepath)
        self.root = root
        # Member names start with the root's basename.
        if filepath != '':
            self.name = pathjoin(basename(root), filepath)
        else:
            self.name = basename(root)
        self.type = None
        # lstat so a symlink reports its own mode rather than its target's.
        self.mode = os.lstat(self.fullpath).st_mode
        if self.isdir():
            self.name += '/'

    def __repr__(self):
        return 'FileInfo(%r)' % self.name

    def isreg(self):
        """Return True if the entry is a regular file."""
        return stat.S_ISREG(self.mode)

    def isdir(self):
        """Return True if the entry is a directory."""
        return stat.S_ISDIR(self.mode)

    def issym(self):
        """Return True if the entry is a symlink, recording its target.

        As a side effect, `self.linkname` is set to the link target when the
        entry is a symlink; the attribute is not set otherwise.
        """
        if stat.S_ISLNK(self.mode):
            self.linkname = os.readlink(self.fullpath)
            return True
        else:
            return False
119
 
 
120
 
 
121
 
def top_path(path):
    """Return the first component of `path`, or '' if it has none."""
    components = splitpath(path)
    if not components:
        return ''
    return components[0]
128
 
 
129
 
 
130
 
def common_directory(names):
    """Return the top-level directory shared by all `names`, or None.

    None is returned when the names do not all share one top directory, when
    any name has an empty top component, or when `names` is empty.
    """
    shared = None
    for candidate in names:
        head = top_path(candidate)
        if head == '':
            return None
        if shared is None:
            # The first name decides what the shared prefix has to be.
            shared = head
        elif head != shared:
            return None
    return shared
143
 
 
144
 
 
145
 
def do_directory(tt, trans_id, tree, relative_path, path):
    """Make `trans_id` a directory in the transform.

    If `path` is already a directory on disk and `relative_path` is versioned
    in `tree`, the existing directory is kept by cancelling its deletion;
    otherwise a new directory is created.
    """
    keep_existing = isdir(path) and tree.path2id(relative_path) is not None
    if not keep_existing:
        tt.create_directory(trans_id)
        return
    tt.cancel_deletion(trans_id)
150
 
 
151
 
 
152
 
def add_implied_parents(implied_parents, path):
    """Add every ancestor directory of `path` to the `implied_parents` set.

    Walking up stops at the first ancestor that is already in the set.
    """
    ancestor = os.path.dirname(path)
    while ancestor not in implied_parents:
        implied_parents.add(ancestor)
        ancestor = os.path.dirname(ancestor)
159
 
 
160
 
 
161
 
def names_of_files(tar_file):
    """Yield the name of each archive member, skipping type 'g' entries."""
    for entry in tar_file.getmembers():
        if entry.type == "g":
            continue
        yield entry.name
165
 
 
166
 
 
167
 
def should_ignore(relative_path):
    """Return True for paths whose top-level component is '.bzr'."""
    first_component = top_path(relative_path)
    return first_component == '.bzr'
169
 
 
170
 
 
171
 
def import_tar(tree, tar_input):
    """Replace the contents of a working directory with tarfile contents.

    `tar_input` is a file-like object containing the tar data; it may be a
    gzipped stream.  File ids will be updated.
    """
    # The name is only a placeholder; the data is read from tar_input.
    archive = tarfile.open(name='lala', mode='r', fileobj=tar_input)
    import_archive(tree, archive)
177
 
 
178
 
def import_zip(tree, zip_input):
    """Replace the contents of a working directory with zip file contents.

    `zip_input` is a file-like object containing the zip data.
    """
    archive = ZipFileWrapper(zip_input, 'r')
    import_archive(tree, archive)
181
 
 
182
 
def import_dir(tree, dir_input):
    """Replace the contents of a working directory with a directory's contents.

    `dir_input` is a file-like object whose content is the directory path.
    """
    source_dir = DirWrapper(dir_input)
    import_archive(tree, source_dir)
185
 
 
186
 
 
187
 
def import_archive(tree, archive_file):
    """Import `archive_file` into `tree` through a single TreeTransform.

    The transform is finalized whether or not the import succeeds.
    """
    transform = TreeTransform(tree)
    try:
        import_archive_to_transform(tree, archive_file, transform)
        transform.apply()
    finally:
        transform.finalize()
194
 
 
195
 
 
196
 
def import_archive_to_transform(tree, archive_file, tt):
    """Populate the transform `tt` so `tree` ends up matching `archive_file`.

    All existing contents below the tree root are scheduled for deletion.
    Every archive member is then created in the transform, with a top-level
    directory shared by all members stripped from its path.  Parent
    directories that appear only implicitly in member paths are created as
    well.  Paths that were in the tree but not in the archive are unversioned,
    and any remaining conflicts are reported as warnings.

    :param tree: the working tree being imported into.
    :param archive_file: object providing getmembers() and extractfile().
    :param tt: the TreeTransform to populate; it is not applied here.
    """
    prefix = common_directory(names_of_files(archive_file))
    if prefix is not None:
        # Member names are decoded before the prefix is cut off, so the
        # prefix must be measured in decoded characters as well; the +1
        # drops the separator that follows it.
        prefix_length = len(prefix.decode('utf-8')) + 1
    removed = set()
    for path, entry in tree.iter_entries_by_dir():
        if entry.parent_id is None:
            # The tree root itself is never deleted.
            continue
        trans_id = tt.trans_id_tree_path(path)
        tt.delete_contents(trans_id)
        removed.add(path)

    added = set()
    implied_parents = set()
    seen = set()
    for member in archive_file.getmembers():
        if member.type == 'g':
            # type 'g' is a header
            continue
        # Inverse functionality in bzr uses utf-8.  We could also
        # interpret relative to fs encoding, which would match native
        # behaviour better.
        relative_path = member.name.decode('utf-8')
        if prefix is not None:
            relative_path = relative_path[prefix_length:]
            relative_path = relative_path.rstrip('/')
        if relative_path == '':
            continue
        if should_ignore(relative_path):
            continue
        add_implied_parents(implied_parents, relative_path)
        trans_id = tt.trans_id_tree_path(relative_path)
        added.add(relative_path.rstrip('/'))
        path = tree.abspath(relative_path)
        if member.name in seen:
            # The same name occurred earlier in the archive: undo what was
            # created for it so this entry replaces it.
            if tt.final_kind(trans_id) == 'file':
                tt.set_executability(None, trans_id)
            tt.cancel_creation(trans_id)
        seen.add(member.name)
        if member.isreg():
            tt.create_file(file_iterator(archive_file.extractfile(member)),
                           trans_id)
            # Executable if any of the user/group/other execute bits is set.
            executable = (member.mode & 0o111) != 0
            tt.set_executability(executable, trans_id)
        elif member.isdir():
            do_directory(tt, trans_id, tree, relative_path, path)
        elif member.issym():
            tt.create_symlink(member.linkname, trans_id)
        else:
            # Other member kinds are not imported.
            continue
        if tt.tree_file_id(trans_id) is None:
            name = basename(member.name.rstrip('/'))
            file_id = generate_ids.gen_file_id(name)
            tt.version_file(file_id, trans_id)

    for relative_path in implied_parents.difference(added):
        if relative_path == "":
            continue
        trans_id = tt.trans_id_tree_path(relative_path)
        path = tree.abspath(relative_path)
        do_directory(tt, trans_id, tree, relative_path, path)
        if tt.tree_file_id(trans_id) is None:
            # Generate a real file id, as is done for explicit members,
            # rather than reusing the transform id as the file id.
            file_id = generate_ids.gen_file_id(basename(relative_path))
            tt.version_file(file_id, trans_id)
        added.add(relative_path)

    for path in removed.difference(added):
        tt.unversion_file(tt.trans_id_tree_path(path))

    for conflict in cook_conflicts(resolve_conflicts(tt), tt):
        warning(conflict)
264
 
 
265
 
 
266
 
def do_import(source, tree_directory=None):
    """Implementation of import command.  Intended for UI only

    :param source: path or URL of a zip file, a tar file (optionally
        compressed) or a directory to import.
    :param tree_directory: directory of the working tree to import into.  If
        it is not a branch yet, one is created there (creating the directory
        if needed).  When None, the tree containing '.' is used.
    :raises BzrCommandError: if the tree has uncommitted changes, or if
        `source` is neither a recognised archive nor a directory.
    :raises NoSuchFile: if reading a tar source fails with ENOENT.
    """
    if tree_directory is not None:
        try:
            tree = WorkingTree.open(tree_directory)
        except NotBranchError:
            if not os.path.exists(tree_directory):
                os.mkdir(tree_directory)
            branch = BzrDir.create_branch_convenience(tree_directory)
            tree = branch.bzrdir.open_workingtree()
    else:
        tree = WorkingTree.open_containing('.')[0]
    tree.lock_write()
    try:
        if tree.changes_from(tree.basis_tree()).has_changed():
            raise BzrCommandError("Working tree has uncommitted changes.")

        try:
            archive, external_compressor = get_archive_type(source)
        except errors.NotArchiveType:
            if file_kind(source) != 'directory':
                raise BzrCommandError('Unhandled import source')
            # import_dir expects a file-like object containing the path.
            s = StringIO(source)
            s.seek(0)
            import_dir(tree, s)
        else:
            if archive == 'zip':
                zip_input = open_from_url(source)
                try:
                    import_zip(tree, zip_input)
                finally:
                    zip_input.close()
            elif archive == 'tar':
                raw_input = None
                try:
                    try:
                        raw_input = open_from_url(source)
                        tar_input = raw_input
                        # bz2 and lzma are decompressed here into memory;
                        # any other stream is handed to import_tar as is.
                        if external_compressor == 'bz2':
                            import bz2
                            tar_input = StringIO(
                                bz2.decompress(raw_input.read()))
                        elif external_compressor == 'lzma':
                            import lzma
                            tar_input = StringIO(
                                lzma.decompress(raw_input.read()))
                    except IOError as e:
                        if e.errno == errno.ENOENT:
                            raise NoSuchFile(source)
                        # Any other I/O failure must not be swallowed: the
                        # input would be missing or only partially read.
                        raise
                    import_tar(tree, tar_input)
                finally:
                    # Always release the stream that was actually opened,
                    # even when an in-memory buffer replaced it.
                    if raw_input is not None:
                        raw_input.close()
    finally:
        tree.unlock()
313
 
 
314
 
 
315
 
def get_archive_type(path):
    """Return the type of archive and compressor indicated by path name.

    Only external compressors are returned, so zip files are only
    ('zip', None).  .tgz is treated as ('tar', 'gz') and '.tar.xz' is treated
    as ('tar', 'lzma').

    :param path: file name or path; only its suffix is examined.
    :return: an (archive, external_compressor) tuple; the compressor is None
        for '.zip' and plain '.tar'.
    :raises errors.NotArchiveType: if the suffix is not a recognised one.
    """
    # The dot before the compressor suffix is escaped so that only a literal
    # '.' separates 'tar' from it ('foo.tar-gz' is not an archive name).
    matches = re.match(r'.*\.(zip|tgz|tar(\.(gz|bz2|lzma|xz))?)$', path)
    if not matches:
        raise errors.NotArchiveType(path)
    suffix = matches.group(1)
    compressor = matches.group(3)
    if compressor is not None:
        # '.tar.<compressor>'; xz is reported under the name 'lzma'.
        if compressor == 'xz':
            compressor = 'lzma'
        return 'tar', compressor
    if suffix == 'tgz':
        return 'tar', 'gz'
    return suffix, None