~abentley/bzrtools/bzrtools.dev

« back to all changes in this revision

Viewing changes to upstream_import.py

  • Committer: Aaron Bentley
  • Date: 2012-03-20 02:40:57 UTC
  • Revision ID: aaron@aaronbentley.com-20120320024057-mqltf93xxs09r0ry
Compatibility fixes for bzr.dev

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
"""Import upstream source into a branch"""
 
2
 
 
3
import errno
 
4
import os
 
5
import re
 
6
from StringIO import StringIO
 
7
import stat
 
8
import tarfile
 
9
import zipfile
 
10
 
 
11
from bzrlib import generate_ids
 
12
from bzrlib.bzrdir import BzrDir
 
13
from bzrlib.errors import NoSuchFile, BzrCommandError, NotBranchError
 
14
from bzrlib.osutils import (pathjoin, isdir, file_iterator, basename,
 
15
                            file_kind, splitpath)
 
16
from bzrlib.trace import warning
 
17
from bzrlib.transform import TreeTransform, resolve_conflicts, cook_conflicts
 
18
from bzrlib.workingtree import WorkingTree
 
19
from bzrlib.plugins.bzrtools.bzrtools import open_from_url
 
20
from bzrlib.plugins.bzrtools import errors
 
21
 
 
22
class ZipFileWrapper(object):
    """Wrap a zipfile.ZipFile behind getmembers/extractfile/add/close.

    Members are handed out as ZipInfoWrapper objects so that the import
    code can treat a zip archive like the other archive sources.
    """

    def __init__(self, fileobj, mode):
        """Open ``fileobj`` as a zip archive using ``mode``."""
        self.zipfile = zipfile.ZipFile(fileobj, mode)

    def getmembers(self):
        """Yield a ZipInfoWrapper for each entry of the archive."""
        for entry in self.zipfile.infolist():
            wrapped = ZipInfoWrapper(self.zipfile, entry)
            yield wrapped

    def extractfile(self, infowrapper):
        """Return a file-like object holding the member's full contents."""
        payload = self.zipfile.read(infowrapper.name)
        return StringIO(payload)

    def add(self, filename):
        """Add ``filename`` from disk; directories become empty '/' entries."""
        if not isdir(filename):
            self.zipfile.write(filename)
            return
        # A directory is stored as a zero-length entry named with a
        # trailing slash.
        self.zipfile.writestr(filename+'/', '')

    def close(self):
        """Close the underlying ZipFile."""
        self.zipfile.close()
 
42
 
 
43
 
 
44
class ZipInfoWrapper(object):
    """Describe one zip entry with the attributes the import code reads.

    Exposes ``name``, ``type``, ``mode``, ``isdir()`` and ``isreg()`` for a
    ``zipfile.ZipInfo`` record.
    """

    def __init__(self, zipfile, info):
        """Record ``info`` and the archive object it came from.

        :param zipfile: The archive object that owns ``info``.
        :param info: The entry record; its ``filename`` becomes ``name``.
        """
        self.info = info
        self.type = None
        self.name = info.filename
        self.zipfile = zipfile
        # Every entry is reported as rw-rw-rw- with no execute bits.
        # Spelled 0o666 so the literal parses on Python 2.6+ and Python 3.
        self.mode = 0o666

    def isdir(self):
        """Return True when the entry name ends with '/'."""
        # Really? Eeeew!
        return self.name.endswith('/')

    def isreg(self):
        """Return True for every entry that is not a directory."""
        # Really? Eeeew!
        return not self.isdir()
 
60
 
 
61
 
 
62
class DirWrapper(object):
    """Present a directory on disk through getmembers/extractfile.

    The directory path is not passed directly: it is read from ``fileobj``,
    so the constructor has the same (fileobj, mode) shape as the other
    archive wrappers.
    """

    def __init__(self, fileobj, mode='r'):
        """Read the directory path from ``fileobj``.

        :param fileobj: File-like object whose ``read()`` returns the path.
        :param mode: Must be 'r'; only reading is supported.
        """
        assert mode == 'r', mode
        self.root = os.path.realpath(fileobj.read())

    def __repr__(self):
        return 'DirWrapper(%r)' % self.root

    def getmembers(self, subdir=None):
        """Yield a FileInfo for each entry below the root, recursively.

        Each directory is yielded before the entries it contains.

        :param subdir: Path relative to the root to list; None lists the
            root itself.
        """
        if subdir is not None:
            mydir = pathjoin(self.root, subdir)
        else:
            mydir = self.root
        for child in os.listdir(mydir):
            if subdir is not None:
                child = pathjoin(subdir, child)
            fi = FileInfo(self.root, child)
            yield fi
            if fi.isdir():
                for v in self.getmembers(child):
                    yield v

    def extractfile(self, member):
        """Return an open binary file for ``member``.

        The caller is responsible for closing the returned file.
        """
        # Binary mode: the contents are copied verbatim, so no newline
        # translation or text decoding may be applied while reading.
        return open(member.fullpath, 'rb')
 
86
 
 
87
 
 
88
class FileInfo(object):
    """Describe one filesystem entry below a root directory.

    Provides ``name``, ``type``, ``mode``, ``isreg()``, ``isdir()`` and
    ``issym()``.  ``name`` is prefixed with the root's basename, and
    directories get a trailing '/'.
    """

    def __init__(self, root, filepath):
        """Stat ``filepath`` below ``root`` without following symlinks.

        :param root: Directory the path is relative to.
        :param filepath: Path relative to ``root``; '' denotes the root.
        """
        self.fullpath = pathjoin(root, filepath)
        self.root = root
        if filepath != '':
            self.name = pathjoin(basename(root), filepath)
        else:
            # NOTE(review): this looks like leftover debugging output; it is
            # kept so behaviour is unchanged.  The parenthesised form prints
            # the same text on Python 2 and Python 3.
            print('root %r' % root)
            self.name = basename(root)
        self.type = None
        # Named file_stat rather than stat so the stat module is not
        # shadowed inside this method.
        file_stat = os.lstat(self.fullpath)
        self.mode = file_stat.st_mode
        if self.isdir():
            self.name += '/'

    def __repr__(self):
        return 'FileInfo(%r)' % self.name

    def isreg(self):
        """Return True when the entry is a regular file."""
        return stat.S_ISREG(self.mode)

    def isdir(self):
        """Return True when the entry is a directory."""
        return stat.S_ISDIR(self.mode)

    def issym(self):
        """Return True when the entry is a symlink.

        As a side effect, ``linkname`` is set to the link target; the
        attribute does not exist until this has returned True.
        """
        if stat.S_ISLNK(self.mode):
            self.linkname = os.readlink(self.fullpath)
            return True
        else:
            return False
 
119
 
 
120
 
 
121
def top_path(path):
    """Return the first component of ``path``, or '' if it has none."""
    parts = splitpath(path)
    return parts[0] if len(parts) > 0 else ''
 
128
 
 
129
 
 
130
def common_directory(names):
    """Return the top-level directory shared by all ``names``, if any.

    None is returned when ``names`` is empty, when any name has no top-level
    component, or when two names start with different components.
    """
    tops = (top_path(entry) for entry in names)
    shared = next(tops, None)
    if shared is None or shared == '':
        return None
    for candidate in tops:
        # An empty component can never equal the non-empty shared one, so
        # this single comparison also rejects names without a top directory.
        if candidate != shared:
            return None
    return shared
 
143
 
 
144
 
 
145
def do_directory(tt, trans_id, tree, relative_path, path):
    """Make ``trans_id`` a directory in the transform.

    When ``path`` is already a directory on disk and ``relative_path`` has a
    file id in ``tree``, the pending deletion is cancelled; otherwise a new
    directory is created.
    """
    keep_existing = isdir(path) and tree.path2id(relative_path) is not None
    if not keep_existing:
        tt.create_directory(trans_id)
        return
    tt.cancel_deletion(trans_id)
 
150
 
 
151
 
 
152
def add_implied_parents(implied_parents, path):
    """Add every ancestor directory of ``path`` to ``implied_parents``.

    Walks upwards from the immediate parent and stops at the first ancestor
    that is already in the set.
    """
    ancestor = os.path.dirname(path)
    while ancestor not in implied_parents:
        implied_parents.add(ancestor)
        ancestor = os.path.dirname(ancestor)
 
159
 
 
160
 
 
161
def names_of_files(tar_file):
    """Yield the name of each archive member, skipping type "g" entries."""
    for entry in tar_file.getmembers():
        if entry.type == "g":
            continue
        yield entry.name
 
165
 
 
166
 
 
167
def should_ignore(relative_path):
    """Return True when the top component of ``relative_path`` is '.bzr'."""
    first_component = top_path(relative_path)
    return first_component == '.bzr'
 
169
 
 
170
 
 
171
def import_tar(tree, tar_input):
    """Replace the contents of a working directory with tarfile contents.

    The tarfile may be a gzipped stream.  File ids will be updated.

    :param tree: Working tree to import into.
    :param tar_input: File-like object holding the tar data.  It is not
        closed here; that remains the caller's job.
    """
    # 'lala' is only a placeholder name: the archive is read from tar_input.
    tar_file = tarfile.open('lala', 'r', tar_input)
    try:
        import_archive(tree, tar_file)
    finally:
        # Release the TarFile (and any decompressor it created) even when
        # the import fails.
        tar_file.close()
 
177
 
 
178
def import_zip(tree, zip_input):
    """Replace the contents of a working directory with zip file contents.

    :param tree: Working tree to import into.
    :param zip_input: File-like object holding the zip data.
    """
    zip_file = ZipFileWrapper(zip_input, 'r')
    try:
        import_archive(tree, zip_file)
    finally:
        # Close the wrapper even when the import fails.
        zip_file.close()
 
181
 
 
182
def import_dir(tree, dir_input):
    """Replace the contents of a working directory with a directory's files.

    ``dir_input`` is a file-like object whose contents are the path of the
    directory to import.
    """
    import_archive(tree, DirWrapper(dir_input))
 
185
 
 
186
 
 
187
def import_archive(tree, archive_file):
    """Import ``archive_file`` into ``tree`` using a fresh TreeTransform.

    The transform is finalized whether or not the import succeeds.
    """
    transform = TreeTransform(tree)
    try:
        import_archive_to_transform(tree, archive_file, transform)
        transform.apply()
    finally:
        transform.finalize()
 
194
 
 
195
 
 
196
def import_archive_to_transform(tree, archive_file, tt):
    """Queue on ``tt`` the changes that make ``tree`` match ``archive_file``.

    The contents of every non-root entry of the tree are scheduled for
    deletion and each archive member is then (re)created.  Directories that
    are only implied by member paths are created too, and paths that were in
    the tree but are not in the archive are unversioned.  Conflicts that
    remain after resolution are reported with ``warning``.  The transform is
    neither applied nor finalized here.

    :param tree: Tree whose entries are being replaced.
    :param archive_file: Object providing ``getmembers()`` and
        ``extractfile(member)``.
    :param tt: Transform on which the changes are recorded.
    """
    prefix = common_directory(names_of_files(archive_file))
    removed = set()
    for path, entry in tree.iter_entries_by_dir():
        if entry.parent_id is None:
            # Leave the tree root alone.
            continue
        trans_id = tt.trans_id_tree_path(path)
        tt.delete_contents(trans_id)
        removed.add(path)

    added = set()
    implied_parents = set()
    seen = set()
    for member in archive_file.getmembers():
        if member.type == 'g':
            # type 'g' is a header
            continue
        member_path = member.name
        if prefix is not None:
            # The prefix was computed from the undecoded member names, so it
            # is stripped before decoding; measuring it against the decoded
            # path would cut at the wrong place for a non-ASCII prefix.
            member_path = member_path[len(prefix)+1:]
            member_path = member_path.rstrip('/')
        # Inverse functionality in bzr uses utf-8.  We could also
        # interpret relative to fs encoding, which would match native
        # behaviour better.
        relative_path = member_path.decode('utf-8')
        if relative_path == '':
            continue
        if should_ignore(relative_path):
            continue
        add_implied_parents(implied_parents, relative_path)
        trans_id = tt.trans_id_tree_path(relative_path)
        added.add(relative_path.rstrip('/'))
        path = tree.abspath(relative_path)
        if member.name in seen:
            # The archive lists this name more than once: discard what the
            # earlier entry created so the later one replaces it.
            if tt.final_kind(trans_id) == 'file':
                tt.set_executability(None, trans_id)
            tt.cancel_creation(trans_id)
        seen.add(member.name)
        if member.isreg():
            tt.create_file(file_iterator(archive_file.extractfile(member)),
                           trans_id)
            # Executable if any of the user/group/other execute bits is set.
            executable = (member.mode & 0o111) != 0
            tt.set_executability(executable, trans_id)
        elif member.isdir():
            do_directory(tt, trans_id, tree, relative_path, path)
        elif member.issym():
            tt.create_symlink(member.linkname, trans_id)
        else:
            # Other member kinds are not imported.
            continue
        if tt.tree_file_id(trans_id) is None:
            name = basename(member.name.rstrip('/'))
            file_id = generate_ids.gen_file_id(name)
            tt.version_file(file_id, trans_id)

    for relative_path in implied_parents.difference(added):
        if relative_path == "":
            continue
        trans_id = tt.trans_id_tree_path(relative_path)
        path = tree.abspath(relative_path)
        do_directory(tt, trans_id, tree, relative_path, path)
        if tt.tree_file_id(trans_id) is None:
            # Generate a real file id, as is done for archive members above,
            # instead of reusing the transform id as the file id.
            # NOTE(review): transform ids are presumably only unique within
            # one transform - confirm against the TreeTransform docs.
            file_id = generate_ids.gen_file_id(basename(relative_path))
            tt.version_file(file_id, trans_id)
        added.add(relative_path)

    for path in removed.difference(added):
        tt.unversion_file(tt.trans_id_tree_path(path))

    for conflict in cook_conflicts(resolve_conflicts(tt), tt):
        warning(conflict)
 
264
 
 
265
 
 
266
def do_import(source, tree_directory=None):
    """Implementation of import command.  Intended for UI only

    :param source: Path or URL of a zip or tar archive, or the path of a
        directory.
    :param tree_directory: Directory of the working tree to import into.  If
        it is not a branch yet, the directory is created when missing and a
        branch is created in it.  When None, the tree containing '.' is
        used.
    :raises BzrCommandError: If the tree has uncommitted changes or the
        source is neither a recognised archive nor a directory.
    :raises NoSuchFile: If a tar source cannot be opened because it does not
        exist.
    """
    if tree_directory is not None:
        try:
            tree = WorkingTree.open(tree_directory)
        except NotBranchError:
            if not os.path.exists(tree_directory):
                os.mkdir(tree_directory)
            branch = BzrDir.create_branch_convenience(tree_directory)
            tree = branch.bzrdir.open_workingtree()
    else:
        tree = WorkingTree.open_containing('.')[0]
    tree.lock_write()
    try:
        if tree.changes_from(tree.basis_tree()).has_changed():
            raise BzrCommandError("Working tree has uncommitted changes.")

        try:
            archive, external_compressor = get_archive_type(source)
        except errors.NotArchiveType:
            if file_kind(source) == 'directory':
                # import_dir expects a file-like object whose contents are
                # the directory path.
                s = StringIO(source)
                s.seek(0)
                import_dir(tree, s)
            else:
                raise BzrCommandError('Unhandled import source')
        else:
            if archive == 'zip':
                zip_input = open_from_url(source)
                try:
                    import_zip(tree, zip_input)
                finally:
                    # Close the source handle, as is done for tar input.
                    zip_input.close()
            elif archive == 'tar':
                try:
                    tar_input = open_from_url(source)
                    if external_compressor in ('bz2', 'lzma'):
                        # These formats are decompressed in memory, so the
                        # source handle can be released as soon as it has
                        # been read.
                        try:
                            compressed = tar_input.read()
                        finally:
                            tar_input.close()
                        if external_compressor == 'bz2':
                            import bz2
                            tar_input = StringIO(bz2.decompress(compressed))
                        else:
                            import lzma
                            tar_input = StringIO(lzma.decompress(compressed))
                except IOError as e:
                    if e.errno == errno.ENOENT:
                        raise NoSuchFile(source)
                    # Any other I/O failure must propagate; carrying on
                    # would use a missing or half-initialised tar_input.
                    raise
                try:
                    import_tar(tree, tar_input)
                finally:
                    tar_input.close()
    finally:
        tree.unlock()
 
313
 
 
314
 
 
315
def get_archive_type(path):
    """Return the type of archive and compressor indicated by path name.

    Only external compressors are returned, so zip files are only
    ('zip', None).  .tgz is treated as ('tar', 'gz') and '.tar.xz' is treated
    as ('tar', 'lzma').

    :param path: File name, path or URL to classify by its extension.
    :return: An (archive, external_compressor) tuple; the compressor is None
        for '.zip' and plain '.tar'.
    :raises errors.NotArchiveType: If the extension is not recognised.
    """
    # The dot before the compressor suffix is escaped so that only a literal
    # '.' separates 'tar' from it (e.g. 'foo.tar_gz' is not an archive).
    matches = re.match(r'.*\.(zip|tgz|tar(\.(gz|bz2|lzma|xz))?)$', path)
    if not matches:
        raise errors.NotArchiveType(path)
    if matches.group(1) == 'tgz':
        return 'tar', 'gz'
    external_compressor = matches.group(3)
    if external_compressor is None:
        # Plain '.zip' or '.tar': group 1 is the archive type itself.
        return matches.group(1), None
    if external_compressor == 'xz':
        external_compressor = 'lzma'
    return 'tar', external_compressor