~abentley/bzrtools/bzrtools.dev

374 by Aaron Bentley
Start work on import plugin
1
"""Import upstream source into a branch"""
2
382 by Aaron Bentley
Handle adds and removes efficiently
3
import errno
374 by Aaron Bentley
Start work on import plugin
4
import os
772 by Aaron Bentley
Better handling of compound tar names.
5
import re
374 by Aaron Bentley
Start work on import plugin
6
from StringIO import StringIO
484 by Aaron Bentley
Get closer to importing directories using the same mechanism as files
7
import stat
374 by Aaron Bentley
Start work on import plugin
8
import tarfile
475 by Aaron Bentley
Add zip import support
9
import zipfile
374 by Aaron Bentley
Start work on import plugin
10
482 by Aaron Bentley
upstream imports honour the execute bit
11
from bzrlib import generate_ids
374 by Aaron Bentley
Start work on import plugin
12
from bzrlib.bzrdir import BzrDir
380 by Aaron Bentley
Got import working decently
13
from bzrlib.errors import NoSuchFile, BzrCommandError, NotBranchError
489 by Aaron Bentley
import now imports directories
14
from bzrlib.osutils import (pathjoin, isdir, file_iterator, basename,
703 by Aaron Bentley
Fix import with Python 2.6
15
                            file_kind, splitpath)
381 by Aaron Bentley
Handle conflicts and tarfiles that omit directories
16
from bzrlib.trace import warning
17
from bzrlib.transform import TreeTransform, resolve_conflicts, cook_conflicts
377 by Aaron Bentley
Got import command working
18
from bzrlib.workingtree import WorkingTree
563 by Aaron Bentley
Allow importing directly from a URL
19
from bzrlib.plugins.bzrtools.bzrtools import open_from_url
772 by Aaron Bentley
Better handling of compound tar names.
20
from bzrlib.plugins.bzrtools import errors
377 by Aaron Bentley
Got import command working
21
475 by Aaron Bentley
Add zip import support
22
class ZipFileWrapper(object):
    """Adapter exposing a zip archive through tarfile-style method names.

    Provides getmembers/extractfile/add/close on top of zipfile.ZipFile.
    """

    def __init__(self, fileobj, mode):
        """Open fileobj as a zip archive in the given zipfile mode."""
        self.zipfile = zipfile.ZipFile(fileobj, mode)

    def getmembers(self):
        """Yield a ZipInfoWrapper for each entry, in infolist() order."""
        archive = self.zipfile
        for entry in archive.infolist():
            yield ZipInfoWrapper(archive, entry)

    def extractfile(self, infowrapper):
        """Return an in-memory file object holding the member's contents.

        The member is looked up by infowrapper.name.
        """
        contents = self.zipfile.read(infowrapper.name)
        return StringIO(contents)

    def add(self, filename):
        """Add filename to the archive.

        A directory is stored as an empty entry whose name ends in '/';
        anything else is written with ZipFile.write.
        """
        if not isdir(filename):
            self.zipfile.write(filename)
            return
        self.zipfile.writestr(filename+'/', '')

    def close(self):
        """Close the underlying ZipFile."""
        self.zipfile.close()
42
475 by Aaron Bentley
Add zip import support
43
44
class ZipInfoWrapper(object):
    """Wrap a zip entry so it offers the member attributes used for tar.

    Exposes name, type, mode, isdir() and isreg().
    """

    def __init__(self, zipfile, info):
        """Record the owning archive and the entry's info object."""
        self.zipfile = zipfile
        self.info = info
        self.name = info.filename
        # There is no tar-style type code for zip entries.
        self.type = None
        # Fixed read/write permission bits; no execute bit is ever set.
        self.mode = 0o666

    def isdir(self):
        """Return True when the entry name ends with '/'."""
        # Really? Eeeew!  The trailing slash is the only evidence consulted.
        return self.name.endswith('/')

    def isreg(self):
        """Return True for every entry that is not a directory."""
        # Really? Eeeew!
        if self.isdir():
            return False
        return True
60
374 by Aaron Bentley
Start work on import plugin
61
484 by Aaron Bentley
Get closer to importing directories using the same mechanism as files
62
class DirWrapper(object):
    """Present a directory tree through the archive interface.

    The directory to walk is given as the *contents* of a file-like
    object, so that it can be constructed like the other archive wrappers.
    """

    def __init__(self, fileobj, mode='r'):
        """Read the directory path from fileobj.

        :param fileobj: file-like object whose read() returns the path.
        :param mode: must be 'r'; only reading is supported.
        """
        assert mode == 'r', mode
        self.root = os.path.realpath(fileobj.read())

    def __repr__(self):
        return 'DirWrapper(%r)' % self.root

    def getmembers(self, subdir=None):
        """Yield a FileInfo for everything below the root, depth-first.

        :param subdir: path relative to the root to list instead of the
            root itself; used for the recursive descent.
        """
        if subdir is not None:
            mydir = pathjoin(self.root, subdir)
        else:
            mydir = self.root
        for child in os.listdir(mydir):
            if subdir is not None:
                child = pathjoin(subdir, child)
            fi = FileInfo(self.root, child)
            yield fi
            if fi.isdir():
                for v in self.getmembers(child):
                    yield v

    def extractfile(self, member):
        """Return an open file object for the member's contents.

        The file is opened in binary mode so that the bytes are passed
        through untouched on platforms that translate newlines in text
        mode.
        """
        return open(member.fullpath, 'rb')
86
87
88
class FileInfo(object):
    """Describe one filesystem entry using the archive-member interface.

    Attributes:
      fullpath -- pathjoin(root, filepath)
      root     -- the root directory passed to the constructor
      name     -- basename(root) joined with filepath; directories get a
                  trailing '/'
      type     -- always None
      mode     -- st_mode from os.lstat, so symlinks are not followed
      linkname -- link target; only set once issym() has returned True
    """

    def __init__(self, root, filepath):
        """Stat the entry at filepath (relative to root) and record it.

        :param root: directory that filepath is relative to.
        :param filepath: relative path of the entry; '' means root itself.
        """
        self.fullpath = pathjoin(root, filepath)
        self.root = root
        if filepath != '':
            self.name = pathjoin(basename(root), filepath)
        else:
            self.name = basename(root)
        self.type = None
        # lstat rather than stat: a symlink must be reported as a link.
        self.mode = os.lstat(self.fullpath).st_mode
        if self.isdir():
            self.name += '/'

    def __repr__(self):
        return 'FileInfo(%r)' % self.name

    def isreg(self):
        """Return True if the entry is a regular file."""
        return stat.S_ISREG(self.mode)

    def isdir(self):
        """Return True if the entry is a directory."""
        return stat.S_ISDIR(self.mode)

    def issym(self):
        """Return True if the entry is a symlink.

        As a side effect, a True result also stores the link target in
        self.linkname.
        """
        if stat.S_ISLNK(self.mode):
            self.linkname = os.readlink(self.fullpath)
            return True
        else:
            return False
119
531.2.2 by Charlie Shepherd
Remove all trailing whitespace
120
703 by Aaron Bentley
Fix import with Python 2.6
121
def top_path(path):
    """Return the top directory given in a path.

    The result is the first component produced by splitpath, or '' when
    the path has no components.
    """
    parts = splitpath(path)
    if not parts:
        return ''
    return parts[0]
374 by Aaron Bentley
Start work on import plugin
128
129
130
def common_directory(names):
    """Determine a single directory prefix from a list of names.

    Returns the top-level path component shared by every name.  Returns
    None when the names disagree, when any name has an empty top
    component, or when there are no names.
    """
    shared = None
    for name in names:
        top = top_path(name)
        if top == '':
            return None
        if shared is not None and top != shared:
            return None
        shared = top
    return shared
143
144
382 by Aaron Bentley
Handle adds and removes efficiently
145
def do_directory(tt, trans_id, tree, relative_path, path):
    """Make sure the transform ends up with a directory at trans_id.

    If path is a directory on disk and relative_path is versioned in
    tree, its pending deletion is cancelled.  Otherwise a new directory
    is created in the transform.
    """
    already_there = isdir(path) and tree.path2id(relative_path) is not None
    if not already_there:
        tt.create_directory(trans_id)
        return
    tt.cancel_deletion(trans_id)
150
151
152
def add_implied_parents(implied_parents, path):
    """Update the set of implied parents from a path.

    Every ancestor directory of path is added to implied_parents,
    walking upwards until one is reached that the set already holds.
    """
    ancestor = os.path.dirname(path)
    while ancestor not in implied_parents:
        implied_parents.add(ancestor)
        ancestor = os.path.dirname(ancestor)
159
160
383 by Aaron Bentley
Skip the extended header in Linux tarballs
161
def names_of_files(tar_file):
    """Yield the name of every archive member except type "g" entries."""
    for entry in tar_file.getmembers():
        if entry.type == "g":
            continue
        yield entry.name
165
166
730.2.2 by Max Bowsher
Having discovered that bzr-builddeb import_dsc.py is a horrid copy-paste job of bzrtools upstream_import.py, restructure the change to minimize divergence from it.
167
def should_ignore(relative_path):
    """Return True when the path's top-level component is '.bzr'."""
    top = top_path(relative_path)
    return top == '.bzr'
169
170
374 by Aaron Bentley
Start work on import plugin
171
def import_tar(tree, tar_input):
    """Replace the contents of a working directory with tarfile contents.

    The tarfile may be a gzipped stream.  File ids will be updated.

    :param tree: working tree to import into.
    :param tar_input: file-like object holding the tar data.
    """
    # 'lala' is only a placeholder name; the data comes from tar_input.
    tar_file = tarfile.open('lala', 'r', tar_input)
    try:
        import_archive(tree, tar_file)
    finally:
        # Release the TarFile once the import is done or has failed.
        tar_file.close()
177
178
def import_zip(tree, zip_input):
    """Replace the contents of a working directory with zipfile contents.

    :param tree: working tree to import into.
    :param zip_input: file-like object holding the zip data.
    """
    zip_file = ZipFileWrapper(zip_input, 'r')
    try:
        import_archive(tree, zip_file)
    finally:
        # Release the wrapper once the import is done or has failed.
        zip_file.close()
181
484 by Aaron Bentley
Get closer to importing directories using the same mechanism as files
182
def import_dir(tree, dir_input):
    """Replace the contents of a working directory with a directory tree.

    dir_input is a file-like object whose contents are the path of the
    directory to import.
    """
    import_archive(tree, DirWrapper(dir_input))
185
768 by Aaron Bentley
Fix non-ascii tarball handling
186
475 by Aaron Bentley
Add zip import support
187
def import_archive(tree, archive_file):
    """Import archive_file into tree through a single TreeTransform.

    The transform is populated, applied, and then always finalized,
    whether or not the import succeeded.
    """
    transform = TreeTransform(tree)
    try:
        import_archive_to_transform(tree, archive_file, transform)
        transform.apply()
    finally:
        transform.finalize()
194
195
196
def import_archive_to_transform(tree, archive_file, tt):
    """Populate tt so that applying it makes tree match archive_file.

    Every non-root entry of the tree is first scheduled for deletion.
    Each archive member is then created in the transform (or, for a
    directory that already exists, has its deletion cancelled).  Parent
    directories that the archive implies but does not list are created
    too.  Finally, paths that were removed and not re-added are
    unversioned, and any remaining conflicts are reported as warnings.

    :param tree: working tree being imported into.
    :param archive_file: object providing getmembers() and extractfile().
    :param tt: the TreeTransform to populate; it is not applied here.
    """
    prefix = common_directory(names_of_files(archive_file))
    if prefix is not None:
        # The prefix comes from undecoded member names, but it is stripped
        # from decoded paths below, so measure it in decoded characters.
        prefix_length = len(prefix.decode('utf-8'))
    removed = set()
    for path, entry in tree.iter_entries_by_dir():
        if entry.parent_id is None:
            # The tree root is included in the iteration; leave it alone.
            continue
        trans_id = tt.trans_id_tree_path(path)
        tt.delete_contents(trans_id)
        removed.add(path)

    added = set()
    implied_parents = set()
    seen = set()
    for member in archive_file.getmembers():
        if member.type == 'g':
            # type 'g' is a header
            continue
        # Inverse functionality in bzr uses utf-8.  We could also
        # interpret relative to fs encoding, which would match native
        # behaviour better.
        relative_path = member.name.decode('utf-8')
        if prefix is not None:
            relative_path = relative_path[prefix_length+1:]
            relative_path = relative_path.rstrip('/')
        if relative_path == '':
            continue
        if should_ignore(relative_path):
            continue
        add_implied_parents(implied_parents, relative_path)
        trans_id = tt.trans_id_tree_path(relative_path)
        added.add(relative_path.rstrip('/'))
        path = tree.abspath(relative_path)
        if member.name in seen:
            # The same name appeared earlier in the archive: undo what was
            # created for it so this occurrence replaces it.
            if tt.final_kind(trans_id) == 'file':
                tt.set_executability(None, trans_id)
            tt.cancel_creation(trans_id)
        seen.add(member.name)
        if member.isreg():
            tt.create_file(file_iterator(archive_file.extractfile(member)),
                           trans_id)
            # Any execute bit in the member's mode marks it executable.
            executable = (member.mode & 0o111) != 0
            tt.set_executability(executable, trans_id)
        elif member.isdir():
            do_directory(tt, trans_id, tree, relative_path, path)
        elif member.issym():
            tt.create_symlink(member.linkname, trans_id)
        else:
            # Other member kinds are not imported.
            continue
        if tt.tree_file_id(trans_id) is None:
            name = basename(member.name.rstrip('/'))
            file_id = generate_ids.gen_file_id(name)
            tt.version_file(file_id, trans_id)

    for relative_path in implied_parents.difference(added):
        if relative_path == "":
            continue
        trans_id = tt.trans_id_tree_path(relative_path)
        path = tree.abspath(relative_path)
        do_directory(tt, trans_id, tree, relative_path, path)
        if tt.tree_file_id(trans_id) is None:
            # Mint a fresh file id, exactly as is done for members that
            # the archive lists explicitly.
            file_id = generate_ids.gen_file_id(basename(relative_path))
            tt.version_file(file_id, trans_id)
        added.add(relative_path)

    for path in removed.difference(added):
        tt.unversion_file(tt.trans_id_tree_path(path))

    for conflict in cook_conflicts(resolve_conflicts(tt), tt):
        warning(conflict)
374 by Aaron Bentley
Start work on import plugin
264
377 by Aaron Bentley
Got import command working
265
380 by Aaron Bentley
Got import working decently
266
def do_import(source, tree_directory=None):
    """Implementation of import command.  Intended for UI only

    :param source: path or URL of a zip or tar archive (optionally
        compressed), or the path of a directory.
    :param tree_directory: directory of the working tree to import into.
        If it is not a branch, one is created there (creating the
        directory first if needed).  When None, the tree containing '.'
        is used.
    :raises BzrCommandError: if the tree has uncommitted changes, or the
        source is neither a recognised archive nor a directory.
    :raises NoSuchFile: if a tar source does not exist.
    """
    if tree_directory is not None:
        try:
            tree = WorkingTree.open(tree_directory)
        except NotBranchError:
            if not os.path.exists(tree_directory):
                os.mkdir(tree_directory)
            branch = BzrDir.create_branch_convenience(tree_directory)
            tree = branch.bzrdir.open_workingtree()
    else:
        tree = WorkingTree.open_containing('.')[0]
    tree.lock_write()
    try:
        if tree.changes_from(tree.basis_tree()).has_changed():
            raise BzrCommandError("Working tree has uncommitted changes.")

        try:
            archive, external_compressor = get_archive_type(source)
        except errors.NotArchiveType:
            if file_kind(source) == 'directory':
                # DirWrapper reads the directory path from a file object.
                s = StringIO(source)
                s.seek(0)
                import_dir(tree, s)
            else:
                raise BzrCommandError('Unhandled import source')
        else:
            if archive == 'zip':
                zip_input = open_from_url(source)
                try:
                    import_zip(tree, zip_input)
                finally:
                    zip_input.close()
            elif archive == 'tar':
                try:
                    tar_input = open_from_url(source)
                    if external_compressor in ('bz2', 'lzma'):
                        # tarfile is not relied on for these: decompress
                        # the whole stream in memory, and close the
                        # original handle once it has been read.
                        try:
                            compressed = tar_input.read()
                        finally:
                            tar_input.close()
                        if external_compressor == 'bz2':
                            import bz2
                            tar_input = StringIO(bz2.decompress(compressed))
                        else:
                            import lzma
                            tar_input = StringIO(
                                lzma.decompress(compressed))
                except IOError as e:
                    if e.errno == errno.ENOENT:
                        raise NoSuchFile(source)
                    # Any other I/O failure must propagate; continuing
                    # would use an unusable or unbound tar_input.
                    raise
                try:
                    import_tar(tree, tar_input)
                finally:
                    tar_input.close()
    finally:
        tree.unlock()
772 by Aaron Bentley
Better handling of compound tar names.
313
314
315
def get_archive_type(path):
    """Return the type of archive and compressor indicated by path name.

    Only external compressors are returned, so zip files are only
    ('zip', None).  .tgz is treated as ('tar', 'gz') and '.tar.xz' is treated
    as ('tar', 'lzma').

    :param path: file name, path or URL whose extension is examined.
    :return: an (archive, external_compressor) tuple; the compressor is
        None when there is none.
    :raises errors.NotArchiveType: if the extension is not recognised.
    """
    # The dot before the compressor suffix is escaped so that only a
    # literal '.tar.<ext>' is accepted.
    matches = re.match(r'.*\.(zip|tgz|tar(\.(gz|bz2|lzma|xz))?)$', path)
    if not matches:
        raise errors.NotArchiveType(path)
    if matches.group(1) == 'tgz':
        return 'tar', 'gz'
    external_compressor = matches.group(3)
    if external_compressor is None:
        # Plain '.zip' or '.tar'.
        return matches.group(1), None
    if external_compressor == 'xz':
        # xz streams are handled by the lzma decompressor.
        external_compressor = 'lzma'
    return 'tar', external_compressor