~abentley/bzrtools/bzrtools.dev

« back to all changes in this revision

Viewing changes to upstream_import.py

  • Committer: Jelmer Vernooij
  • Date: 2011-06-24 13:35:53 UTC
  • mto: This revision was merged to the branch mainline in revision 771.
  • Revision ID: jelmer@samba.org-20110624133553-h9s8syvvvsvpd1o9
Add support for importing .tar.xz and .tar.lzma files.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
"""Import upstream source into a branch"""
2
2
 
3
 
from bz2 import BZ2File
4
3
import errno
5
4
import os
6
 
from shutil import rmtree
7
5
from StringIO import StringIO
 
6
import stat
8
7
import tarfile
9
 
from unittest import makeSuite
 
8
import zipfile
10
9
 
 
10
from bzrlib import generate_ids
11
11
from bzrlib.bzrdir import BzrDir
12
 
from bzrlib.delta import compare_trees
13
12
from bzrlib.errors import NoSuchFile, BzrCommandError, NotBranchError
14
 
from bzrlib.osutils import pathjoin, isdir, file_iterator
15
 
from bzrlib.tests import TestCaseInTempDir
 
13
from bzrlib.osutils import (pathjoin, isdir, file_iterator, basename,
 
14
                            file_kind, splitpath)
16
15
from bzrlib.trace import warning
17
16
from bzrlib.transform import TreeTransform, resolve_conflicts, cook_conflicts
18
17
from bzrlib.workingtree import WorkingTree
19
 
 
20
 
 
21
 
def top_directory(path):
 
18
from bzrlib.plugins.bzrtools.bzrtools import open_from_url
 
19
 
 
20
class ZipFileWrapper(object):
    """Adapter giving a zip archive the archive-member calls used in this file.

    Wraps a ``zipfile.ZipFile`` and offers ``getmembers``, ``extractfile``,
    ``add`` and ``close``.
    """

    def __init__(self, fileobj, mode):
        # Both arguments are handed straight to zipfile.ZipFile.
        self.zipfile = zipfile.ZipFile(fileobj, mode)

    def getmembers(self):
        """Generate one ZipInfoWrapper per entry listed in the archive."""
        archive = self.zipfile
        for entry in archive.infolist():
            yield ZipInfoWrapper(archive, entry)

    def extractfile(self, infowrapper):
        """Return a StringIO holding the whole content of the given member."""
        contents = self.zipfile.read(infowrapper.name)
        return StringIO(contents)

    def add(self, filename):
        """Store filename in the archive.

        A directory is written as an empty entry whose name ends in '/';
        anything else is written with ZipFile.write().
        """
        if not isdir(filename):
            self.zipfile.write(filename)
            return
        self.zipfile.writestr(filename+'/', '')

    def close(self):
        """Close the wrapped ZipFile."""
        self.zipfile.close()
 
40
 
 
41
 
 
42
class ZipInfoWrapper(object):
    """Give a zip entry the member attributes and predicates used in this file.

    Attributes:
      info     -- the wrapped info object (its ``filename`` becomes ``name``)
      type     -- always None
      name     -- the entry's file name inside the archive
      zipfile  -- the archive object the entry came from
      mode     -- always 0o666 (read/write for everyone, no execute bits)
    """

    def __init__(self, zipfile, info):
        self.info = info
        self.type = None
        self.name = info.filename
        self.zipfile = zipfile
        # Same value as the old-style literal 0666, written in the form that
        # both Python 2.6+ and Python 3 accept.
        self.mode = 0o666

    def isdir(self):
        """Return True when the entry name ends with '/'."""
        # Really? Eeeew!
        # The trailing slash is the only directory marker consulted here.
        return self.name.endswith('/')

    def isreg(self):
        """Return True for every entry that isdir() does not claim."""
        # Really? Eeeew!
        return not self.isdir()
 
58
 
 
59
 
 
60
class DirWrapper(object):
    """Present a directory on disk through the archive-member calls used here.

    The constructor keeps the (fileobj, mode) calling shape of the other
    wrappers in this file: the "file" is read to obtain the directory path.
    """

    def __init__(self, fileobj, mode='r'):
        """Read the directory path from fileobj; only mode 'r' is accepted."""
        assert mode == 'r', mode
        # realpath() makes root absolute and resolves symlinks in it.
        self.root = os.path.realpath(fileobj.read())

    def __repr__(self):
        return 'DirWrapper(%r)' % self.root

    def getmembers(self, subdir=None):
        """Yield a FileInfo for every entry below root (or below root/subdir).

        A directory is yielded before its own contents.  The paths handed
        to FileInfo are relative to root.
        """
        if subdir is not None:
            mydir = pathjoin(self.root, subdir)
        else:
            mydir = self.root
        for child in os.listdir(mydir):
            if subdir is not None:
                child = pathjoin(subdir, child)
            fi = FileInfo(self.root, child)
            yield fi
            if fi.isdir():
                # Recurse with the root-relative path of the subdirectory.
                for v in self.getmembers(child):
                    yield v

    def extractfile(self, member):
        """Open member.fullpath for reading and return the file object."""
        # Binary mode: the content must be returned byte-for-byte, with no
        # newline translation on platforms that distinguish text files.
        return open(member.fullpath, 'rb')
 
84
 
 
85
 
 
86
class FileInfo(object):
    """Describe one filesystem entry with the member attributes used here.

    Attributes:
      fullpath -- root joined with filepath
      root     -- the root directory passed in
      name     -- basename(root) joined with filepath (just basename(root)
                  when filepath is ''); directories get a trailing '/'
      type     -- always None
      mode     -- st_mode from os.lstat(), so a symlink is described itself
                  rather than its target
      linkname -- link target; only set once issym() has returned True
    """

    def __init__(self, root, filepath):
        self.fullpath = pathjoin(root, filepath)
        self.root = root
        if filepath != '':
            self.name = pathjoin(basename(root), filepath)
        else:
            self.name = basename(root)
        self.type = None
        # Local is not called "stat" so it does not hide the stat module
        # that the predicates below rely on.
        st = os.lstat(self.fullpath)
        self.mode = st.st_mode
        if self.isdir():
            self.name += '/'

    def __repr__(self):
        return 'FileInfo(%r)' % self.name

    def isreg(self):
        """Return True when mode describes a regular file."""
        return stat.S_ISREG(self.mode)

    def isdir(self):
        """Return True when mode describes a directory."""
        return stat.S_ISDIR(self.mode)

    def issym(self):
        """Return True when mode describes a symlink.

        As a side effect, a True result also stores the link target in
        self.linkname.
        """
        if stat.S_ISLNK(self.mode):
            self.linkname = os.readlink(self.fullpath)
            return True
        else:
            return False
 
117
 
 
118
 
 
119
def top_path(path):
    """Return the first component of path, or '' when it has none."""
    parts = splitpath(path)
    if not parts:
        return ''
    return parts[0]
30
126
 
31
127
 
32
128
def common_directory(names):
33
129
    """Determine a single directory prefix from a list of names"""
34
130
    possible_prefix = None
35
131
    for name in names:
36
 
        name_top = top_directory(name)
 
132
        name_top = top_path(name)
 
133
        if name_top == '':
 
134
            return None
37
135
        if possible_prefix is None:
38
136
            possible_prefix = name_top
39
137
        else:
64
162
            yield member.name
65
163
 
66
164
 
 
165
def should_ignore(relative_path):
    """Return True when the first component of relative_path is '.bzr'."""
    first_component = top_path(relative_path)
    return first_component == '.bzr'
 
167
 
 
168
 
67
169
def import_tar(tree, tar_input):
    """Replace the contents of a working directory with tarfile contents.

    The tarfile may be a gzipped stream.  File ids will be updated.

    :param tree: the working tree to import into; passed to import_archive.
    :param tar_input: file-like object holding the tar data.  It is not
        closed here; that stays the caller's job.
    """
    # The data is read from tar_input; 'lala' is only the name given to
    # tarfile.open() alongside the file object.
    tar_file = tarfile.open('lala', 'r', tar_input)
    try:
        import_archive(tree, tar_file)
    finally:
        # Release the TarFile even when the import fails.
        tar_file.close()
 
175
 
 
176
def import_zip(tree, zip_input):
    """Replace the contents of a working directory with zip archive contents.

    :param tree: the working tree to import into; passed to import_archive.
    :param zip_input: file-like object holding the zip data, opened for
        reading through ZipFileWrapper.
    """
    zip_file = ZipFileWrapper(zip_input, 'r')
    try:
        import_archive(tree, zip_file)
    finally:
        # Close the wrapped ZipFile even when the import fails.
        zip_file.close()
 
179
 
 
180
def import_dir(tree, dir_input):
    """Import into tree the directory that DirWrapper builds from dir_input."""
    import_archive(tree, DirWrapper(dir_input))
 
183
 
 
184
 
 
185
def import_archive(tree, archive_file):
    """Import archive_file into tree using a single TreeTransform.

    The transform is filled by import_archive_to_transform and then applied;
    it is finalized whether or not those steps succeed.
    """
    transform = TreeTransform(tree)
    try:
        import_archive_to_transform(tree, archive_file, transform)
        transform.apply()
    finally:
        transform.finalize()
 
192
 
 
193
 
 
194
def import_archive_to_transform(tree, archive_file, tt):
 
195
    prefix = common_directory(names_of_files(archive_file))
75
196
    removed = set()
76
197
    for path, entry in tree.inventory.iter_entries():
 
198
        if entry.parent_id is None:
 
199
            continue
77
200
        trans_id = tt.trans_id_tree_path(path)
78
201
        tt.delete_contents(trans_id)
79
202
        removed.add(path)
80
203
 
81
 
    added = set() 
 
204
    added = set()
82
205
    implied_parents = set()
83
206
    seen = set()
84
 
    for member in tar_file.getmembers():
 
207
    for member in archive_file.getmembers():
85
208
        if member.type == 'g':
86
209
            # type 'g' is a header
87
210
            continue
88
 
        relative_path = member.name 
 
211
        # Inverse functionality in bzr uses utf-8.  We could also
 
212
        # interpret relative to fs encoding, which would match native
 
213
        # behaviour better.
 
214
        relative_path = member.name.decode('utf-8')
89
215
        if prefix is not None:
90
216
            relative_path = relative_path[len(prefix)+1:]
 
217
            relative_path = relative_path.rstrip('/')
91
218
        if relative_path == '':
92
219
            continue
 
220
        if should_ignore(relative_path):
 
221
            continue
93
222
        add_implied_parents(implied_parents, relative_path)
94
223
        trans_id = tt.trans_id_tree_path(relative_path)
95
224
        added.add(relative_path.rstrip('/'))
96
225
        path = tree.abspath(relative_path)
97
226
        if member.name in seen:
 
227
            if tt.final_kind(trans_id) == 'file':
 
228
                tt.set_executability(None, trans_id)
98
229
            tt.cancel_creation(trans_id)
99
230
        seen.add(member.name)
100
231
        if member.isreg():
101
 
            tt.create_file(file_iterator(tar_file.extractfile(member)), 
 
232
            tt.create_file(file_iterator(archive_file.extractfile(member)),
102
233
                           trans_id)
 
234
            executable = (member.mode & 0111) != 0
 
235
            tt.set_executability(executable, trans_id)
103
236
        elif member.isdir():
104
237
            do_directory(tt, trans_id, tree, relative_path, path)
105
238
        elif member.issym():
106
239
            tt.create_symlink(member.linkname, trans_id)
 
240
        else:
 
241
            continue
 
242
        if tt.tree_file_id(trans_id) is None:
 
243
            name = basename(member.name.rstrip('/'))
 
244
            file_id = generate_ids.gen_file_id(name)
 
245
            tt.version_file(file_id, trans_id)
107
246
 
108
247
    for relative_path in implied_parents.difference(added):
109
248
        if relative_path == "":
111
250
        trans_id = tt.trans_id_tree_path(relative_path)
112
251
        path = tree.abspath(relative_path)
113
252
        do_directory(tt, trans_id, tree, relative_path, path)
 
253
        if tt.tree_file_id(trans_id) is None:
 
254
            tt.version_file(trans_id, trans_id)
114
255
        added.add(relative_path)
115
256
 
 
257
    for path in removed.difference(added):
 
258
        tt.unversion_file(tt.trans_id_tree_path(path))
 
259
 
116
260
    for conflict in cook_conflicts(resolve_conflicts(tt), tt):
117
261
        warning(conflict)
118
 
    tt.apply()
119
 
    update_ids(tree, added, removed)
120
 
 
121
 
 
122
 
def update_ids(tree, added, removed):
    """Add newly present paths to the tree and remove the ones that are gone.

    Paths in added but not in removed go, sorted, to tree.add(); paths in
    removed but not in added go, reverse-sorted, to tree.remove().
    """
    # XXX detect renames
    to_add = added.difference(removed)
    to_remove = removed.difference(added)
    tree.add(sorted(to_add))
    # Reverse order lists a path before any shorter path it starts with.
    tree.remove(sorted(to_remove, reverse=True))
130
262
 
131
263
 
132
264
def do_import(source, tree_directory=None):
143
275
        tree = WorkingTree.open_containing('.')[0]
144
276
    tree.lock_write()
145
277
    try:
146
 
        if compare_trees(tree, tree.basis_tree()).has_changed():
 
278
        if tree.changes_from(tree.basis_tree()).has_changed():
147
279
            raise BzrCommandError("Working tree has uncommitted changes.")
148
280
 
149
 
        if (source.endswith('.tar') or source.endswith('.tar.gz') or 
150
 
            source.endswith('.tar.bz2')) or source.endswith('.tgz'):
 
281
        if (source.endswith('.tar') or source.endswith('.tar.gz') or
 
282
            source.endswith('.tar.bz2') or source.endswith('.tgz') or
 
283
            source.endswith('.tar.lzma') or source.endswith('.tar.xz')):
151
284
            try:
 
285
                tar_input = open_from_url(source)
152
286
                if source.endswith('.bz2'):
153
 
                    tar_input = BZ2File(source, 'r')
154
 
                    tar_input = StringIO(tar_input.read())
155
 
                else:
156
 
                    tar_input = file(source, 'rb')
 
287
                    import bz2
 
288
                    tar_input = StringIO(bz2.decompress(tar_input.read()))
 
289
                elif source.endswith('.xz') or source.endswith('.lzma'):
 
290
                    import lzma
 
291
                    tar_input = StringIO(lzma.decompress(tar_input.read()))
157
292
            except IOError, e:
158
293
                if e.errno == errno.ENOENT:
159
294
                    raise NoSuchFile(source)
161
296
                import_tar(tree, tar_input)
162
297
            finally:
163
298
                tar_input.close()
 
299
        elif source.endswith('.zip'):
 
300
            import_zip(tree, open_from_url(source))
 
301
        elif file_kind(source) == 'directory':
 
302
            s = StringIO(source)
 
303
            s.seek(0)
 
304
            import_dir(tree, s)
 
305
        else:
 
306
            raise BzrCommandError('Unhandled import source')
164
307
    finally:
165
308
        tree.unlock()
166
 
 
167
 
class TestImport(TestCaseInTempDir):
    """Tests for the path helpers and for importing tarballs into a tree."""

    def _add_dir(self, tar_file, path):
        """Create directory path on disk, then add it to tar_file."""
        os.mkdir(path)
        tar_file.add(path)

    def _add_file(self, tar_file, path, content):
        """Write content to path on disk, then add that path to tar_file."""
        handle = open(path, 'wb')
        handle.write(content)
        handle.close()
        tar_file.add(path)

    def make_tar(self, mode='w'):
        """Return a rewound StringIO tarball of project-0.1.

        Members, in order: the directory, junk/, README and FEEDME.
        """
        result = StringIO()
        tar_file = tarfile.open('project-0.1.tar', mode, result)
        # Each directory is added while still empty, so every member is
        # recorded exactly once and in this order.
        self._add_dir(tar_file, 'project-0.1')
        self._add_dir(tar_file, 'project-0.1/junk')
        self._add_file(tar_file, 'project-0.1/README', 'What?')
        self._add_file(tar_file, 'project-0.1/FEEDME', 'Hungry!!')
        tar_file.close()
        rmtree('project-0.1')
        result.seek(0)
        return result

    def make_tar2(self):
        """Return a StringIO tarball of project-0.2 with README added twice.

        The second README is added after reopening the tarball in append
        mode; that second TarFile is left open and the stream is not rewound.
        """
        result = StringIO()
        tar_file = tarfile.open('project-0.2.tar', 'w', result)
        self._add_dir(tar_file, 'project-0.2')
        self._add_dir(tar_file, 'project-0.2/junk')
        self._add_file(tar_file, 'project-0.2/README', 'Now?')
        tar_file.close()

        tar_file = tarfile.open('project-0.2.tar', 'a', result)
        tar_file.add('project-0.2/README')

        rmtree('project-0.2')
        return result

    def make_messed_tar(self):
        """Return a rewound StringIO tarball with two top-level directories."""
        result = StringIO()
        tar_file = tarfile.open('project-0.1.tar', 'w', result)
        self._add_dir(tar_file, 'project-0.1')
        self._add_dir(tar_file, 'project-0.2')
        self._add_file(tar_file, 'project-0.1/README', 'What?')
        tar_file.close()
        # Only project-0.1 is cleaned up; project-0.2 stays on disk.
        rmtree('project-0.1')
        result.seek(0)
        return result

    def test_top_directory(self):
        self.assertEqual(top_directory('ab/b/c'), 'ab')
        self.assertEqual(top_directory('/etc'), '/')

    def test_common_directory(self):
        self.assertEqual(common_directory(['ab/c/d', 'ab/c/e']), 'ab')
        self.assertIs(common_directory(['ab/c/d', 'ac/c/e']), None)

    def test_untar(self):
        tarball = self.make_tar()
        tree = BzrDir.create_standalone_workingtree('tree')
        import_tar(tree, tarball)
        self.assertTrue(tree.path2id('README') is not None)
        self.assertTrue(tree.path2id('FEEDME') is not None)
        self.assertTrue(os.path.isfile(tree.abspath('README')))
        self.assertEqual(tree.inventory[tree.path2id('README')].kind, 'file')
        self.assertEqual(tree.inventory[tree.path2id('FEEDME')].kind, 'file')

        # Put an extra file in the tree before the second import.
        extra = open(tree.abspath('junk/food'), 'wb')
        extra.write('I like food\n')
        extra.close()

        tarball = self.make_tar2()
        import_tar(tree, tarball)
        self.assertTrue(tree.path2id('README') is not None)
        self.assertTrue(not os.path.exists(tree.abspath('FEEDME')))

    def test_untar2(self):
        tarball = self.make_messed_tar()
        tree = BzrDir.create_standalone_workingtree('tree')
        import_tar(tree, tarball)
        self.assertTrue(tree.path2id('project-0.1/README') is not None)

    def test_untar_gzip(self):
        tarball = self.make_tar(mode='w:gz')
        tree = BzrDir.create_standalone_workingtree('tree')
        import_tar(tree, tarball)
        self.assertTrue(tree.path2id('README') is not None)
270
 
 
271
 
 
272
 
def test_suite():
    """Return the suite that makeSuite builds from TestImport."""
    suite = makeSuite(TestImport)
    return suite