~bzr-pqm/bzr/bzr.dev

1 by mbp at sourcefrog
import from baz patch-364
1
#! /usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18
19
from sets import Set
20
21
import sys, os, os.path, random, time, sha, sets, types, re, shutil, tempfile
22
import traceback, socket, fnmatch, difflib, time
23
from binascii import hexlify
24
25
import bzrlib
26
from inventory import Inventory
27
from trace import mutter, note
28
from tree import Tree, EmptyTree, RevisionTree, WorkingTree
29
from inventory import InventoryEntry, Inventory
30
from osutils import isdir, quotefn, isfile, uuid, sha_file, username, chomp, \
31
     format_date, compact_date, pumpfile, user_email, rand_bytes, splitpath, \
32
     joinpath, sha_string, file_kind
33
from store import ImmutableStore
34
from revision import Revision
35
from errors import bailout
36
from textui import show_status
37
from diff import diff_trees
38
39
# Exact text of the 'branch-format' control file: Branch._make_control
# writes it and Branch._check_format compares against it, including the
# trailing newline.
BZR_BRANCH_FORMAT = "Bazaar-NG branch, format 0.0.4\n"
40
## TODO: Maybe include checks for common corruption of newlines, etc?
41
42
43
44
45
46
######################################################################
47
# branch objects
48
49
class Branch:
50
    """Branch holding a history of revisions.
51
52
    :todo: Perhaps use different stores for different classes of object,
53
           so that we can keep track of how much space each one uses,
54
           or garbage-collect them.
55
56
    :todo: Add a RemoteBranch subclass.  For the basic case of read-only
57
           HTTP access this should be very easy, by
58
           just redirecting controlfile access into HTTP requests.
59
           We would need a RemoteStore working similarly.
60
61
    :todo: Keep the on-disk branch locked while the object exists.
62
63
    :todo: mkdir() method.
64
    """
65
    def __init__(self, base, init=False):
        """Open the branch rooted at `base`, optionally creating it first.

        :param base: Base directory for the branch.  It is resolved with
             os.path.realpath and kept as `self.base`.

        :param init: If True, create new control files in a previously
             unversioned directory.  If False, the branch must already
             be versioned.

        In the test suite, creation of new trees is tested using the
        `ScratchBranch` class.
        """
        self.base = os.path.realpath(base)

        if not init:
            # An existing branch must already have a control directory
            # in a format this code understands.
            if not isdir(self.controlfilename('.')):
                hints = ['use "bzr init" to initialize a new working tree',
                         'current bzr can only operate from top-of-tree']
                bailout("not a bzr branch: %s" % quotefn(base), hints)
            self._check_format()
        else:
            self._make_control()

        # Each class of object gets its own store directory below the
        # control directory.
        open_store = lambda dirname: ImmutableStore(self.controlfilename(dirname))
        self.text_store = open_store('text-store')
        self.revision_store = open_store('revision-store')
        self.inventory_store = open_store('inventory-store')
90
91
92
    def __str__(self):
93
        return '%s(%r)' % (self.__class__.__name__, self.base)
94
95
96
    __repr__ = __str__
97
98
99
    def _rel(self, name):
100
        """Return filename relative to branch top"""
101
        return os.path.join(self.base, name)
102
        
103
104
    def controlfilename(self, file_or_path):
        """Return the path of an entry inside the branch's control directory.

        :param file_or_path: Either a single name, or a sequence of path
             components that are joined below the control directory.
        """
        if isinstance(file_or_path, types.StringTypes):
            components = [file_or_path]
        else:
            components = file_or_path
        return os.path.join(self.base, bzrlib.BZRDIR, *components)
109
110
111
    def controlfile(self, file_or_path, mode='r'):
        """Open a control file of this branch and return the file object.

        :param file_or_path: Name or path components, as accepted by
             `controlfilename`.
        :param mode: Open mode, passed through unchanged.
        """
        path = self.controlfilename(file_or_path)
        return open(path, mode)
114
115
116
    def _make_control(self):
        """Create and populate the control directory of a new branch.

        Makes the control directory itself, a README, the branch-format
        marker, the three store directories, four empty bookkeeping
        files and finally an empty working inventory.
        """
        def write_control(name, text):
            # Close explicitly so the data reaches disk without relying
            # on garbage collection of the file object.
            f = self.controlfile(name, 'w')
            try:
                f.write(text)
            finally:
                f.close()

        os.mkdir(self.controlfilename([]))
        write_control('README',
                      "This is a Bazaar-NG control directory.\n"
                      "Do not change any files in this directory.")
        write_control('branch-format', BZR_BRANCH_FORMAT)
        for d in ('text-store', 'inventory-store', 'revision-store'):
            os.mkdir(self.controlfilename(d))
        for name in ('revision-history', 'merged-patches',
                     'pending-merged-patches', 'branch-name'):
            write_control(name, '')
        mutter('created control directory in ' + self.base)

        inv_file = self.controlfile('inventory', 'w')
        try:
            Inventory().write_xml(inv_file)
        finally:
            inv_file.close()
129
130
131
    def _check_format(self):
        """Check this branch format is supported.

        The current tool only supports the current unstable format.

        In the future, we might need different in-memory Branch
        classes to support downlevel branches.  But not yet.

        Calls bailout() when the 'branch-format' control file does not
        hold exactly BZR_BRANCH_FORMAT.
        """
        # read in binary mode to detect newline weirdness.
        f = self.controlfile('branch-format', 'rb')
        try:
            fmt = f.read()
        finally:
            f.close()
        if fmt != BZR_BRANCH_FORMAT:
            bailout('sorry, branch format %r not supported' % fmt,
                    ['use a different bzr version',
                     'or remove the .bzr directory and "bzr init" again'])
145
146
147
    def read_working_inventory(self):
        """Read the working inventory.

        Parses the 'inventory' control file and returns the resulting
        Inventory; the number of items and the elapsed time are logged
        through mutter().
        """
        before = time.time()
        inv_file = self.controlfile('inventory', 'r')
        try:
            inv = Inventory.read_xml(inv_file)
        finally:
            # Do not leave the handle open until garbage collection.
            inv_file.close()
        mutter("loaded inventory of %d items in %f"
               % (len(inv), time.time() - before))
        return inv
154
155
156
    def _write_inventory(self, inv):
        """Update the working inventory.

        That is to say, the inventory describing changes underway, that
        will be committed to the next revision.

        :param inv: Inventory to serialize into the 'inventory' control
             file, replacing its previous contents.
        """
        inv_file = self.controlfile('inventory', 'w')
        try:
            inv.write_xml(inv_file)
        finally:
            # Close explicitly so a later read sees the complete file.
            inv_file.close()
        mutter('wrote inventory to %s' % quotefn(self.controlfilename('inventory')))
164
165
166
    # Attribute-style access backed by read_working_inventory and
    # _write_inventory.
    inventory = property(fget=read_working_inventory,
                         fset=_write_inventory,
                         fdel=None,
                         doc="""Inventory for the working copy.""")
168
169
170
    def add(self, files, verbose=False):
        """Make files versioned.

        This puts the files in the Added state, so that they will be
        recorded by the next commit.

        :param files: A single path or a sequence of paths, given
             relative to the branch base.

        :param verbose: If true, report each added entry through
             show_status.

        The working inventory is written back once, after every path
        has been accepted; bailout() is called for control files, for
        paths that are neither a regular file nor a directory, and for
        paths whose parent is not versioned.

        :todo: Perhaps have an option to add the ids even if the files do
               not (yet) exist.

        :todo: Perhaps return the ids of the files?  But then again it
               is easy to retrieve them if they're needed.

        :todo: Option to specify file id.

        :todo: Adding a directory should optionally recurse down and
               add all non-ignored children.  Perhaps do that in a
               higher-level method.

        >>> b = ScratchBranch(files=['foo'])
        >>> 'foo' in b.unknowns()
        True
        >>> b.show_status()
        ?       foo
        >>> b.add('foo')
        >>> 'foo' in b.unknowns()
        False
        >>> bool(b.inventory.path2id('foo'))
        True
        >>> b.show_status()
        A       foo

        >>> b.add('foo')
        Traceback (most recent call last):
        ...
        BzrError: ('foo is already versioned', [])

        >>> b.add(['nothere'])
        Traceback (most recent call last):
        BzrError: ('cannot add: not a regular file or directory: nothere', [])
        """

        # TODO: Re-adding a file that is removed in the working copy
        # should probably put it back with the previous ID.
        if isinstance(files, types.StringTypes):
            files = [files]

        inv = self.read_working_inventory()
        for f in files:
            if is_control_file(f):
                bailout("cannot add control file %s" % quotefn(f))

            fp = splitpath(f)

            if len(fp) == 0:
                bailout("cannot add top-level %r" % f)

            fullpath = os.path.normpath(self._rel(f))

            if isfile(fullpath):
                kind = 'file'
            elif isdir(fullpath):
                kind = 'directory'
            else:
                bailout('cannot add: not a regular file or directory: %s' % quotefn(f))

            if len(fp) > 1:
                # Entries below the top level need a versioned parent.
                parent_name = joinpath(fp[:-1])
                mutter("lookup parent %r" % parent_name)
                parent_id = inv.path2id(parent_name)
                if parent_id is None:
                    bailout("cannot add: parent %r is not versioned"
                            % parent_name)
            else:
                parent_id = None

            file_id = _gen_file_id(fp[-1])
            inv.add(InventoryEntry(file_id, fp[-1], kind=kind, parent_id=parent_id))
            if verbose:
                show_status('A', kind, quotefn(f))

            mutter("add file %s file_id:{%s} kind=%r parent_id={%s}"
                   % (f, file_id, kind, parent_id))
        self._write_inventory(inv)
253
254
255
256
    def remove(self, files, verbose=False):
        """Mark nominated files for removal from the inventory.

        This does not remove their text: only the working inventory
        entry is deleted, the file in the working directory is left
        alone.

        :param files: A single path or a sequence of paths.

        :param verbose: If true, report each removed entry through
             show_status.

        :todo: Refuse to remove modified files unless --force is given?

        >>> b = ScratchBranch(files=['foo'])
        >>> b.add('foo')
        >>> b.inventory.has_filename('foo')
        True
        >>> b.remove('foo')
        >>> b.working_tree().has_filename('foo')
        True
        >>> b.inventory.has_filename('foo')
        False

        >>> b = ScratchBranch(files=['foo'])
        >>> b.add('foo')
        >>> b.commit('one')
        >>> b.remove('foo')
        >>> b.commit('two')
        >>> b.inventory.has_filename('foo')
        False
        >>> b.basis_tree().has_filename('foo')
        False
        >>> b.working_tree().has_filename('foo')
        True

        :todo: Do something useful with directories.

        :todo: Should this remove the text or not?  Tough call; not
        removing may be useful and the user can just use rm, and
        is the opposite of add.  Removing it is consistent with most
        other tools.  Maybe an option.
        """
        ## TODO: Normalize names
        ## TODO: Remove nested loops; better scalability

        if isinstance(files, types.StringTypes):
            files = [files]

        inv = self.read_working_inventory()

        # Only the in-memory inventory is changed inside the loop; it is
        # written back once, after every name has been processed.
        for name in files:
            file_id = inv.path2id(name)
            if not file_id:
                bailout("cannot remove unversioned file %s" % quotefn(name))
            mutter("remove inventory entry %s {%s}" % (quotefn(name), file_id))
            if verbose:
                show_status('D', inv[file_id].kind, quotefn(name))
            del inv[file_id]

        self._write_inventory(inv)
311
312
313
    def unknowns(self):
        """Return all unknown files.

        These are files in the working directory that are not versioned or
        control files or ignored.  The work is delegated to the working
        tree's own unknowns().

        >>> b = ScratchBranch(files=['foo', 'foo~'])
        >>> list(b.unknowns())
        ['foo']
        >>> b.add('foo')
        >>> list(b.unknowns())
        []
        >>> b.remove('foo')
        >>> list(b.unknowns())
        ['foo']
        """
        tree = self.working_tree()
        return tree.unknowns()
330
331
332
    def commit(self, message, timestamp=None, committer=None,
               verbose=False):
        """Commit working copy as a new revision.

        The basic approach is to add all the file texts into the
        store, then the inventory, then make a new revision pointing
        to that inventory and store that.

        This is not quite safe if the working copy changes during the
        commit; for the moment that is simply not allowed.  A better
        approach is to make a temporary copy of the files before
        computing their hashes, and then add those hashes in turn to
        the inventory.  This should mean at least that there are no
        broken hash pointers.  There is no way we can get a snapshot
        of the whole directory at an instant.  This would also have to
        be robust against files disappearing, moving, etc.  So the
        whole thing is a bit hard.

        :param message: Log message recorded in the new revision.

        :param timestamp: if not None, seconds-since-epoch for a
             postdated/predated commit.

        :param committer: Committer recorded in the revision; when None
             the result of username() is used.

        :param verbose: If true, report each added, modified, renamed
             or deleted entry through show_status.
        """

        ## TODO: Show branch names

        # TODO: Don't commit if there are no changes, unless forced?

        # First walk over the working inventory; and both update that
        # and also build a new revision inventory.  The revision
        # inventory needs to hold the text-id, sha1 and size of the
        # actual file versions committed in the revision.  (These are
        # not present in the working inventory.)  We also need to
        # detect missing/deleted files, and remove them from the
        # working inventory.

        work_inv = self.read_working_inventory()
        inv = Inventory()
        basis = self.basis_tree()
        basis_inv = basis.inventory
        missing_ids = []
        for path, entry in work_inv.iter_entries():
            ## TODO: Check that the file kind has not changed from the previous
            ## revision of this file (if any).

            entry = entry.copy()

            p = self._rel(path)
            file_id = entry.file_id
            mutter('commit prep file %s, id %r ' % (p, file_id))

            if not os.path.exists(p):
                mutter("    file is missing, removing from inventory")
                if verbose:
                    show_status('D', entry.kind, quotefn(path))
                missing_ids.append(file_id)
                continue

            # TODO: Maybe a special case for empty files?  Seems a
            # waste to store them many times.

            inv.add(entry)

            if basis_inv.has_id(file_id):
                old_kind = basis_inv[file_id].kind
                if old_kind != entry.kind:
                    bailout("entry %r changed kind from %r to %r"
                            % (file_id, old_kind, entry.kind))

            if entry.kind == 'directory':
                if not isdir(p):
                    bailout("%s is entered as directory but not a directory" % quotefn(p))
            elif entry.kind == 'file':
                if not isfile(p):
                    bailout("%s is entered as file but is not a file" % quotefn(p))

                text_file = open(p, 'rb')
                try:
                    content = text_file.read()
                finally:
                    text_file.close()

                entry.text_sha1 = sha_string(content)
                entry.text_size = len(content)

                old_ie = basis_inv.has_id(file_id) and basis_inv[file_id]
                if (old_ie
                    and (old_ie.text_size == entry.text_size)
                    and (old_ie.text_sha1 == entry.text_sha1)):
                    ## assert content == basis.get_file(file_id).read()
                    entry.text_id = old_ie.text_id
                    mutter('    unchanged from previous text_id {%s}' %
                           entry.text_id)

                else:
                    entry.text_id = _gen_file_id(entry.name)
                    self.text_store.add(content, entry.text_id)
                    mutter('    stored with text_id {%s}' % entry.text_id)
                    if verbose:
                        if not old_ie:
                            state = 'A'
                        elif (old_ie.name == entry.name
                              and old_ie.parent_id == entry.parent_id):
                            # Same name and parent as in the basis: only
                            # the text changed, so this is a modification.
                            state = 'M'
                        else:
                            # Name or parent differs from the basis entry.
                            state = 'R'

                        show_status(state, entry.kind, quotefn(path))

        for file_id in missing_ids:
            # have to do this later so we don't mess up the iterator.
            # since parents may be removed before their children we
            # have to test.

            # FIXME: There's probably a better way to do this; perhaps
            # the workingtree should know how to filter itself.
            if work_inv.has_id(file_id):
                del work_inv[file_id]

        # The new inventory is stored under the same id as the revision.
        inv_id = rev_id = _gen_revision_id(time.time())

        inv_tmp = tempfile.TemporaryFile()
        inv.write_xml(inv_tmp)
        inv_tmp.seek(0)
        self.inventory_store.add(inv_tmp, inv_id)
        mutter('new inventory_id is {%s}' % inv_id)

        self._write_inventory(work_inv)

        if timestamp is None:
            timestamp = time.time()

        if committer is None:
            committer = username()

        mutter("building commit log message")
        rev = Revision(timestamp=timestamp,
                       committer=committer,
                       precursor = self.last_patch(),
                       message = message,
                       inventory_id=inv_id,
                       revision_id=rev_id)

        rev_tmp = tempfile.TemporaryFile()
        rev.write_xml(rev_tmp)
        rev_tmp.seek(0)
        self.revision_store.add(rev_tmp, rev_id)
        mutter("new revision_id is {%s}" % rev_id)

        ## XXX: Everything up to here can simply be orphaned if we abort
        ## the commit; it will leave junk files behind but that doesn't
        ## matter.

        ## TODO: Read back the just-generated changeset, and make sure it
        ## applies and recreates the right state.

        ## TODO: Also calculate and store the inventory SHA1
        mutter("committing patch r%d" % (self.revno() + 1))

        mutter("append to revision-history")
        history_file = self.controlfile('revision-history', 'at')
        try:
            history_file.write(rev_id + '\n')
        finally:
            # Make sure the new history line is flushed before returning.
            history_file.close()

        mutter("done!")
494
495
496
    def get_revision(self, revision_id):
        """Load the Revision stored under `revision_id`.

        Asserts that the loaded object records the same revision id it
        was looked up by.
        """
        stored = self.revision_store[revision_id]
        rev = Revision.read_xml(stored)
        assert rev.revision_id == revision_id
        return rev
501
502
503
    def get_inventory(self, inventory_id):
        """Get Inventory object by hash.

        The entry stored under `inventory_id` in the inventory store is
        parsed with Inventory.read_xml.

        :todo: Perhaps for this and similar methods, take a revision
               parameter which can be either an integer revno or a
               string hash."""
        stored = self.inventory_store[inventory_id]
        return Inventory.read_xml(stored)
511
512
513
    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision.

        :param revision_id: Id of the revision, or None for the null
             revision, in which case a new empty Inventory is returned.
        """
        if revision_id is None:
            return Inventory()
        rev = self.get_revision(revision_id)
        return self.get_inventory(rev.inventory_id)
519
520
521
    def revision_history(self):
        """Return sequence of revision hashes on to this branch.

        The list holds one entry per line of the 'revision-history'
        control file, each passed through chomp().

        >>> ScratchBranch().revision_history()
        []
        """
        history_file = self.controlfile('revision-history')
        try:
            return [chomp(l) for l in history_file.readlines()]
        finally:
            # Do not leave the handle open until garbage collection.
            history_file.close()
528
529
530
    def revno(self):
        """Return current revision number for this branch.

        That is equivalent to the number of revisions committed to
        this branch, i.e. the length of the revision history.

        >>> b = ScratchBranch()
        >>> b.revno()
        0
        >>> b.commit('no foo')
        >>> b.revno()
        1
        """
        history = self.revision_history()
        return len(history)
544
545
546
    def last_patch(self):
        """Return last patch hash, or None if no history.

        >>> ScratchBranch().last_patch() == None
        True
        """
        history = self.revision_history()
        if not history:
            return None
        return history[-1]
555
556
557
    def lookup_revision(self, revno):
558
        """Return revision hash for revision number."""
559
        if revno == 0:
560
            return None
561
562
        try:
563
            # list is 0-based; revisions are 1-based
564
            return self.revision_history()[revno-1]
565
        except IndexError:
566
            bailout("no such revision %s" % revno)
567
568
569
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be None for the null revision, in which case
        an `EmptyTree` is returned.  Otherwise the result is a
        `RevisionTree` over this branch's text store and the inventory
        of that revision."""

        if revision_id is None:
            return EmptyTree()
        inv = self.get_revision_inventory(revision_id)
        return RevisionTree(self.text_store, inv)
580
581
582
    def working_tree(self):
        """Return a `Tree` for the working copy.

        The tree is built from the branch base directory and a freshly
        read working inventory.
        """
        inv = self.read_working_inventory()
        return WorkingTree(self.base, inv)
585
586
587
    def basis_tree(self):
        """Return `Tree` object for last revision.

        If there are no revisions yet, return an `EmptyTree`.

        >>> b = ScratchBranch(files=['foo'])
        >>> b.basis_tree().has_filename('foo')
        False
        >>> b.working_tree().has_filename('foo')
        True
        >>> b.add('foo')
        >>> b.commit('add foo')
        >>> b.basis_tree().has_filename('foo')
        True
        """
        # revision_tree() already maps a None revision id to an
        # EmptyTree, so the null case needs no separate handling here.
        return self.revision_tree(self.last_patch())
607
608
609
610
    def write_log(self, utc=False):
611
        """Write out human-readable log of commits to this branch
612
613
        :param utc: If true, show dates in universal time, not local time."""
614
        revno = 1
615
        precursor = None
616
        for p in self.revision_history():
617
            print '-' * 40
618
            print 'revno:', revno
619
            ## TODO: Show hash if --id is given.
620
            ##print 'revision-hash:', p
621
            rev = self.get_revision(p)
622
            print 'committer:', rev.committer
623
            print 'timestamp: %s' % (format_date(rev.timestamp, utc))
624
625
            ## opportunistic consistency check, same as check_patch_chaining
626
            if rev.precursor != precursor:
627
                bailout("mismatched precursor!")
628
629
            print 'message:'
630
            if not rev.message:
631
                print '  (no message)'
632
            else:
633
                for l in rev.message.split('\n'):
634
                    print '  ' + l
635
636
            revno += 1
637
            precursor = p
638
639
640
641
    def show_status(branch, show_all=False):
        """Display single-line status for non-ignored working files.

        The list is shown sorted in order by file name.

        :param show_all: If true, also print entries whose state is
             '.' or 'I'; they are skipped otherwise.

        >>> b = ScratchBranch(files=['foo', 'foo~'])
        >>> b.show_status()
        ?       foo
        >>> b.add('foo')
        >>> b.show_status()
        A       foo
        >>> b.commit("add foo")
        >>> b.show_status()

        :todo: Get state for single files.

        :todo: Perhaps show a slash at the end of directory names.

        """

        # FIXME: Rather than getting things in random order and then sorting,
        # just step through in order.

        # Interesting case: the old ID for a file has been removed,
        # but a new file has been created under that name.

        old = branch.basis_tree()
        new = branch.working_tree()

        # Inside this method the bare name show_status is the module-level
        # helper, not this method.
        for fs, fid, oldname, newname, kind in diff_trees(old, new):
            if fs == 'R':
                show_status(fs, kind,
                            oldname + ' => ' + newname)
            elif fs in ('A', 'M', '?'):
                show_status(fs, kind, newname)
            elif fs == 'D':
                show_status(fs, kind, oldname)
            elif fs in ('.', 'I'):
                if show_all:
                    show_status(fs, kind, newname)
            else:
                bailout("wierd file state %r" % ((fs, fid),))
693
                
694
695
696
class ScratchBranch(Branch):
    """Special test class: a branch that cleans up after itself.

    >>> b = ScratchBranch()
    >>> isdir(b.base)
    True
    >>> bd = b.base
    >>> del b
    >>> isdir(bd)
    False
    """
    def __init__(self, files=()):
        """Make a test branch.

        This creates a temporary directory and runs init-tree in it.

        If any files are listed, they are created in the working copy.

        :param files: Names, relative to the branch base, of files to
             create; each gets the text 'content of <name>'.
        """
        Branch.__init__(self, tempfile.mkdtemp(), init=True)
        for f in files:
            out = open(os.path.join(self.base, f), 'w')
            try:
                out.write('content of %s' % f)
            finally:
                # Close explicitly so the content is on disk before the
                # branch is used.
                out.close()

    def __del__(self):
        """Destroy the test branch, removing the scratch directory."""
        shutil.rmtree(self.base)
722
723
    
724
725
######################################################################
726
# predicates
727
728
729
def is_control_file(filename):
    """Return True if any component of `filename` is the control directory.

    The path is normalized and then taken apart one trailing component
    at a time; each component is compared with bzrlib.BZRDIR.
    """
    ## FIXME: better check
    filename = os.path.normpath(filename)
    while filename != '':
        head, tail = os.path.split(filename)
        if tail == bzrlib.BZRDIR:
            return True
        if head == filename:
            # os.path.split() can no longer shorten the path (e.g. '/'
            # for an absolute path); stop instead of looping forever.
            break
        filename = head
    return False
739
740
741
742
def _gen_revision_id(when):
    """Return new revision-id.

    The id is user_email(), compact_date(when) and the hex form of 8
    random bytes, separated by '-'."""
    prefix = '%s-%s-' % (user_email(), compact_date(when))
    return prefix + hexlify(rand_bytes(8))
747
748
749
def _gen_file_id(name):
    """Return new file id.

    The id is `name` without its leading dots, compact_date() of the
    current time and the hex form of 8 random bytes, joined by '-'.

    This should probably generate proper UUIDs, but for the moment we
    cope with just randomness because running uuidgen every time is
    slow."""
    assert '/' not in name
    # lstrip() also copes with an empty name or one made only of dots,
    # where indexing name[0] in a loop would raise IndexError.
    name = name.lstrip('.')
    s = hexlify(rand_bytes(8))
    return '-'.join((name, compact_date(time.time()), s))
760
761