14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Plugin to fix some potential data errors in a branch.
19
This makes sure that the inventory weave's DAG of ancestry is correct so that
20
attempts to fetch the branch over http, or certain merge operations cope
23
This is most likely needed if you have used fetch-ghosts from bzrlib to
24
resolve ghosts after a baz (or otherwise) import and you get bizarre behaviour
25
when either exporting over http or when merging from other translated branches.
17
"""Reconcilers are able to fix some potential data errors in a branch."""
29
20
import bzrlib.branch
30
21
import bzrlib.errors as errors
31
22
import bzrlib.progress
32
23
from bzrlib.trace import mutter
24
from bzrlib.tsort import TopoSorter
33
25
import bzrlib.ui as ui
34
from bzrlib.weavefile import write_weave_v5 as w5
38
28
def reconcile(dir):
47
37
reconciler.reconcile()
50
def topological_sort(graph):
51
"""Topological sort a graph.
53
graph -- sequence of pairs of node->parents_list.
55
The result is a list of node names, such that all parents come before
58
Nodes at the same depth are returned in sorted order.
60
node identifiers can be any hashable object, and are typically strings.
64
return sorter._work_queue
67
class TopoSorter(object):
69
def sort(self, graph):
70
# the total set of revisions to process
71
self._rev_graph = dict(graph)
73
# self._original_graph = dict(graph)
75
# the current line of history being processed.
76
# once we add in per inventory root ids this will involve a single
77
# pass within the revision knit at that point.
78
# these contain respectively the revision id in the call stack
80
# a set of the revisions on the stack, for cycle detection
82
# and the not_ghost_parents.
83
self._pending_stack = []
84
# actual queue to reinsert. This is a single pass queue we can use to
85
# regenerate the inventory versioned file, and holds
86
# (rev_id, non_ghost_parents) entries
88
# this is a set for fast lookups of queued revisions
89
self._work_set = set()
90
# total steps = 1 read per revision + one insert into the inventory
92
# now we do a depth first search of the revision graph until its empty.
93
# this gives us a topological order across all revisions in
94
# self._all_revisions.
95
# This is flattened to avoid deep recursion:
96
# At each step in the call graph we need to know which parent we are on.
97
# we do this by having three variables for each stack frame:
98
# revision_id being descended into (self._rev_stack)
99
# current queue of parents to descend into
100
# (self._pending_stack)
101
# putting all the parents into a pending list, left to
102
# right and processing the right most one each time. The same parent
103
# may occur at multiple places in the stack - thats fine. We check its
104
# not in the output before processing. However revisions cannot
106
# the algorithm is roughly:
107
# for revision, parents in _rev_graph.items():
108
# if revision in self._all_revisions:
110
# add_revision_parents(revision, parents)
111
# def add_revision_parents(revision, parents)
112
# fpr parent in parents:
113
# add_revision_parents(parent, parent.parents)
114
# self._all_revisions.append(revision)
115
# self._all_revision_parents.appent(parents)
116
# however we tune this to visit fragmens of the graph
117
# and not double-visit entries.
118
# nevertheless the output is a
119
# self._work_queue of revisions, nonghost parents in
120
# topological order and
121
# a self._work_set which is a complete set of revisions.
122
while self._rev_graph:
123
rev_id, parents = self._rev_graph.iteritems().next()
125
self._stack_revision(rev_id, parents)
126
while self._rev_stack:
127
# loop until this call completes.
128
parents_to_visit = self._pending_stack[-1]
129
# if all parents are done, the revision is done
130
if not parents_to_visit:
131
# append the revision to the topo sorted list
132
self._unstack_revision()
134
while self._pending_stack[-1]:
135
# recurse depth first into a single parent
136
next_rev_id = self._pending_stack[-1].pop()
137
if next_rev_id in self._work_set:
138
# this parent was completed by a child on the
139
# call stack. skip it.
141
# push it into the call stack
142
self._stack_revision(next_rev_id, self._rev_graph[next_rev_id])
143
# and do not continue processing parents until this 'call'
147
# we are now recursing down a call stack.
148
# its a width first search which
150
### Useful if fiddling with this code.
152
### for index in range(len(self._work_queue)):
153
### rev = self._work_queue[index]
154
### for left_index in range(index):
155
### if rev in self.original_graph[self._work_queue[left_index]]:
156
### print "revision in parent list of earlier revision"
157
### import pdb;pdb.set_trace()
159
def _stack_revision(self, rev_id, parents):
160
"""Add rev_id to the pending revision stack."""
162
if rev_id in self._rev_set:
163
# we only supply the revisions that led to the cycle. This isn't
164
# minimal though... but it is usually a subset of the entire graph
165
# and easier to debug.
166
raise errors.GraphCycleError(self._rev_set)
168
self._rev_stack.append(rev_id)
169
self._rev_set.add(rev_id)
170
self._pending_stack.append(list(parents))
172
def _unstack_revision(self):
173
"""A revision has been completed.
175
The revision is added to the work queue, and the data for
176
it popped from the call stack.
178
rev_id = self._rev_stack.pop()
179
self._rev_set.remove(rev_id)
180
self._pending_stack.pop()
181
self._work_queue.append(rev_id)
182
self._work_set.add(rev_id)
183
# and remove it from the rev graph as its now complete
184
self._rev_graph.pop(rev_id)
187
40
class Reconciler(object):
188
41
"""Reconcilers are used to reconcile existing data.