2
Copyright (C) 2007 Canonical Ltd
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
Function equate_lines based on bdiff.c from Mercurial.
19
Copyright (C) 2005, 2006 Matt Mackall <mpm@selenic.com>
21
Functions unique_lcs/recurse_matches based on _patiencediff_py.py.
22
Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
31
/* http://www.python.org/dev/peps/pep-0353/ */
32
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
33
typedef int Py_ssize_t;
34
#define PY_SSIZE_T_MAX INT_MAX
35
#define PY_SSIZE_T_MIN INT_MIN
40
# define inline __inline__
41
#elif defined(_MSC_VER)
42
# define inline __inline
48
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
49
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
63
/* values from this array need to correspont to the order of the enum above */
64
static char *opcode_names[] = {
73
long hash; /* hash code of the string/object */
74
Py_ssize_t next; /* next line from the same equivalence class */
75
Py_ssize_t equiv; /* equivalence class */
81
Py_ssize_t a_head; /* first item in `a` from this equivalence class */
83
Py_ssize_t b_head; /* first item in `b` from this equivalence class */
91
Py_ssize_t last_a_pos;
92
Py_ssize_t last_b_pos;
97
struct matching_line {
98
Py_ssize_t a; /* index of the line in `a` */
99
Py_ssize_t b; /* index of the line in `b` */
103
struct matching_block {
104
Py_ssize_t a; /* index of the first line in `a` */
105
Py_ssize_t b; /* index of the first line in `b` */
106
Py_ssize_t len; /* length of the block */
110
struct matching_blocks {
111
struct matching_block *matches;
131
struct hashtable hashtable;
132
Py_ssize_t *backpointers;
133
} PatienceSequenceMatcher;
136
static inline Py_ssize_t
137
bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi)
140
Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2;
141
if (list[mid] < item)
151
compare_lines(struct line *a, struct line *b)
153
return ((a->hash != b->hash)
154
|| PyObject_Compare(a->data, b->data));
159
find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize,
160
struct line *lines, struct line *ref_lines,
164
for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) {
165
if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) {
174
equate_lines(struct hashtable *result,
175
struct line *lines_a, struct line *lines_b,
176
Py_ssize_t asize, Py_ssize_t bsize)
178
Py_ssize_t i, j, hsize;
179
struct bucket *hashtable;
181
/* check for overflow, we need the table to be at least bsize+1 */
182
if (bsize == PY_SSIZE_T_MAX) {
183
PyErr_SetNone(PyExc_OverflowError);
187
/* build a hash table of the next highest power of 2 */
189
while (hsize < bsize + 1)
192
hashtable = (struct bucket *)malloc(sizeof(struct bucket) * hsize);
193
if (hashtable == NULL) {
198
/* initialise the hashtable */
199
for (i = 0; i < hsize; i++) {
200
hashtable[i].a_count = 0;
201
hashtable[i].b_count = 0;
202
hashtable[i].a_head = SENTINEL;
203
hashtable[i].b_head = SENTINEL;
207
/* add lines from lines_b to the hash table chains. iterating
208
backwards so the matching lines are sorted to the linked list
209
by the line number (because we are adding new lines to the
211
for (i = bsize - 1; i >= 0; i--) {
212
/* find the first hashtable entry, which is either empty or contains
213
the same line as lines_b[i] */
214
j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i);
216
/* set the equivalence class */
217
lines_b[i].equiv = j;
219
/* add to the head of the equivalence class */
220
lines_b[i].next = hashtable[j].b_head;
221
hashtable[j].b_head = i;
222
hashtable[j].b_count++;
225
/* match items from lines_a to their equivalence class in lines_b.
226
again, iterating backwards for the right order of the linked lists */
227
for (i = asize - 1; i >= 0; i--) {
228
/* find the first hash entry, which is either empty or contains
229
the same line as lines_a[i] */
230
j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i);
232
/* set the equivalence class, even if we are not interested in this
233
line, because the values are not pre-filled */
234
lines_a[i].equiv = j;
236
/* we are not interested in lines which are not also in lines_b */
237
if (hashtable[j].b_head == SENTINEL)
240
/* add to the head of the equivalence class */
241
lines_a[i].next = hashtable[j].a_head;
242
hashtable[j].a_head = i;
243
hashtable[j].a_count++;
246
result->last_a_pos = -1;
247
result->last_b_pos = -1;
248
result->size = hsize + 1;
249
result->table = hashtable;
256
/* Finds longest common subsequence of unique lines in a[alo:ahi] and
258
Parameter backpointers must have allocated memory for at least
259
4 * (bhi - blo) ints. */
261
unique_lcs(struct matching_line *answer,
262
struct hashtable *hashtable, Py_ssize_t *backpointers,
263
struct line *lines_a, struct line *lines_b,
264
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
266
Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
267
Py_ssize_t *stacks, *lasts, *btoa;
273
h = hashtable->table;
275
/* "unpack" the allocated memory */
276
stacks = backpointers + bsize;
277
lasts = stacks + bsize;
278
btoa = lasts + bsize;
280
/* initialise the backpointers */
281
for (i = 0; i < bsize; i++)
282
backpointers[i] = SENTINEL;
284
if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
285
for (i = 0; i < hashtable->size; i++)
286
h[i].a_pos = h[i].a_head;
287
hashtable->last_a_pos = alo;
289
if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
290
for (i = 0; i < hashtable->size; i++)
291
h[i].b_pos = h[i].b_head;
292
hashtable->last_b_pos = blo;
294
for (bpos = blo; bpos < bhi; bpos++) {
295
equiv = lines_b[bpos].equiv;
297
/* no lines in a or b */
298
if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
301
/* find an unique line in lines_a that matches lines_b[bpos]
302
if we find more than one line within the range alo:ahi,
303
jump to the next line from lines_b immediately */
305
/* loop through all lines in the linked list */
306
for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
307
/* the index is lower than alo, the the next line */
312
/* the index is higher than ahi, stop searching */
315
/* if the line is within our range, check if it's a duplicate */
316
if (apos != SENTINEL)
318
/* save index to the line */
321
/* this line has no equivalent in lines_a[alo:ahi] */
322
if (apos == SENTINEL)
325
/* check for duplicates of this line in lines_b[blo:bhi] */
326
/* loop through all lines in the linked list */
327
for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
328
/* the index is lower than blo, the the next line */
333
/* the index is higher than bhi, stop searching */
336
/* if this isn't the line with started with and it's within
337
our range, it's a duplicate */
342
/* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
343
for the patience sorting algorithm */
344
norm_bpos = bpos - blo;
345
norm_apos = apos - alo;
346
btoa[norm_bpos] = norm_apos;
349
Ok, how does this work...
351
We have a list of matching lines from two lists, a and b. These
352
matches are stored in variable `btoa`. As we are iterating over this
353
table by bpos, the lines from b already form an increasing sequence.
354
We need to "sort" also the lines from a using the patience sorting
355
algorithm, ignoring the lines which would need to be swapped.
357
http://en.wikipedia.org/wiki/Patience_sorting
359
For each pair of lines, we need to place the line from a on either
360
an existing pile that has higher value on the top or create a new
361
pile. Variable `stacks` represents the tops of these piles and in
362
variable `lasts` we store the lines from b, that correspond to the
363
lines from a in `stacks`.
365
Whenever we place a new line on top of a pile, we store a
366
backpointer to the line (b) from top of the previous pile. This means
367
that after the loop, variable `backpointers` will contain an index
368
to the previous matching lines that forms an increasing sequence
369
(over both indexes a and b) with the current matching lines. If
370
either index a or b of the previous matching lines would be higher
371
than indexes of the current one or if the indexes of the current
372
one are 0, it will contain SENTINEL.
374
To construct the LCS, we will just need to follow these backpointers
375
from the top of the last pile and stop when we reach SENTINEL.
378
/* as an optimization, check if the next line comes at the end,
379
because it usually does */
380
if (stacksize && stacks[stacksize - 1] < norm_apos)
382
/* as an optimization, check if the next line comes right after
383
the previous line, because usually it does */
384
else if (stacksize && (stacks[k] < norm_apos) &&
385
(k == stacksize - 1 || stacks[k + 1] > norm_apos))
388
k = bisect_left(stacks, norm_apos, 0, stacksize);
391
backpointers[norm_bpos] = lasts[k - 1];
394
stacks[k] = norm_apos;
395
lasts[k] = norm_bpos;
398
stacks[stacksize] = norm_apos;
399
lasts[stacksize] = norm_bpos;
411
/* backtrace the structures to find the LCS */
413
k = lasts[stacksize - 1];
414
while (k != SENTINEL) {
415
answer[i].a = btoa[k];
424
/* Adds a new line to the list of matching blocks, either extending the
425
current block or adding a new one. */
427
add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b)
429
Py_ssize_t last_index = answer->count - 1;
430
if ((last_index >= 0) &&
431
(a == answer->matches[last_index].a +
432
answer->matches[last_index].len) &&
433
(b == answer->matches[last_index].b +
434
answer->matches[last_index].len)) {
435
/* enlarge the last block */
436
answer->matches[last_index].len++;
439
/* create a new block */
441
answer->matches[last_index].a = a;
442
answer->matches[last_index].b = b;
443
answer->matches[last_index].len = 1;
450
recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable,
451
Py_ssize_t *backpointers, struct line *a, struct line *b,
452
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi,
456
Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos;
457
struct matching_line *lcs;
459
if (maxrecursion < 0)
462
if (alo == ahi || blo == bhi)
466
last_a_pos = alo - 1;
467
last_b_pos = blo - 1;
469
lcs = (struct matching_line *)malloc(sizeof(struct matching_line) * (bhi - blo));
473
lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi);
475
/* recurse between lines which are unique in each file and match */
476
for (i = lcs_size - 1; i >= 0; i--) {
477
apos = alo + lcs[i].a;
478
bpos = blo + lcs[i].b;
479
if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) {
480
res = recurse_matches(answer, hashtable,
482
last_a_pos + 1, last_b_pos + 1,
483
apos, bpos, maxrecursion - 1);
489
add_matching_line(answer, apos, bpos);
496
/* find matches between the last match and the end */
498
res = recurse_matches(answer, hashtable,
500
last_a_pos + 1, last_b_pos + 1,
501
ahi, bhi, maxrecursion - 1);
507
/* find matching lines at the very beginning */
508
else if (a[alo].equiv == b[blo].equiv) {
509
while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv)
510
add_matching_line(answer, alo++, blo++);
511
res = recurse_matches(answer, hashtable,
513
alo, blo, ahi, bhi, maxrecursion - 1);
518
/* find matching lines at the very end */
519
else if (a[ahi - 1].equiv == b[bhi - 1].equiv) {
522
while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) {
526
res = recurse_matches(answer, hashtable,
528
last_a_pos + 1, last_b_pos + 1,
529
nahi, nbhi, maxrecursion - 1);
532
for (i = 0; i < ahi - nahi; i++)
533
add_matching_line(answer, nahi + i, nbhi + i);
545
delete_lines(struct line *lines, Py_ssize_t size)
547
struct line *line = lines;
549
Py_XDECREF(line->data);
557
load_lines(PyObject *orig, struct line **lines)
561
PyObject *seq, *item;
563
seq = PySequence_Fast(orig, "sequence expected");
568
size = PySequence_Fast_GET_SIZE(seq);
574
/* Allocate a memory block for line data, initialized to 0 */
575
line = *lines = (struct line *)calloc(size, sizeof(struct line));
582
for (i = 0; i < size; i++) {
583
item = PySequence_Fast_GET_ITEM(seq, i);
586
line->hash = PyObject_Hash(item);
587
if (line->hash == (-1)) {
588
/* Propogate the hash exception */
592
line->next = SENTINEL;
599
/* Error -- cleanup unused object references */
600
delete_lines(*lines, i);
608
py_unique_lcs(PyObject *self, PyObject *args)
610
PyObject *aseq, *bseq, *res, *item;
611
Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL;
612
struct line *a = NULL, *b = NULL;
613
struct matching_line *matches = NULL;
614
struct hashtable hashtable;
616
if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq))
619
hashtable.table = NULL;
621
asize = load_lines(aseq, &a);
622
bsize = load_lines(bseq, &b);
623
if (asize == -1 || bsize == -1)
626
if (!equate_lines(&hashtable, a, b, asize, bsize))
629
matches = (struct matching_line *)malloc(sizeof(struct matching_line) * bsize);
633
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
634
if (backpointers == NULL)
637
nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize);
639
res = PyList_New(nmatches);
640
for (i = 0; i < nmatches; i++) {
641
#if PY_VERSION_HEX < 0x02050000
642
item = Py_BuildValue("ii", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
644
item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
648
if (PyList_SetItem(res, i, item) != 0)
654
free(hashtable.table);
655
delete_lines(b, bsize);
656
delete_lines(a, asize);
662
free(hashtable.table);
663
delete_lines(b, bsize);
664
delete_lines(a, asize);
670
py_recurse_matches(PyObject *self, PyObject *args)
672
PyObject *aseq, *bseq, *item, *answer;
673
int maxrecursion, res;
674
Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi;
675
Py_ssize_t *backpointers = NULL;
676
struct line *a = NULL, *b = NULL;
677
struct hashtable hashtable;
678
struct matching_blocks matches;
680
#if PY_VERSION_HEX < 0x02050000
681
if (!PyArg_ParseTuple(args, "OOiiiiOi", &aseq, &bseq, &alo, &blo,
682
&ahi, &bhi, &answer, &maxrecursion))
684
if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo,
685
&ahi, &bhi, &answer, &maxrecursion))
689
hashtable.table = NULL;
690
matches.matches = NULL;
692
asize = load_lines(aseq, &a);
693
bsize = load_lines(bseq, &b);
694
if (asize == -1 || bsize == -1)
697
if (!equate_lines(&hashtable, a, b, asize, bsize))
701
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * bsize);
702
if (matches.matches == NULL)
705
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
706
if (backpointers == NULL)
709
res = recurse_matches(&matches, &hashtable, backpointers,
710
a, b, alo, blo, ahi, bhi, maxrecursion);
714
for (i = 0; i < matches.count; i++) {
715
for (j = 0; j < matches.matches[i].len; j++) {
716
#if PY_VERSION_HEX < 0x02050000
717
item = Py_BuildValue("ii", matches.matches[i].a + j,
718
matches.matches[i].b + j);
720
item = Py_BuildValue("nn", matches.matches[i].a + j,
721
matches.matches[i].b + j);
725
if (PyList_Append(answer, item) != 0)
731
free(matches.matches);
732
free(hashtable.table);
733
delete_lines(b, bsize);
734
delete_lines(a, asize);
739
free(matches.matches);
740
free(hashtable.table);
741
delete_lines(b, bsize);
742
delete_lines(a, asize);
748
PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
750
PyObject *junk, *a, *b;
751
PatienceSequenceMatcher *self;
753
self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0);
756
if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) {
761
self->asize = load_lines(a, &(self->a));
762
self->bsize = load_lines(b, &(self->b));
764
if (self->asize == -1 || self->bsize == -1) {
769
if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) {
774
self->backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * self->bsize * 4);
775
if (self->backpointers == NULL) {
782
return (PyObject *)self;
787
PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self)
789
free(self->backpointers);
790
free(self->hashtable.table);
791
delete_lines(self->b, self->bsize);
792
delete_lines(self->a, self->asize);
793
self->ob_type->tp_free((PyObject *)self);
797
static char PatienceSequenceMatcher_get_matching_blocks_doc[] =
798
"Return list of triples describing matching subsequences.\n"
800
"Each triple is of the form (i, j, n), and means that\n"
801
"a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n"
804
"The last triple is a dummy, (len(a), len(b), 0), and is the only\n"
805
"triple with n==0.\n"
807
">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n"
808
">>> s.get_matching_blocks()\n"
809
"[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n";
812
PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self)
814
PyObject *answer, *item;
817
struct matching_blocks matches;
820
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * self->bsize);
821
if (matches.matches == NULL)
822
return PyErr_NoMemory();
824
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
825
self->a, self->b, 0, 0,
826
self->asize, self->bsize, 10);
828
free(matches.matches);
829
return PyErr_NoMemory();
832
answer = PyList_New(matches.count + 1);
833
if (answer == NULL) {
834
free(matches.matches);
838
for (i = 0; i < matches.count; i++) {
839
#if PY_VERSION_HEX < 0x02050000
840
item = Py_BuildValue("iii", matches.matches[i].a,
841
matches.matches[i].b, matches.matches[i].len);
843
item = Py_BuildValue("nnn", matches.matches[i].a,
844
matches.matches[i].b, matches.matches[i].len);
848
if (PyList_SetItem(answer, i, item) != 0)
851
#if PY_VERSION_HEX < 0x02050000
852
item = Py_BuildValue("iii", self->asize, self->bsize, 0);
854
item = Py_BuildValue("nnn", self->asize, self->bsize, 0);
858
if (PyList_SetItem(answer, i, item) != 0)
861
free(matches.matches);
865
free(matches.matches);
871
static char PatienceSequenceMatcher_get_opcodes_doc[] =
872
"Return list of 5-tuples describing how to turn a into b.\n"
874
"Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n"
875
"has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n"
876
"tuple preceding it, and likewise for j1 == the previous j2.\n"
878
"The tags are strings, with these meanings:\n"
880
"'replace': a[i1:i2] should be replaced by b[j1:j2]\n"
881
"'delete': a[i1:i2] should be deleted.\n"
882
" Note that j1==j2 in this case.\n"
883
"'insert': b[j1:j2] should be inserted at a[i1:i1].\n"
884
" Note that i1==i2 in this case.\n"
885
"'equal': a[i1:i2] == b[j1:j2]\n"
887
">>> a = \"qabxcd\"\n"
888
">>> b = \"abycdf\"\n"
889
">>> s = PatienceSequenceMatcher(None, a, b)\n"
890
">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n"
891
"... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n"
892
"... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n"
893
" delete a[0:1] (q) b[0:0] ()\n"
894
" equal a[1:3] (ab) b[0:2] (ab)\n"
895
"replace a[3:4] (x) b[2:3] (y)\n"
896
" equal a[4:6] (cd) b[3:5] (cd)\n"
897
" insert a[6:6] () b[5:6] (f)\n";
900
PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self)
902
PyObject *answer, *item;
903
Py_ssize_t i, j, k, ai, bj;
905
struct matching_blocks matches;
908
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
909
if (matches.matches == NULL)
910
return PyErr_NoMemory();
912
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
913
self->a, self->b, 0, 0,
914
self->asize, self->bsize, 10);
916
free(matches.matches);
917
return PyErr_NoMemory();
920
matches.matches[matches.count].a = self->asize;
921
matches.matches[matches.count].b = self->bsize;
922
matches.matches[matches.count].len = 0;
925
answer = PyList_New(0);
926
if (answer == NULL) {
927
free(matches.matches);
932
for (k = 0; k < matches.count; k++) {
933
ai = matches.matches[k].a;
934
bj = matches.matches[k].b;
937
if (i < ai && j < bj)
945
#if PY_VERSION_HEX < 0x02050000
946
item = Py_BuildValue("siiii", opcode_names[tag], i, ai, j, bj);
948
item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj);
952
if (PyList_Append(answer, item) != 0)
956
i = ai + matches.matches[k].len;
957
j = bj + matches.matches[k].len;
959
if (matches.matches[k].len > 0) {
960
#if PY_VERSION_HEX < 0x02050000
961
item = Py_BuildValue("siiii", opcode_names[OP_EQUAL], ai, i, bj, j);
963
item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j);
967
if (PyList_Append(answer, item) != 0)
972
free(matches.matches);
976
free(matches.matches);
982
static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] =
983
"Isolate change clusters by eliminating ranges with no changes.\n"
985
"Return a list of groups with upto n lines of context.\n"
986
"Each group is in the same format as returned by get_opcodes().\n"
988
">>> from pprint import pprint\n"
989
">>> a = map(str, range(1,40))\n"
991
">>> b[8:8] = ['i'] # Make an insertion\n"
992
">>> b[20] += 'x' # Make a replacement\n"
993
">>> b[23:28] = [] # Make a deletion\n"
994
">>> b[30] += 'y' # Make another replacement\n"
995
">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n"
996
"[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n"
997
" [('equal', 16, 19, 17, 20),\n"
998
" ('replace', 19, 20, 20, 21),\n"
999
" ('equal', 20, 22, 21, 23),\n"
1000
" ('delete', 22, 27, 23, 23),\n"
1001
" ('equal', 27, 30, 23, 26)],\n"
1002
" [('equal', 31, 34, 27, 30),\n"
1003
" ('replace', 34, 35, 30, 31),\n"
1004
" ('equal', 35, 38, 31, 34)]]\n";
1007
PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self,
1010
PyObject *answer, *group, *item;
1011
Py_ssize_t i, j, k, ai, bj, size, ncodes, tag;
1012
Py_ssize_t i1, i2, j1, j2;
1014
struct matching_blocks matches;
1015
struct opcode *codes;
1017
if (!PyArg_ParseTuple(args, "|i", &n))
1021
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
1022
if (matches.matches == NULL)
1023
return PyErr_NoMemory();
1025
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
1026
self->a, self->b, 0, 0,
1027
self->asize, self->bsize, 10);
1029
free(matches.matches);
1030
return PyErr_NoMemory();
1033
matches.matches[matches.count].a = self->asize;
1034
matches.matches[matches.count].b = self->bsize;
1035
matches.matches[matches.count].len = 0;
1039
codes = (struct opcode *)malloc(sizeof(struct opcode) * matches.count * 2);
1040
if (codes == NULL) {
1041
free(matches.matches);
1042
return PyErr_NoMemory();
1046
for (k = 0; k < matches.count; k++) {
1047
ai = matches.matches[k].a;
1048
bj = matches.matches[k].b;
1051
if (i < ai && j < bj)
1059
codes[ncodes].tag = tag;
1060
codes[ncodes].i1 = i;
1061
codes[ncodes].i2 = ai;
1062
codes[ncodes].j1 = j;
1063
codes[ncodes].j2 = bj;
1067
i = ai + matches.matches[k].len;
1068
j = bj + matches.matches[k].len;
1070
if (matches.matches[k].len > 0) {
1071
codes[ncodes].tag = OP_EQUAL;
1072
codes[ncodes].i1 = ai;
1073
codes[ncodes].i2 = i;
1074
codes[ncodes].j1 = bj;
1075
codes[ncodes].j2 = j;
1081
codes[ncodes].tag = OP_EQUAL;
1082
codes[ncodes].i1 = 0;
1083
codes[ncodes].i2 = 1;
1084
codes[ncodes].j1 = 0;
1085
codes[ncodes].j2 = 1;
1089
/* fixup leading and trailing groups if they show no changes. */
1090
if (codes[0].tag == OP_EQUAL) {
1091
codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n);
1092
codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n);
1094
if (codes[ncodes - 1].tag == OP_EQUAL) {
1095
codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2,
1096
codes[ncodes - 1].i1 + n);
1097
codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2,
1098
codes[ncodes - 1].j1 + n);
1103
answer = PyList_New(0);
1107
group = PyList_New(0);
1113
for (i = 0; i < ncodes; i++) {
1119
/* end the current group and start a new one whenever
1120
there is a large range with no changes. */
1121
if (tag == OP_EQUAL && i2 - i1 > nn) {
1122
#if PY_VERSION_HEX < 0x02050000
1123
item = Py_BuildValue("siiii", opcode_names[tag],
1124
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1126
item = Py_BuildValue("snnnn", opcode_names[tag],
1127
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1131
if (PyList_Append(group, item) != 0)
1133
if (PyList_Append(answer, group) != 0)
1135
group = PyList_New(0);
1138
i1 = MAX(i1, i2 - n);
1139
j1 = MAX(j1, j2 - n);
1141
#if PY_VERSION_HEX < 0x02050000
1142
item = Py_BuildValue("siiii", opcode_names[tag], i1, i2, j1 ,j2);
1144
item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2);
1148
if (PyList_Append(group, item) != 0)
1151
size = PyList_Size(group);
1152
if (size > 0 && !(size == 1 && tag == OP_EQUAL)) {
1153
if (PyList_Append(answer, group) != 0)
1160
free(matches.matches);
1165
free(matches.matches);
1172
static PyMethodDef PatienceSequenceMatcher_methods[] = {
1173
{"get_matching_blocks",
1174
(PyCFunction)PatienceSequenceMatcher_get_matching_blocks,
1176
PatienceSequenceMatcher_get_matching_blocks_doc},
1178
(PyCFunction)PatienceSequenceMatcher_get_opcodes,
1180
PatienceSequenceMatcher_get_opcodes_doc},
1181
{"get_grouped_opcodes",
1182
(PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes,
1184
PatienceSequenceMatcher_get_grouped_opcodes_doc},
1189
static char PatienceSequenceMatcher_doc[] =
1190
"C implementation of PatienceSequenceMatcher";
1193
static PyTypeObject PatienceSequenceMatcherType = {
1194
PyObject_HEAD_INIT(NULL)
1196
"PatienceSequenceMatcher", /* tp_name */
1197
sizeof(PatienceSequenceMatcher), /* tp_basicsize */
1198
0, /* tp_itemsize */
1199
(destructor)PatienceSequenceMatcher_dealloc, /* tp_dealloc */
1205
0, /* tp_as_number */
1206
0, /* tp_as_sequence */
1207
0, /* tp_as_mapping */
1211
0, /* tp_getattro */
1212
0, /* tp_setattro */
1213
0, /* tp_as_buffer */
1214
Py_TPFLAGS_DEFAULT, /* tp_flags*/
1215
PatienceSequenceMatcher_doc, /* tp_doc */
1216
0, /* tp_traverse */
1218
0, /* tp_richcompare */
1219
0, /* tp_weaklistoffset */
1221
0, /* tp_iternext */
1222
PatienceSequenceMatcher_methods, /* tp_methods */
1227
0, /* tp_descr_get */
1228
0, /* tp_descr_set */
1229
0, /* tp_dictoffset */
1232
PatienceSequenceMatcher_new, /* tp_new */
1236
static PyMethodDef cpatiencediff_methods[] = {
1237
{"unique_lcs_c", py_unique_lcs, METH_VARARGS},
1238
{"recurse_matches_c", py_recurse_matches, METH_VARARGS},
1244
init_patiencediff_c(void)
1248
if (PyType_Ready(&PatienceSequenceMatcherType) < 0)
1251
m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods,
1252
"C implementation of PatienceSequenceMatcher");
1256
Py_INCREF(&PatienceSequenceMatcherType);
1257
PyModule_AddObject(m, "PatienceSequenceMatcher_c",
1258
(PyObject *)&PatienceSequenceMatcherType);