2
Copyright (C) 2007 Canonical Ltd
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
Function equate_lines based on bdiff.c from Mercurial.
19
Copyright (C) 2005, 2006 Matt Mackall <mpm@selenic.com>
21
Functions unique_lcs/recurse_matches based on _patiencediff_py.py.
22
Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
30
#include "python-compat.h"
34
# define inline __inline__
35
#elif defined(_MSC_VER)
36
# define inline __inline
42
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
43
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
57
/* values from this array need to correspont to the order of the enum above */
58
static char *opcode_names[] = {
67
long hash; /* hash code of the string/object */
68
Py_ssize_t next; /* next line from the same equivalence class */
69
Py_ssize_t equiv; /* equivalence class */
75
Py_ssize_t a_head; /* first item in `a` from this equivalence class */
77
Py_ssize_t b_head; /* first item in `b` from this equivalence class */
85
Py_ssize_t last_a_pos;
86
Py_ssize_t last_b_pos;
91
struct matching_line {
92
Py_ssize_t a; /* index of the line in `a` */
93
Py_ssize_t b; /* index of the line in `b` */
97
struct matching_block {
98
Py_ssize_t a; /* index of the first line in `a` */
99
Py_ssize_t b; /* index of the first line in `b` */
100
Py_ssize_t len; /* length of the block */
104
struct matching_blocks {
105
struct matching_block *matches;
125
struct hashtable hashtable;
126
Py_ssize_t *backpointers;
127
} PatienceSequenceMatcher;
130
static inline Py_ssize_t
131
bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi)
134
Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2;
135
if (list[mid] < item)
145
compare_lines(struct line *a, struct line *b)
147
return ((a->hash != b->hash)
148
|| PyObject_Compare(a->data, b->data));
153
find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize,
154
struct line *lines, struct line *ref_lines,
158
for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) {
159
if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) {
168
equate_lines(struct hashtable *result,
169
struct line *lines_a, struct line *lines_b,
170
Py_ssize_t asize, Py_ssize_t bsize)
172
Py_ssize_t i, j, hsize;
173
struct bucket *hashtable;
175
/* check for overflow, we need the table to be at least bsize+1 */
176
if (bsize == PY_SSIZE_T_MAX) {
177
PyErr_SetNone(PyExc_OverflowError);
181
/* build a hash table of the next highest power of 2 */
183
while (hsize < bsize + 1)
186
hashtable = (struct bucket *)malloc(sizeof(struct bucket) * hsize);
187
if (hashtable == NULL) {
192
/* initialise the hashtable */
193
for (i = 0; i < hsize; i++) {
194
hashtable[i].a_count = 0;
195
hashtable[i].b_count = 0;
196
hashtable[i].a_head = SENTINEL;
197
hashtable[i].b_head = SENTINEL;
201
/* add lines from lines_b to the hash table chains. iterating
202
backwards so the matching lines are sorted to the linked list
203
by the line number (because we are adding new lines to the
205
for (i = bsize - 1; i >= 0; i--) {
206
/* find the first hashtable entry, which is either empty or contains
207
the same line as lines_b[i] */
208
j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i);
210
/* set the equivalence class */
211
lines_b[i].equiv = j;
213
/* add to the head of the equivalence class */
214
lines_b[i].next = hashtable[j].b_head;
215
hashtable[j].b_head = i;
216
hashtable[j].b_count++;
219
/* match items from lines_a to their equivalence class in lines_b.
220
again, iterating backwards for the right order of the linked lists */
221
for (i = asize - 1; i >= 0; i--) {
222
/* find the first hash entry, which is either empty or contains
223
the same line as lines_a[i] */
224
j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i);
226
/* set the equivalence class, even if we are not interested in this
227
line, because the values are not pre-filled */
228
lines_a[i].equiv = j;
230
/* we are not interested in lines which are not also in lines_b */
231
if (hashtable[j].b_head == SENTINEL)
234
/* add to the head of the equivalence class */
235
lines_a[i].next = hashtable[j].a_head;
236
hashtable[j].a_head = i;
237
hashtable[j].a_count++;
240
result->last_a_pos = -1;
241
result->last_b_pos = -1;
242
result->size = hsize + 1;
243
result->table = hashtable;
250
/* Finds longest common subsequence of unique lines in a[alo:ahi] and
252
Parameter backpointers must have allocated memory for at least
253
4 * (bhi - blo) ints. */
255
unique_lcs(struct matching_line *answer,
256
struct hashtable *hashtable, Py_ssize_t *backpointers,
257
struct line *lines_a, struct line *lines_b,
258
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
260
Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
261
Py_ssize_t *stacks, *lasts, *btoa;
267
h = hashtable->table;
269
/* "unpack" the allocated memory */
270
stacks = backpointers + bsize;
271
lasts = stacks + bsize;
272
btoa = lasts + bsize;
274
/* initialise the backpointers */
275
for (i = 0; i < bsize; i++)
276
backpointers[i] = SENTINEL;
278
if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
279
for (i = 0; i < hashtable->size; i++)
280
h[i].a_pos = h[i].a_head;
281
hashtable->last_a_pos = alo;
283
if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
284
for (i = 0; i < hashtable->size; i++)
285
h[i].b_pos = h[i].b_head;
286
hashtable->last_b_pos = blo;
288
for (bpos = blo; bpos < bhi; bpos++) {
289
equiv = lines_b[bpos].equiv;
291
/* no lines in a or b */
292
if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
295
/* find an unique line in lines_a that matches lines_b[bpos]
296
if we find more than one line within the range alo:ahi,
297
jump to the next line from lines_b immediately */
299
/* loop through all lines in the linked list */
300
for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
301
/* the index is lower than alo, the the next line */
306
/* the index is higher than ahi, stop searching */
309
/* if the line is within our range, check if it's a duplicate */
310
if (apos != SENTINEL)
312
/* save index to the line */
315
/* this line has no equivalent in lines_a[alo:ahi] */
316
if (apos == SENTINEL)
319
/* check for duplicates of this line in lines_b[blo:bhi] */
320
/* loop through all lines in the linked list */
321
for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
322
/* the index is lower than blo, the the next line */
327
/* the index is higher than bhi, stop searching */
330
/* if this isn't the line with started with and it's within
331
our range, it's a duplicate */
336
/* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
337
for the patience sorting algorithm */
338
norm_bpos = bpos - blo;
339
norm_apos = apos - alo;
340
btoa[norm_bpos] = norm_apos;
343
Ok, how does this work...
345
We have a list of matching lines from two lists, a and b. These
346
matches are stored in variable `btoa`. As we are iterating over this
347
table by bpos, the lines from b already form an increasing sequence.
348
We need to "sort" also the lines from a using the patience sorting
349
algorithm, ignoring the lines which would need to be swapped.
351
http://en.wikipedia.org/wiki/Patience_sorting
353
For each pair of lines, we need to place the line from a on either
354
an existing pile that has higher value on the top or create a new
355
pile. Variable `stacks` represents the tops of these piles and in
356
variable `lasts` we store the lines from b, that correspond to the
357
lines from a in `stacks`.
359
Whenever we place a new line on top of a pile, we store a
360
backpointer to the line (b) from top of the previous pile. This means
361
that after the loop, variable `backpointers` will contain an index
362
to the previous matching lines that forms an increasing sequence
363
(over both indexes a and b) with the current matching lines. If
364
either index a or b of the previous matching lines would be higher
365
than indexes of the current one or if the indexes of the current
366
one are 0, it will contain SENTINEL.
368
To construct the LCS, we will just need to follow these backpointers
369
from the top of the last pile and stop when we reach SENTINEL.
372
/* as an optimization, check if the next line comes at the end,
373
because it usually does */
374
if (stacksize && stacks[stacksize - 1] < norm_apos)
376
/* as an optimization, check if the next line comes right after
377
the previous line, because usually it does */
378
else if (stacksize && (stacks[k] < norm_apos) &&
379
(k == stacksize - 1 || stacks[k + 1] > norm_apos))
382
k = bisect_left(stacks, norm_apos, 0, stacksize);
385
backpointers[norm_bpos] = lasts[k - 1];
388
stacks[k] = norm_apos;
389
lasts[k] = norm_bpos;
392
stacks[stacksize] = norm_apos;
393
lasts[stacksize] = norm_bpos;
405
/* backtrace the structures to find the LCS */
407
k = lasts[stacksize - 1];
408
while (k != SENTINEL) {
409
answer[i].a = btoa[k];
418
/* Adds a new line to the list of matching blocks, either extending the
419
current block or adding a new one. */
421
add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b)
423
Py_ssize_t last_index = answer->count - 1;
424
if ((last_index >= 0) &&
425
(a == answer->matches[last_index].a +
426
answer->matches[last_index].len) &&
427
(b == answer->matches[last_index].b +
428
answer->matches[last_index].len)) {
429
/* enlarge the last block */
430
answer->matches[last_index].len++;
433
/* create a new block */
435
answer->matches[last_index].a = a;
436
answer->matches[last_index].b = b;
437
answer->matches[last_index].len = 1;
444
recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable,
445
Py_ssize_t *backpointers, struct line *a, struct line *b,
446
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi,
450
Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos;
451
struct matching_line *lcs;
453
if (maxrecursion < 0)
456
if (alo == ahi || blo == bhi)
460
last_a_pos = alo - 1;
461
last_b_pos = blo - 1;
463
lcs = (struct matching_line *)malloc(sizeof(struct matching_line) * (bhi - blo));
467
lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi);
469
/* recurse between lines which are unique in each file and match */
470
for (i = lcs_size - 1; i >= 0; i--) {
471
apos = alo + lcs[i].a;
472
bpos = blo + lcs[i].b;
473
if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) {
474
res = recurse_matches(answer, hashtable,
476
last_a_pos + 1, last_b_pos + 1,
477
apos, bpos, maxrecursion - 1);
483
add_matching_line(answer, apos, bpos);
490
/* find matches between the last match and the end */
492
res = recurse_matches(answer, hashtable,
494
last_a_pos + 1, last_b_pos + 1,
495
ahi, bhi, maxrecursion - 1);
501
/* find matching lines at the very beginning */
502
else if (a[alo].equiv == b[blo].equiv) {
503
while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv)
504
add_matching_line(answer, alo++, blo++);
505
res = recurse_matches(answer, hashtable,
507
alo, blo, ahi, bhi, maxrecursion - 1);
512
/* find matching lines at the very end */
513
else if (a[ahi - 1].equiv == b[bhi - 1].equiv) {
516
while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) {
520
res = recurse_matches(answer, hashtable,
522
last_a_pos + 1, last_b_pos + 1,
523
nahi, nbhi, maxrecursion - 1);
526
for (i = 0; i < ahi - nahi; i++)
527
add_matching_line(answer, nahi + i, nbhi + i);
539
delete_lines(struct line *lines, Py_ssize_t size)
541
struct line *line = lines;
543
Py_XDECREF(line->data);
551
load_lines(PyObject *orig, struct line **lines)
555
PyObject *seq, *item;
557
seq = PySequence_Fast(orig, "sequence expected");
562
size = PySequence_Fast_GET_SIZE(seq);
568
/* Allocate a memory block for line data, initialized to 0 */
569
line = *lines = (struct line *)calloc(size, sizeof(struct line));
576
for (i = 0; i < size; i++) {
577
item = PySequence_Fast_GET_ITEM(seq, i);
580
line->hash = PyObject_Hash(item);
581
if (line->hash == (-1)) {
582
/* Propogate the hash exception */
586
line->next = SENTINEL;
593
/* Error -- cleanup unused object references */
594
delete_lines(*lines, i);
602
py_unique_lcs(PyObject *self, PyObject *args)
604
PyObject *aseq, *bseq, *res, *item;
605
Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL;
606
struct line *a = NULL, *b = NULL;
607
struct matching_line *matches = NULL;
608
struct hashtable hashtable;
610
if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq))
613
hashtable.table = NULL;
615
asize = load_lines(aseq, &a);
616
bsize = load_lines(bseq, &b);
617
if (asize == -1 || bsize == -1)
620
if (!equate_lines(&hashtable, a, b, asize, bsize))
623
matches = (struct matching_line *)malloc(sizeof(struct matching_line) * bsize);
627
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
628
if (backpointers == NULL)
631
nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize);
633
res = PyList_New(nmatches);
634
for (i = 0; i < nmatches; i++) {
635
#if PY_VERSION_HEX < 0x02050000
636
item = Py_BuildValue("ii", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
638
item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
642
if (PyList_SetItem(res, i, item) != 0)
648
free(hashtable.table);
649
delete_lines(b, bsize);
650
delete_lines(a, asize);
656
free(hashtable.table);
657
delete_lines(b, bsize);
658
delete_lines(a, asize);
664
py_recurse_matches(PyObject *self, PyObject *args)
666
PyObject *aseq, *bseq, *item, *answer;
667
int maxrecursion, res;
668
Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi;
669
Py_ssize_t *backpointers = NULL;
670
struct line *a = NULL, *b = NULL;
671
struct hashtable hashtable;
672
struct matching_blocks matches;
674
#if PY_VERSION_HEX < 0x02050000
675
if (!PyArg_ParseTuple(args, "OOiiiiOi", &aseq, &bseq, &alo, &blo,
676
&ahi, &bhi, &answer, &maxrecursion))
678
if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo,
679
&ahi, &bhi, &answer, &maxrecursion))
683
hashtable.table = NULL;
684
matches.matches = NULL;
686
asize = load_lines(aseq, &a);
687
bsize = load_lines(bseq, &b);
688
if (asize == -1 || bsize == -1)
691
if (!equate_lines(&hashtable, a, b, asize, bsize))
695
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * bsize);
696
if (matches.matches == NULL)
699
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
700
if (backpointers == NULL)
703
res = recurse_matches(&matches, &hashtable, backpointers,
704
a, b, alo, blo, ahi, bhi, maxrecursion);
708
for (i = 0; i < matches.count; i++) {
709
for (j = 0; j < matches.matches[i].len; j++) {
710
#if PY_VERSION_HEX < 0x02050000
711
item = Py_BuildValue("ii", matches.matches[i].a + j,
712
matches.matches[i].b + j);
714
item = Py_BuildValue("nn", matches.matches[i].a + j,
715
matches.matches[i].b + j);
719
if (PyList_Append(answer, item) != 0)
725
free(matches.matches);
726
free(hashtable.table);
727
delete_lines(b, bsize);
728
delete_lines(a, asize);
733
free(matches.matches);
734
free(hashtable.table);
735
delete_lines(b, bsize);
736
delete_lines(a, asize);
742
PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
744
PyObject *junk, *a, *b;
745
PatienceSequenceMatcher *self;
747
self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0);
750
if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) {
755
self->asize = load_lines(a, &(self->a));
756
self->bsize = load_lines(b, &(self->b));
758
if (self->asize == -1 || self->bsize == -1) {
763
if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) {
768
self->backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * self->bsize * 4);
769
if (self->backpointers == NULL) {
776
return (PyObject *)self;
781
PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self)
783
free(self->backpointers);
784
free(self->hashtable.table);
785
delete_lines(self->b, self->bsize);
786
delete_lines(self->a, self->asize);
787
self->ob_type->tp_free((PyObject *)self);
791
static char PatienceSequenceMatcher_get_matching_blocks_doc[] =
792
"Return list of triples describing matching subsequences.\n"
794
"Each triple is of the form (i, j, n), and means that\n"
795
"a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n"
798
"The last triple is a dummy, (len(a), len(b), 0), and is the only\n"
799
"triple with n==0.\n"
801
">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n"
802
">>> s.get_matching_blocks()\n"
803
"[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n";
806
PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self)
808
PyObject *answer, *item;
811
struct matching_blocks matches;
814
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * self->bsize);
815
if (matches.matches == NULL)
816
return PyErr_NoMemory();
818
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
819
self->a, self->b, 0, 0,
820
self->asize, self->bsize, 10);
822
free(matches.matches);
823
return PyErr_NoMemory();
826
answer = PyList_New(matches.count + 1);
827
if (answer == NULL) {
828
free(matches.matches);
832
for (i = 0; i < matches.count; i++) {
833
#if PY_VERSION_HEX < 0x02050000
834
item = Py_BuildValue("iii", matches.matches[i].a,
835
matches.matches[i].b, matches.matches[i].len);
837
item = Py_BuildValue("nnn", matches.matches[i].a,
838
matches.matches[i].b, matches.matches[i].len);
842
if (PyList_SetItem(answer, i, item) != 0)
845
#if PY_VERSION_HEX < 0x02050000
846
item = Py_BuildValue("iii", self->asize, self->bsize, 0);
848
item = Py_BuildValue("nnn", self->asize, self->bsize, 0);
852
if (PyList_SetItem(answer, i, item) != 0)
855
free(matches.matches);
859
free(matches.matches);
865
static char PatienceSequenceMatcher_get_opcodes_doc[] =
866
"Return list of 5-tuples describing how to turn a into b.\n"
868
"Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n"
869
"has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n"
870
"tuple preceding it, and likewise for j1 == the previous j2.\n"
872
"The tags are strings, with these meanings:\n"
874
"'replace': a[i1:i2] should be replaced by b[j1:j2]\n"
875
"'delete': a[i1:i2] should be deleted.\n"
876
" Note that j1==j2 in this case.\n"
877
"'insert': b[j1:j2] should be inserted at a[i1:i1].\n"
878
" Note that i1==i2 in this case.\n"
879
"'equal': a[i1:i2] == b[j1:j2]\n"
881
">>> a = \"qabxcd\"\n"
882
">>> b = \"abycdf\"\n"
883
">>> s = PatienceSequenceMatcher(None, a, b)\n"
884
">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n"
885
"... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n"
886
"... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n"
887
" delete a[0:1] (q) b[0:0] ()\n"
888
" equal a[1:3] (ab) b[0:2] (ab)\n"
889
"replace a[3:4] (x) b[2:3] (y)\n"
890
" equal a[4:6] (cd) b[3:5] (cd)\n"
891
" insert a[6:6] () b[5:6] (f)\n";
894
PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self)
896
PyObject *answer, *item;
897
Py_ssize_t i, j, k, ai, bj;
899
struct matching_blocks matches;
902
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
903
if (matches.matches == NULL)
904
return PyErr_NoMemory();
906
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
907
self->a, self->b, 0, 0,
908
self->asize, self->bsize, 10);
910
free(matches.matches);
911
return PyErr_NoMemory();
914
matches.matches[matches.count].a = self->asize;
915
matches.matches[matches.count].b = self->bsize;
916
matches.matches[matches.count].len = 0;
919
answer = PyList_New(0);
920
if (answer == NULL) {
921
free(matches.matches);
926
for (k = 0; k < matches.count; k++) {
927
ai = matches.matches[k].a;
928
bj = matches.matches[k].b;
931
if (i < ai && j < bj)
939
#if PY_VERSION_HEX < 0x02050000
940
item = Py_BuildValue("siiii", opcode_names[tag], i, ai, j, bj);
942
item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj);
946
if (PyList_Append(answer, item) != 0)
950
i = ai + matches.matches[k].len;
951
j = bj + matches.matches[k].len;
953
if (matches.matches[k].len > 0) {
954
#if PY_VERSION_HEX < 0x02050000
955
item = Py_BuildValue("siiii", opcode_names[OP_EQUAL], ai, i, bj, j);
957
item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j);
961
if (PyList_Append(answer, item) != 0)
966
free(matches.matches);
970
free(matches.matches);
976
static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] =
977
"Isolate change clusters by eliminating ranges with no changes.\n"
979
"Return a list of groups with upto n lines of context.\n"
980
"Each group is in the same format as returned by get_opcodes().\n"
982
">>> from pprint import pprint\n"
983
">>> a = map(str, range(1,40))\n"
985
">>> b[8:8] = ['i'] # Make an insertion\n"
986
">>> b[20] += 'x' # Make a replacement\n"
987
">>> b[23:28] = [] # Make a deletion\n"
988
">>> b[30] += 'y' # Make another replacement\n"
989
">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n"
990
"[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n"
991
" [('equal', 16, 19, 17, 20),\n"
992
" ('replace', 19, 20, 20, 21),\n"
993
" ('equal', 20, 22, 21, 23),\n"
994
" ('delete', 22, 27, 23, 23),\n"
995
" ('equal', 27, 30, 23, 26)],\n"
996
" [('equal', 31, 34, 27, 30),\n"
997
" ('replace', 34, 35, 30, 31),\n"
998
" ('equal', 35, 38, 31, 34)]]\n";
1001
PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self,
1004
PyObject *answer, *group, *item;
1005
Py_ssize_t i, j, k, ai, bj, size, ncodes, tag;
1006
Py_ssize_t i1, i2, j1, j2;
1008
struct matching_blocks matches;
1009
struct opcode *codes;
1011
if (!PyArg_ParseTuple(args, "|i", &n))
1015
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
1016
if (matches.matches == NULL)
1017
return PyErr_NoMemory();
1019
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
1020
self->a, self->b, 0, 0,
1021
self->asize, self->bsize, 10);
1023
free(matches.matches);
1024
return PyErr_NoMemory();
1027
matches.matches[matches.count].a = self->asize;
1028
matches.matches[matches.count].b = self->bsize;
1029
matches.matches[matches.count].len = 0;
1033
codes = (struct opcode *)malloc(sizeof(struct opcode) * matches.count * 2);
1034
if (codes == NULL) {
1035
free(matches.matches);
1036
return PyErr_NoMemory();
1040
for (k = 0; k < matches.count; k++) {
1041
ai = matches.matches[k].a;
1042
bj = matches.matches[k].b;
1045
if (i < ai && j < bj)
1053
codes[ncodes].tag = tag;
1054
codes[ncodes].i1 = i;
1055
codes[ncodes].i2 = ai;
1056
codes[ncodes].j1 = j;
1057
codes[ncodes].j2 = bj;
1061
i = ai + matches.matches[k].len;
1062
j = bj + matches.matches[k].len;
1064
if (matches.matches[k].len > 0) {
1065
codes[ncodes].tag = OP_EQUAL;
1066
codes[ncodes].i1 = ai;
1067
codes[ncodes].i2 = i;
1068
codes[ncodes].j1 = bj;
1069
codes[ncodes].j2 = j;
1075
codes[ncodes].tag = OP_EQUAL;
1076
codes[ncodes].i1 = 0;
1077
codes[ncodes].i2 = 1;
1078
codes[ncodes].j1 = 0;
1079
codes[ncodes].j2 = 1;
1083
/* fixup leading and trailing groups if they show no changes. */
1084
if (codes[0].tag == OP_EQUAL) {
1085
codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n);
1086
codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n);
1088
if (codes[ncodes - 1].tag == OP_EQUAL) {
1089
codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2,
1090
codes[ncodes - 1].i1 + n);
1091
codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2,
1092
codes[ncodes - 1].j1 + n);
1097
answer = PyList_New(0);
1101
group = PyList_New(0);
1107
for (i = 0; i < ncodes; i++) {
1113
/* end the current group and start a new one whenever
1114
there is a large range with no changes. */
1115
if (tag == OP_EQUAL && i2 - i1 > nn) {
1116
#if PY_VERSION_HEX < 0x02050000
1117
item = Py_BuildValue("siiii", opcode_names[tag],
1118
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1120
item = Py_BuildValue("snnnn", opcode_names[tag],
1121
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1125
if (PyList_Append(group, item) != 0)
1127
if (PyList_Append(answer, group) != 0)
1129
group = PyList_New(0);
1132
i1 = MAX(i1, i2 - n);
1133
j1 = MAX(j1, j2 - n);
1135
#if PY_VERSION_HEX < 0x02050000
1136
item = Py_BuildValue("siiii", opcode_names[tag], i1, i2, j1 ,j2);
1138
item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2);
1142
if (PyList_Append(group, item) != 0)
1145
size = PyList_Size(group);
1146
if (size > 0 && !(size == 1 && tag == OP_EQUAL)) {
1147
if (PyList_Append(answer, group) != 0)
1154
free(matches.matches);
1159
free(matches.matches);
1166
static PyMethodDef PatienceSequenceMatcher_methods[] = {
1167
{"get_matching_blocks",
1168
(PyCFunction)PatienceSequenceMatcher_get_matching_blocks,
1170
PatienceSequenceMatcher_get_matching_blocks_doc},
1172
(PyCFunction)PatienceSequenceMatcher_get_opcodes,
1174
PatienceSequenceMatcher_get_opcodes_doc},
1175
{"get_grouped_opcodes",
1176
(PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes,
1178
PatienceSequenceMatcher_get_grouped_opcodes_doc},
1183
static char PatienceSequenceMatcher_doc[] =
1184
"C implementation of PatienceSequenceMatcher";
1187
static PyTypeObject PatienceSequenceMatcherType = {
1188
PyObject_HEAD_INIT(NULL)
1190
"PatienceSequenceMatcher", /* tp_name */
1191
sizeof(PatienceSequenceMatcher), /* tp_basicsize */
1192
0, /* tp_itemsize */
1193
(destructor)PatienceSequenceMatcher_dealloc, /* tp_dealloc */
1199
0, /* tp_as_number */
1200
0, /* tp_as_sequence */
1201
0, /* tp_as_mapping */
1205
0, /* tp_getattro */
1206
0, /* tp_setattro */
1207
0, /* tp_as_buffer */
1208
Py_TPFLAGS_DEFAULT, /* tp_flags*/
1209
PatienceSequenceMatcher_doc, /* tp_doc */
1210
0, /* tp_traverse */
1212
0, /* tp_richcompare */
1213
0, /* tp_weaklistoffset */
1215
0, /* tp_iternext */
1216
PatienceSequenceMatcher_methods, /* tp_methods */
1221
0, /* tp_descr_get */
1222
0, /* tp_descr_set */
1223
0, /* tp_dictoffset */
1226
PatienceSequenceMatcher_new, /* tp_new */
1230
static PyMethodDef cpatiencediff_methods[] = {
1231
{"unique_lcs_c", py_unique_lcs, METH_VARARGS},
1232
{"recurse_matches_c", py_recurse_matches, METH_VARARGS},
1238
init_patiencediff_c(void)
1242
if (PyType_Ready(&PatienceSequenceMatcherType) < 0)
1245
m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods,
1246
"C implementation of PatienceSequenceMatcher");
1250
Py_INCREF(&PatienceSequenceMatcherType);
1251
PyModule_AddObject(m, "PatienceSequenceMatcher_c",
1252
(PyObject *)&PatienceSequenceMatcherType);