2
Copyright (C) 2007, 2010 Canonical Ltd
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
Function equate_lines based on bdiff.c from Mercurial.
19
Copyright (C) 2005, 2006 Matt Mackall <mpm@selenic.com>
21
Functions unique_lcs/recurse_matches based on _patiencediff_py.py.
22
Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
30
#include "python-compat.h"
34
# define inline __inline__
35
#elif defined(_MSC_VER)
36
# define inline __inline
42
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
43
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
49
/* malloc returns NULL on some platforms if you try to allocate nothing,
50
* causing <https://bugs.edge.launchpad.net/bzr/+bug/511267> and
51
* <https://bugs.edge.launchpad.net/bzr/+bug/331095>. On glibc it passes, but
52
* let's make it fail to aid testing. */
53
#define guarded_malloc(x) ( (x) ? malloc(x) : NULL )
63
/* values from this array need to correspont to the order of the enum above */
64
static char *opcode_names[] = {
73
long hash; /* hash code of the string/object */
74
Py_ssize_t next; /* next line from the same equivalence class */
75
Py_ssize_t equiv; /* equivalence class */
81
Py_ssize_t a_head; /* first item in `a` from this equivalence class */
83
Py_ssize_t b_head; /* first item in `b` from this equivalence class */
91
Py_ssize_t last_a_pos;
92
Py_ssize_t last_b_pos;
97
struct matching_line {
98
Py_ssize_t a; /* index of the line in `a` */
99
Py_ssize_t b; /* index of the line in `b` */
103
struct matching_block {
104
Py_ssize_t a; /* index of the first line in `a` */
105
Py_ssize_t b; /* index of the first line in `b` */
106
Py_ssize_t len; /* length of the block */
110
struct matching_blocks {
111
struct matching_block *matches;
131
struct hashtable hashtable;
132
Py_ssize_t *backpointers;
133
} PatienceSequenceMatcher;
136
static inline Py_ssize_t
137
bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi)
140
Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2;
141
if (list[mid] < item)
151
compare_lines(struct line *a, struct line *b)
153
return ((a->hash != b->hash)
154
|| PyObject_Compare(a->data, b->data));
159
find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize,
160
struct line *lines, struct line *ref_lines,
164
for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) {
165
if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) {
174
equate_lines(struct hashtable *result,
175
struct line *lines_a, struct line *lines_b,
176
Py_ssize_t asize, Py_ssize_t bsize)
178
Py_ssize_t i, j, hsize;
179
struct bucket *hashtable;
181
/* check for overflow, we need the table to be at least bsize+1 */
182
if (bsize == PY_SSIZE_T_MAX) {
183
PyErr_SetNone(PyExc_OverflowError);
187
/* build a hash table of the next highest power of 2 */
189
while (hsize < bsize + 1)
193
hashtable = (struct bucket *) guarded_malloc(sizeof(struct bucket) * hsize);
194
if (hashtable == NULL) {
199
/* initialise the hashtable */
200
for (i = 0; i < hsize; i++) {
201
hashtable[i].a_count = 0;
202
hashtable[i].b_count = 0;
203
hashtable[i].a_head = SENTINEL;
204
hashtable[i].b_head = SENTINEL;
208
/* add lines from lines_b to the hash table chains. iterating
209
backwards so the matching lines are sorted to the linked list
210
by the line number (because we are adding new lines to the
212
for (i = bsize - 1; i >= 0; i--) {
213
/* find the first hashtable entry, which is either empty or contains
214
the same line as lines_b[i] */
215
j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i);
217
/* set the equivalence class */
218
lines_b[i].equiv = j;
220
/* add to the head of the equivalence class */
221
lines_b[i].next = hashtable[j].b_head;
222
hashtable[j].b_head = i;
223
hashtable[j].b_count++;
226
/* match items from lines_a to their equivalence class in lines_b.
227
again, iterating backwards for the right order of the linked lists */
228
for (i = asize - 1; i >= 0; i--) {
229
/* find the first hash entry, which is either empty or contains
230
the same line as lines_a[i] */
231
j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i);
233
/* set the equivalence class, even if we are not interested in this
234
line, because the values are not pre-filled */
235
lines_a[i].equiv = j;
237
/* we are not interested in lines which are not also in lines_b */
238
if (hashtable[j].b_head == SENTINEL)
241
/* add to the head of the equivalence class */
242
lines_a[i].next = hashtable[j].a_head;
243
hashtable[j].a_head = i;
244
hashtable[j].a_count++;
247
result->last_a_pos = -1;
248
result->last_b_pos = -1;
249
result->size = hsize + 1;
250
result->table = hashtable;
257
/* Finds longest common subsequence of unique lines in a[alo:ahi] and
259
Parameter backpointers must have allocated memory for at least
260
4 * (bhi - blo) ints. */
262
unique_lcs(struct matching_line *answer,
263
struct hashtable *hashtable, Py_ssize_t *backpointers,
264
struct line *lines_a, struct line *lines_b,
265
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
267
Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
268
Py_ssize_t *stacks, *lasts, *btoa;
274
h = hashtable->table;
276
/* "unpack" the allocated memory */
277
stacks = backpointers + bsize;
278
lasts = stacks + bsize;
279
btoa = lasts + bsize;
281
/* initialise the backpointers */
282
for (i = 0; i < bsize; i++)
283
backpointers[i] = SENTINEL;
285
if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
286
for (i = 0; i < hashtable->size; i++)
287
h[i].a_pos = h[i].a_head;
288
hashtable->last_a_pos = alo;
290
if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
291
for (i = 0; i < hashtable->size; i++)
292
h[i].b_pos = h[i].b_head;
293
hashtable->last_b_pos = blo;
295
for (bpos = blo; bpos < bhi; bpos++) {
296
equiv = lines_b[bpos].equiv;
298
/* no lines in a or b */
299
if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
302
/* find an unique line in lines_a that matches lines_b[bpos]
303
if we find more than one line within the range alo:ahi,
304
jump to the next line from lines_b immediately */
306
/* loop through all lines in the linked list */
307
for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
308
/* the index is lower than alo, continue to the next line */
313
/* the index is higher than ahi, stop searching */
316
/* if the line is within our range, check if it's a duplicate */
317
if (apos != SENTINEL)
319
/* save index to the line */
322
/* this line has no equivalent in lines_a[alo:ahi] */
323
if (apos == SENTINEL)
326
/* check for duplicates of this line in lines_b[blo:bhi] */
327
/* loop through all lines in the linked list */
328
for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
329
/* the index is lower than blo, continue to the next line */
334
/* the index is higher than bhi, stop searching */
337
/* if this isn't the line with started with and it's within
338
our range, it's a duplicate */
343
/* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
344
for the patience sorting algorithm */
345
norm_bpos = bpos - blo;
346
norm_apos = apos - alo;
347
btoa[norm_bpos] = norm_apos;
350
Ok, how does this work...
352
We have a list of matching lines from two lists, a and b. These
353
matches are stored in variable `btoa`. As we are iterating over this
354
table by bpos, the lines from b already form an increasing sequence.
355
We need to "sort" also the lines from a using the patience sorting
356
algorithm, ignoring the lines which would need to be swapped.
358
http://en.wikipedia.org/wiki/Patience_sorting
360
For each pair of lines, we need to place the line from a on either
361
an existing pile that has higher value on the top or create a new
362
pile. Variable `stacks` represents the tops of these piles and in
363
variable `lasts` we store the lines from b, that correspond to the
364
lines from a in `stacks`.
366
Whenever we place a new line on top of a pile, we store a
367
backpointer to the line (b) from top of the previous pile. This means
368
that after the loop, variable `backpointers` will contain an index
369
to the previous matching lines that forms an increasing sequence
370
(over both indexes a and b) with the current matching lines. If
371
either index a or b of the previous matching lines would be higher
372
than indexes of the current one or if the indexes of the current
373
one are 0, it will contain SENTINEL.
375
To construct the LCS, we will just need to follow these backpointers
376
from the top of the last pile and stop when we reach SENTINEL.
379
/* as an optimization, check if the next line comes at the end,
380
because it usually does */
381
if (stacksize && stacks[stacksize - 1] < norm_apos)
383
/* as an optimization, check if the next line comes right after
384
the previous line, because usually it does */
385
else if (stacksize && (stacks[k] < norm_apos) &&
386
(k == stacksize - 1 || stacks[k + 1] > norm_apos))
389
k = bisect_left(stacks, norm_apos, 0, stacksize);
392
backpointers[norm_bpos] = lasts[k - 1];
395
stacks[k] = norm_apos;
396
lasts[k] = norm_bpos;
399
stacks[stacksize] = norm_apos;
400
lasts[stacksize] = norm_bpos;
412
/* backtrace the structures to find the LCS */
414
k = lasts[stacksize - 1];
415
while (k != SENTINEL) {
416
answer[i].a = btoa[k];
425
/* Adds a new line to the list of matching blocks, either extending the
426
current block or adding a new one. */
428
add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b)
430
Py_ssize_t last_index = answer->count - 1;
431
if ((last_index >= 0) &&
432
(a == answer->matches[last_index].a +
433
answer->matches[last_index].len) &&
434
(b == answer->matches[last_index].b +
435
answer->matches[last_index].len)) {
436
/* enlarge the last block */
437
answer->matches[last_index].len++;
440
/* create a new block */
442
answer->matches[last_index].a = a;
443
answer->matches[last_index].b = b;
444
answer->matches[last_index].len = 1;
451
recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable,
452
Py_ssize_t *backpointers, struct line *a, struct line *b,
453
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi,
457
Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos;
458
struct matching_line *lcs;
460
if (maxrecursion < 0)
463
if (alo == ahi || blo == bhi)
467
last_a_pos = alo - 1;
468
last_b_pos = blo - 1;
470
lcs = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * (bhi - blo));
474
lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi);
476
/* recurse between lines which are unique in each file and match */
477
for (i = lcs_size - 1; i >= 0; i--) {
478
apos = alo + lcs[i].a;
479
bpos = blo + lcs[i].b;
480
if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) {
481
res = recurse_matches(answer, hashtable,
483
last_a_pos + 1, last_b_pos + 1,
484
apos, bpos, maxrecursion - 1);
490
add_matching_line(answer, apos, bpos);
497
/* find matches between the last match and the end */
499
res = recurse_matches(answer, hashtable,
501
last_a_pos + 1, last_b_pos + 1,
502
ahi, bhi, maxrecursion - 1);
508
/* find matching lines at the very beginning */
509
else if (a[alo].equiv == b[blo].equiv) {
510
while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv)
511
add_matching_line(answer, alo++, blo++);
512
res = recurse_matches(answer, hashtable,
514
alo, blo, ahi, bhi, maxrecursion - 1);
519
/* find matching lines at the very end */
520
else if (a[ahi - 1].equiv == b[bhi - 1].equiv) {
523
while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) {
527
res = recurse_matches(answer, hashtable,
529
last_a_pos + 1, last_b_pos + 1,
530
nahi, nbhi, maxrecursion - 1);
533
for (i = 0; i < ahi - nahi; i++)
534
add_matching_line(answer, nahi + i, nbhi + i);
546
delete_lines(struct line *lines, Py_ssize_t size)
548
struct line *line = lines;
550
Py_XDECREF(line->data);
558
load_lines(PyObject *orig, struct line **lines)
562
PyObject *seq, *item;
564
seq = PySequence_Fast(orig, "sequence expected");
569
size = PySequence_Fast_GET_SIZE(seq);
575
/* Allocate a memory block for line data, initialized to 0 */
576
line = *lines = (struct line *)calloc(size, sizeof(struct line));
583
for (i = 0; i < size; i++) {
584
item = PySequence_Fast_GET_ITEM(seq, i);
587
line->hash = PyObject_Hash(item);
588
if (line->hash == (-1)) {
589
/* Propogate the hash exception */
593
line->next = SENTINEL;
600
/* Error -- cleanup unused object references */
601
delete_lines(*lines, i);
609
py_unique_lcs(PyObject *self, PyObject *args)
611
PyObject *aseq, *bseq, *res, *item;
612
Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL;
613
struct line *a = NULL, *b = NULL;
614
struct matching_line *matches = NULL;
615
struct hashtable hashtable;
617
if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq))
620
hashtable.table = NULL;
622
asize = load_lines(aseq, &a);
623
bsize = load_lines(bseq, &b);
624
if (asize == -1 || bsize == -1)
627
if (!equate_lines(&hashtable, a, b, asize, bsize))
631
matches = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * bsize);
635
backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4);
636
if (backpointers == NULL)
640
nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize);
642
res = PyList_New(nmatches);
643
for (i = 0; i < nmatches; i++) {
644
#if PY_VERSION_HEX < 0x02050000
645
item = Py_BuildValue("ii", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
647
item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
651
if (PyList_SetItem(res, i, item) != 0)
657
free(hashtable.table);
658
delete_lines(b, bsize);
659
delete_lines(a, asize);
665
free(hashtable.table);
666
delete_lines(b, bsize);
667
delete_lines(a, asize);
673
py_recurse_matches(PyObject *self, PyObject *args)
675
PyObject *aseq, *bseq, *item, *answer;
676
int maxrecursion, res;
677
Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi;
678
Py_ssize_t *backpointers = NULL;
679
struct line *a = NULL, *b = NULL;
680
struct hashtable hashtable;
681
struct matching_blocks matches;
683
#if PY_VERSION_HEX < 0x02050000
684
if (!PyArg_ParseTuple(args, "OOiiiiOi", &aseq, &bseq, &alo, &blo,
685
&ahi, &bhi, &answer, &maxrecursion))
687
if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo,
688
&ahi, &bhi, &answer, &maxrecursion))
692
hashtable.table = NULL;
693
matches.matches = NULL;
695
asize = load_lines(aseq, &a);
696
bsize = load_lines(bseq, &b);
697
if (asize == -1 || bsize == -1)
700
if (!equate_lines(&hashtable, a, b, asize, bsize))
706
matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * bsize);
707
if (matches.matches == NULL)
710
backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4);
711
if (backpointers == NULL)
714
matches.matches = NULL;
718
res = recurse_matches(&matches, &hashtable, backpointers,
719
a, b, alo, blo, ahi, bhi, maxrecursion);
723
for (i = 0; i < matches.count; i++) {
724
for (j = 0; j < matches.matches[i].len; j++) {
725
#if PY_VERSION_HEX < 0x02050000
726
item = Py_BuildValue("ii", matches.matches[i].a + j,
727
matches.matches[i].b + j);
729
item = Py_BuildValue("nn", matches.matches[i].a + j,
730
matches.matches[i].b + j);
734
if (PyList_Append(answer, item) != 0)
740
free(matches.matches);
741
free(hashtable.table);
742
delete_lines(b, bsize);
743
delete_lines(a, asize);
748
free(matches.matches);
749
free(hashtable.table);
750
delete_lines(b, bsize);
751
delete_lines(a, asize);
757
PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
759
PyObject *junk, *a, *b;
760
PatienceSequenceMatcher *self;
762
self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0);
765
if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) {
770
self->asize = load_lines(a, &(self->a));
771
self->bsize = load_lines(b, &(self->b));
773
if (self->asize == -1 || self->bsize == -1) {
778
if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) {
783
if (self->bsize > 0) {
784
self->backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * self->bsize * 4);
785
if (self->backpointers == NULL) {
791
self->backpointers = NULL;
796
return (PyObject *)self;
801
PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self)
803
free(self->backpointers);
804
free(self->hashtable.table);
805
delete_lines(self->b, self->bsize);
806
delete_lines(self->a, self->asize);
807
self->ob_type->tp_free((PyObject *)self);
811
static char PatienceSequenceMatcher_get_matching_blocks_doc[] =
812
"Return list of triples describing matching subsequences.\n"
814
"Each triple is of the form (i, j, n), and means that\n"
815
"a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n"
818
"The last triple is a dummy, (len(a), len(b), 0), and is the only\n"
819
"triple with n==0.\n"
821
">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n"
822
">>> s.get_matching_blocks()\n"
823
"[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n";
826
PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self)
828
PyObject *answer, *item;
831
struct matching_blocks matches;
834
if (self->bsize > 0) {
835
matches.matches = (struct matching_block *)
836
guarded_malloc(sizeof(struct matching_block) * self->bsize);
837
if (matches.matches == NULL)
838
return PyErr_NoMemory();
840
matches.matches = NULL;
842
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
843
self->a, self->b, 0, 0,
844
self->asize, self->bsize, 10);
846
free(matches.matches);
847
return PyErr_NoMemory();
850
answer = PyList_New(matches.count + 1);
851
if (answer == NULL) {
852
free(matches.matches);
856
for (i = 0; i < matches.count; i++) {
857
#if PY_VERSION_HEX < 0x02050000
858
item = Py_BuildValue("iii", matches.matches[i].a,
859
matches.matches[i].b, matches.matches[i].len);
861
item = Py_BuildValue("nnn", matches.matches[i].a,
862
matches.matches[i].b, matches.matches[i].len);
866
if (PyList_SetItem(answer, i, item) != 0)
869
#if PY_VERSION_HEX < 0x02050000
870
item = Py_BuildValue("iii", self->asize, self->bsize, 0);
872
item = Py_BuildValue("nnn", self->asize, self->bsize, 0);
876
if (PyList_SetItem(answer, i, item) != 0)
879
free(matches.matches);
883
free(matches.matches);
889
static char PatienceSequenceMatcher_get_opcodes_doc[] =
890
"Return list of 5-tuples describing how to turn a into b.\n"
892
"Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n"
893
"has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n"
894
"tuple preceding it, and likewise for j1 == the previous j2.\n"
896
"The tags are strings, with these meanings:\n"
898
"'replace': a[i1:i2] should be replaced by b[j1:j2]\n"
899
"'delete': a[i1:i2] should be deleted.\n"
900
" Note that j1==j2 in this case.\n"
901
"'insert': b[j1:j2] should be inserted at a[i1:i1].\n"
902
" Note that i1==i2 in this case.\n"
903
"'equal': a[i1:i2] == b[j1:j2]\n"
905
">>> a = \"qabxcd\"\n"
906
">>> b = \"abycdf\"\n"
907
">>> s = PatienceSequenceMatcher(None, a, b)\n"
908
">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n"
909
"... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n"
910
"... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n"
911
" delete a[0:1] (q) b[0:0] ()\n"
912
" equal a[1:3] (ab) b[0:2] (ab)\n"
913
"replace a[3:4] (x) b[2:3] (y)\n"
914
" equal a[4:6] (cd) b[3:5] (cd)\n"
915
" insert a[6:6] () b[5:6] (f)\n";
918
PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self)
920
PyObject *answer, *item;
921
Py_ssize_t i, j, k, ai, bj;
923
struct matching_blocks matches;
926
matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1));
927
if (matches.matches == NULL)
928
return PyErr_NoMemory();
930
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
931
self->a, self->b, 0, 0,
932
self->asize, self->bsize, 10);
934
free(matches.matches);
935
return PyErr_NoMemory();
938
matches.matches[matches.count].a = self->asize;
939
matches.matches[matches.count].b = self->bsize;
940
matches.matches[matches.count].len = 0;
943
answer = PyList_New(0);
944
if (answer == NULL) {
945
free(matches.matches);
950
for (k = 0; k < matches.count; k++) {
951
ai = matches.matches[k].a;
952
bj = matches.matches[k].b;
955
if (i < ai && j < bj)
963
#if PY_VERSION_HEX < 0x02050000
964
item = Py_BuildValue("siiii", opcode_names[tag], i, ai, j, bj);
966
item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj);
970
if (PyList_Append(answer, item) != 0)
974
i = ai + matches.matches[k].len;
975
j = bj + matches.matches[k].len;
977
if (matches.matches[k].len > 0) {
978
#if PY_VERSION_HEX < 0x02050000
979
item = Py_BuildValue("siiii", opcode_names[OP_EQUAL], ai, i, bj, j);
981
item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j);
985
if (PyList_Append(answer, item) != 0)
990
free(matches.matches);
994
free(matches.matches);
1000
static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] =
1001
"Isolate change clusters by eliminating ranges with no changes.\n"
1003
"Return a list of groups with upto n lines of context.\n"
1004
"Each group is in the same format as returned by get_opcodes().\n"
1006
">>> from pprint import pprint\n"
1007
">>> a = map(str, range(1,40))\n"
1009
">>> b[8:8] = ['i'] # Make an insertion\n"
1010
">>> b[20] += 'x' # Make a replacement\n"
1011
">>> b[23:28] = [] # Make a deletion\n"
1012
">>> b[30] += 'y' # Make another replacement\n"
1013
">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n"
1014
"[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n"
1015
" [('equal', 16, 19, 17, 20),\n"
1016
" ('replace', 19, 20, 20, 21),\n"
1017
" ('equal', 20, 22, 21, 23),\n"
1018
" ('delete', 22, 27, 23, 23),\n"
1019
" ('equal', 27, 30, 23, 26)],\n"
1020
" [('equal', 31, 34, 27, 30),\n"
1021
" ('replace', 34, 35, 30, 31),\n"
1022
" ('equal', 35, 38, 31, 34)]]\n";
1025
PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self,
1028
PyObject *answer, *group, *item;
1029
Py_ssize_t i, j, k, ai, bj, size, ncodes, tag;
1030
Py_ssize_t i1, i2, j1, j2;
1032
struct matching_blocks matches;
1033
struct opcode *codes;
1035
if (!PyArg_ParseTuple(args, "|i", &n))
1039
matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1));
1040
if (matches.matches == NULL)
1041
return PyErr_NoMemory();
1043
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
1044
self->a, self->b, 0, 0,
1045
self->asize, self->bsize, 10);
1047
free(matches.matches);
1048
return PyErr_NoMemory();
1051
matches.matches[matches.count].a = self->asize;
1052
matches.matches[matches.count].b = self->bsize;
1053
matches.matches[matches.count].len = 0;
1057
codes = (struct opcode *)guarded_malloc(sizeof(struct opcode) * matches.count * 2);
1058
if (codes == NULL) {
1059
free(matches.matches);
1060
return PyErr_NoMemory();
1064
for (k = 0; k < matches.count; k++) {
1065
ai = matches.matches[k].a;
1066
bj = matches.matches[k].b;
1069
if (i < ai && j < bj)
1077
codes[ncodes].tag = tag;
1078
codes[ncodes].i1 = i;
1079
codes[ncodes].i2 = ai;
1080
codes[ncodes].j1 = j;
1081
codes[ncodes].j2 = bj;
1085
i = ai + matches.matches[k].len;
1086
j = bj + matches.matches[k].len;
1088
if (matches.matches[k].len > 0) {
1089
codes[ncodes].tag = OP_EQUAL;
1090
codes[ncodes].i1 = ai;
1091
codes[ncodes].i2 = i;
1092
codes[ncodes].j1 = bj;
1093
codes[ncodes].j2 = j;
1099
codes[ncodes].tag = OP_EQUAL;
1100
codes[ncodes].i1 = 0;
1101
codes[ncodes].i2 = 1;
1102
codes[ncodes].j1 = 0;
1103
codes[ncodes].j2 = 1;
1107
/* fixup leading and trailing groups if they show no changes. */
1108
if (codes[0].tag == OP_EQUAL) {
1109
codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n);
1110
codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n);
1112
if (codes[ncodes - 1].tag == OP_EQUAL) {
1113
codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2,
1114
codes[ncodes - 1].i1 + n);
1115
codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2,
1116
codes[ncodes - 1].j1 + n);
1121
answer = PyList_New(0);
1125
group = PyList_New(0);
1131
for (i = 0; i < ncodes; i++) {
1137
/* end the current group and start a new one whenever
1138
there is a large range with no changes. */
1139
if (tag == OP_EQUAL && i2 - i1 > nn) {
1140
#if PY_VERSION_HEX < 0x02050000
1141
item = Py_BuildValue("siiii", opcode_names[tag],
1142
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1144
item = Py_BuildValue("snnnn", opcode_names[tag],
1145
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1149
if (PyList_Append(group, item) != 0)
1151
if (PyList_Append(answer, group) != 0)
1153
group = PyList_New(0);
1156
i1 = MAX(i1, i2 - n);
1157
j1 = MAX(j1, j2 - n);
1159
#if PY_VERSION_HEX < 0x02050000
1160
item = Py_BuildValue("siiii", opcode_names[tag], i1, i2, j1 ,j2);
1162
item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2);
1166
if (PyList_Append(group, item) != 0)
1169
size = PyList_Size(group);
1170
if (size > 0 && !(size == 1 && tag == OP_EQUAL)) {
1171
if (PyList_Append(answer, group) != 0)
1178
free(matches.matches);
1183
free(matches.matches);
1190
static PyMethodDef PatienceSequenceMatcher_methods[] = {
1191
{"get_matching_blocks",
1192
(PyCFunction)PatienceSequenceMatcher_get_matching_blocks,
1194
PatienceSequenceMatcher_get_matching_blocks_doc},
1196
(PyCFunction)PatienceSequenceMatcher_get_opcodes,
1198
PatienceSequenceMatcher_get_opcodes_doc},
1199
{"get_grouped_opcodes",
1200
(PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes,
1202
PatienceSequenceMatcher_get_grouped_opcodes_doc},
1207
static char PatienceSequenceMatcher_doc[] =
1208
"C implementation of PatienceSequenceMatcher";
1211
static PyTypeObject PatienceSequenceMatcherType = {
1212
PyObject_HEAD_INIT(NULL)
1214
"PatienceSequenceMatcher", /* tp_name */
1215
sizeof(PatienceSequenceMatcher), /* tp_basicsize */
1216
0, /* tp_itemsize */
1217
(destructor)PatienceSequenceMatcher_dealloc, /* tp_dealloc */
1223
0, /* tp_as_number */
1224
0, /* tp_as_sequence */
1225
0, /* tp_as_mapping */
1229
0, /* tp_getattro */
1230
0, /* tp_setattro */
1231
0, /* tp_as_buffer */
1232
Py_TPFLAGS_DEFAULT, /* tp_flags*/
1233
PatienceSequenceMatcher_doc, /* tp_doc */
1234
0, /* tp_traverse */
1236
0, /* tp_richcompare */
1237
0, /* tp_weaklistoffset */
1239
0, /* tp_iternext */
1240
PatienceSequenceMatcher_methods, /* tp_methods */
1245
0, /* tp_descr_get */
1246
0, /* tp_descr_set */
1247
0, /* tp_dictoffset */
1250
PatienceSequenceMatcher_new, /* tp_new */
1254
static PyMethodDef cpatiencediff_methods[] = {
1255
{"unique_lcs_c", py_unique_lcs, METH_VARARGS},
1256
{"recurse_matches_c", py_recurse_matches, METH_VARARGS},
1262
init_patiencediff_c(void)
1266
if (PyType_Ready(&PatienceSequenceMatcherType) < 0)
1269
m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods,
1270
"C implementation of PatienceSequenceMatcher");
1274
Py_INCREF(&PatienceSequenceMatcherType);
1275
PyModule_AddObject(m, "PatienceSequenceMatcher_c",
1276
(PyObject *)&PatienceSequenceMatcherType);