NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
Diff.hpp
Go to the documentation of this file.
1
36/* If you use this library, you must include dtl.hpp only. */
37
38#ifndef DTL_DIFF_H
39#define DTL_DIFF_H
40
41namespace dtl {
42
47 template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
48 class Diff
49 {
50 private :
51 dtl_typedefs(elem, sequence)
52 sequence A;
53 sequence B;
54 size_t M;
55 size_t N;
56 size_t delta;
57 size_t offset;
58 long long *fp;
59 long long editDistance;
64 bool swapped;
65 bool huge;
66 bool trivial;
68 uniHunkVec uniHunks;
69 comparator cmp;
70 public :
71 Diff () {}
72
73 Diff (const sequence& a,
74 const sequence& b) : A(a), B(b), ses(false) {
75 init();
76 }
77
78 Diff (const sequence& a,
79 const sequence& b,
80 bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
81 init();
82 }
83
84 Diff (const sequence& a,
85 const sequence& b,
86 const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
87 init();
88 }
89
90 Diff (const sequence& a,
91 const sequence& b,
92 bool deleteFirst,
93 const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
94 init();
95 }
96
97 ~Diff() {}
98
99 long long getEditDistance () const {
100 return editDistance;
101 }
102
104 return lcs;
105 }
106
107 elemVec getLcsVec () const {
108 return lcs.getSequence();
109 }
110
112 return ses;
113 }
114
115 uniHunkVec getUniHunks () const {
116 return uniHunks;
117 }
118
119 /* These should be deprecated */
120 bool isHuge () const {
121 return huge;
122 }
123
124 void onHuge () {
125 this->huge = true;
126 }
127
128 void offHuge () {
129 this->huge = false;
130 }
131
132 bool isUnserious () const {
133 return trivial;
134 }
135
136 void onUnserious () {
137 this->trivial = true;
138 }
139
140 void offUnserious () {
141 this->trivial = false;
142 }
143
145 this->editDistanceOnly = true;
146 }
147
148 /* These are the replacements for the above */
149 bool hugeEnabled () const {
150 return huge;
151 }
152
153 void enableHuge () {
154 this->huge = true;
155 }
156
157 void disableHuge () {
158 this->huge = false;
159 }
160
161 bool trivialEnabled () const {
162 return trivial;
163 }
164
165 void enableTrivial () const {
166 this->trivial = true;
167 }
168
170 this->trivial = false;
171 }
172
174 this->editDistanceOnly = true;
175 }
176
180 sequence uniPatch (const sequence& seq) {
181 elemList seqLst(seq.begin(), seq.end());
182 sesElemVec shunk;
183 sesElemVec_iter vsesIt;
184 elemList_iter lstIt = seqLst.begin();
185 long long inc_dec_total = 0;
186 long long gap = 1;
187 for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
188 joinSesVec(shunk, it->common[0]);
189 joinSesVec(shunk, it->change);
190 joinSesVec(shunk, it->common[1]);
191 it->a += inc_dec_total;
192 inc_dec_total += it->inc_dec_count;
193 for (long long i=0;i<it->a - gap;++i) {
194 ++lstIt;
195 }
196 gap = it->a + it->b + it->inc_dec_count;
197 vsesIt = shunk.begin();
198 while (vsesIt!=shunk.end()) {
199 switch (vsesIt->second.type) {
200 case SES_ADD :
201 seqLst.insert(lstIt, vsesIt->first);
202 break;
203 case SES_DELETE :
204 if (lstIt != seqLst.end()) {
205 lstIt = seqLst.erase(lstIt);
206 }
207 break;
208 case SES_COMMON :
209 if (lstIt != seqLst.end()) {
210 ++lstIt;
211 }
212 break;
213 default :
214 // no fall-through
215 break;
216 }
217 ++vsesIt;
218 }
219 shunk.clear();
220 }
221
222 sequence patchedSeq(seqLst.begin(), seqLst.end());
223 return patchedSeq;
224 }
225
229 sequence patch (const sequence& seq) const {
230 sesElemVec sesSeq = ses.getSequence();
231 elemList seqLst(seq.begin(), seq.end());
232 elemList_iter lstIt = seqLst.begin();
233 for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
234 switch (sesIt->second.type) {
235 case SES_ADD :
236 seqLst.insert(lstIt, sesIt->first);
237 break;
238 case SES_DELETE :
239 lstIt = seqLst.erase(lstIt);
240 break;
241 case SES_COMMON :
242 ++lstIt;
243 break;
244 default :
245 // no through
246 break;
247 }
248 }
249 sequence patchedSeq(seqLst.begin(), seqLst.end());
250 return patchedSeq;
251 }
252
258 void compose() {
259
260 if (isHuge()) {
262 }
263
264 long long p = -1;
265 fp = new long long[M + N + 3];
266 fill(&fp[0], &fp[M + N + 3], -1);
267 path = editPath(M + N + 3);
268 fill(path.begin(), path.end(), -1);
269 ONP:
270 do {
271 ++p;
272 for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
273 fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
274 }
275 for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
276 fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
277 }
278 fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
279 } while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
280
281 editDistance += static_cast<long long>(delta) + 2 * p;
282 long long r = path[delta+offset];
283 P cordinate;
284 editPathCordinates epc(0);
285
286 // recording edit distance only
287 if (editDistanceOnly) {
288 delete[] this->fp;
289 return;
290 }
291
292 while(r != -1) {
293 cordinate.x = pathCordinates[(size_t)r].x;
294 cordinate.y = pathCordinates[(size_t)r].y;
295 epc.push_back(cordinate);
296 r = pathCordinates[(size_t)r].k;
297 }
298
299 // record Longest Common Subsequence & Shortest Edit Script
300 if (!recordSequence(epc)) {
301 pathCordinates.resize(0);
302 epc.resize(0);
303 p = -1;
304 goto ONP;
305 }
306 delete[] this->fp;
307 }
308
312 template < typename stream >
313 void printSES (stream& out) const {
314 sesElemVec ses_v = ses.getSequence();
315 for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
316 }
317
318 void printSES (ostream& out = cout) const {
319 printSES< ostream >(out);
320 }
321
325 template < typename stream >
326 static void printSES (const Ses< elem >& s, stream& out) {
327 sesElemVec ses_v = s.getSequence();
328 for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
329 }
330
331 static void printSES (const Ses< elem >& s, ostream& out = cout) {
332 printSES< ostream >(s, out);
333 }
334
338 template < typename stream, template < typename SEET, typename STRT > class PT >
339 void printSES (stream& out) const {
340 sesElemVec ses_v = ses.getSequence ();
341 for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
342 }
343
347 template < typename stream >
348 void printUnifiedFormat (stream& out) const {
349 for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
350 }
351
352 void printUnifiedFormat (ostream& out = cout) const {
353 printUnifiedFormat< ostream >(out);
354 }
355
359 template < typename stream >
360 static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
361 for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
362 }
363
364 static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
365 printUnifiedFormat< ostream >(hunks, out);
366 }
367
372 sesElemVec common[2];
373 sesElemVec change;
374 sesElemVec ses_v = ses.getSequence();
375 long long l_cnt = 1;
376 long long length = distance(ses_v.begin(), ses_v.end());
377 long long middle = 0;
378 bool isMiddle, isAfter;
379 elemInfo einfo;
380 long long a, b, c, d; // @@ -a,b +c,d @@
381 long long inc_dec_count = 0;
383 sesElemVec adds;
384 sesElemVec deletes;
385
386 isMiddle = isAfter = false;
387 a = b = c = d = 0;
388
389 for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
390 einfo = it->second;
391 switch (einfo.type) {
392 case SES_ADD :
393 middle = 0;
394 ++inc_dec_count;
395 adds.push_back(*it);
396 if (!isMiddle) isMiddle = true;
397 if (isMiddle) ++d;
398 if (l_cnt >= length) {
399 joinSesVec(change, deletes);
400 joinSesVec(change, adds);
401 isAfter = true;
402 }
403 break;
404 case SES_DELETE :
405 middle = 0;
406 --inc_dec_count;
407 deletes.push_back(*it);
408 if (!isMiddle) isMiddle = true;
409 if (isMiddle) ++b;
410 if (l_cnt >= length) {
411 joinSesVec(change, deletes);
412 joinSesVec(change, adds);
413 isAfter = true;
414 }
415 break;
416 case SES_COMMON :
417 ++b;++d;
418 if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
419 if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
420 if (a == 0 && c == 0) {
421 if (!wasSwapped()) {
422 a = einfo.beforeIdx;
423 c = einfo.afterIdx;
424 } else {
425 a = einfo.afterIdx;
426 c = einfo.beforeIdx;
427 }
428 }
429 common[0].push_back(*it);
430 } else {
431 rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
432 common[0].pop_back();
433 common[0].push_back(*it);
434 ++a;++c;
435 --b;--d;
436 }
437 }
438 if (isMiddle && !isAfter) {
439 ++middle;
440 joinSesVec(change, deletes);
441 joinSesVec(change, adds);
442 change.push_back(*it);
443 if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
444 isAfter = true;
445 }
446 adds.clear();
447 deletes.clear();
448 }
449 break;
450 default :
451 // no through
452 break;
453 }
454 // compose unified format hunk
455 if (isAfter && !change.empty()) {
456 sesElemVec_iter cit = it;
457 long long cnt = 0;
458 for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
459 if (cit->second.type == SES_COMMON) {
460 ++cnt;
461 }
462 }
463 if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
464 middle = 0;
465 isAfter = false;
466 continue;
467 }
468 if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
469 long long c0size = static_cast<long long>(common[0].size());
470 rotate(common[0].begin(),
471 common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
472 common[0].end());
473 for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
474 common[0].pop_back();
475 }
476 a += c0size - DTL_SEPARATE_SIZE;
477 c += c0size - DTL_SEPARATE_SIZE;
478 }
479 if (a == 0) ++a;
480 if (c == 0) ++c;
481 if (wasSwapped()) swap(a, c);
482 hunk.a = a;
483 hunk.b = b;
484 hunk.c = c;
485 hunk.d = d;
486 hunk.common[0] = common[0];
487 hunk.change = change;
488 hunk.common[1] = common[1];
489 hunk.inc_dec_count = inc_dec_count;
490 uniHunks.push_back(hunk);
491 isMiddle = false;
492 isAfter = false;
493 common[0].clear();
494 common[1].clear();
495 adds.clear();
496 deletes.clear();
497 change.clear();
498 a = b = c = d = middle = inc_dec_count = 0;
499 }
500 }
501 }
502
506 template <typename stream>
508 {
509 elem line;
510 Ses< elem > ret;
511 long long x_idx, y_idx;
512 x_idx = y_idx = 1;
513 while (getline(st, line)) {
514 elem mark(line.begin(), line.begin() + 1);
515 elem e(line.begin() + 1, line.end());
516 if (mark == SES_MARK_DELETE) {
517 ret.addSequence(e, x_idx, 0, SES_DELETE);
518 ++x_idx;
519 } else if (mark == SES_MARK_ADD) {
520 ret.addSequence(e, y_idx, 0, SES_ADD);
521 ++y_idx;
522 } else if (mark == SES_MARK_COMMON) {
523 ret.addSequence(e, x_idx, y_idx, SES_COMMON);
524 ++x_idx;
525 ++y_idx;
526 }
527 }
528 return ret;
529 }
530
531 private :
535 void init () {
536 M = distance(A.begin(), A.end());
537 N = distance(B.begin(), B.end());
538 if (M < N) {
539 swapped = false;
540 } else {
541 swap(A, B);
542 swap(M, N);
543 swapped = true;
544 }
545 editDistance = 0;
546 delta = N - M;
547 offset = M + 1;
548 huge = false;
549 trivial = false;
550 editDistanceOnly = false;
551 fp = NULL;
552 }
553
557 long long snake(const long long& k, const long long& above, const long long& below) {
558 long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
559 long long y = max(above, below);
560 long long x = y - k;
561 while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
562 ++x;++y;
563 }
564
565 path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
566 if (!editDistanceOnly) {
567 P p;
568 p.x = x;p.y = y;p.k = r;
569 pathCordinates.push_back(p);
570 }
571 return y;
572 }
573
578 sequence_const_iter x(A.begin());
579 sequence_const_iter y(B.begin());
580 long long x_idx, y_idx; // line number for Unified Format
581 long long px_idx, py_idx; // cordinates
582 bool complete = false;
583 x_idx = y_idx = 1;
584 px_idx = py_idx = 0;
585 for (size_t i=v.size()-1;!complete;--i) {
586 while(px_idx < v[i].x || py_idx < v[i].y) {
587 if (v[i].y - v[i].x > py_idx - px_idx) {
588 if (!wasSwapped()) {
589 ses.addSequence(*y, 0, y_idx, SES_ADD);
590 } else {
591 ses.addSequence(*y, y_idx, 0, SES_DELETE);
592 }
593 ++y;
594 ++y_idx;
595 ++py_idx;
596 } else if (v[i].y - v[i].x < py_idx - px_idx) {
597 if (!wasSwapped()) {
598 ses.addSequence(*x, x_idx, 0, SES_DELETE);
599 } else {
600 ses.addSequence(*x, 0, x_idx, SES_ADD);
601 }
602 ++x;
603 ++x_idx;
604 ++px_idx;
605 } else {
606 if (!wasSwapped()) {
607 lcs.addSequence(*x);
608 ses.addSequence(*x, x_idx, y_idx, SES_COMMON);
609 } else {
610 lcs.addSequence(*y);
611 ses.addSequence(*y, y_idx, x_idx, SES_COMMON);
612 }
613 ++x;
614 ++y;
615 ++x_idx;
616 ++y_idx;
617 ++px_idx;
618 ++py_idx;
619 }
620 }
621 if (i == 0) complete = true;
622 }
623
624 if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
625 // all recording succeeded
626 } else {
627 // trivial difference
628 if (trivialEnabled()) {
629 if (!wasSwapped()) {
630 recordOddSequence(x_idx, M, x, SES_DELETE);
631 recordOddSequence(y_idx, N, y, SES_ADD);
632 } else {
633 recordOddSequence(x_idx, M, x, SES_ADD);
634 recordOddSequence(y_idx, N, y, SES_DELETE);
635 }
636 return true;
637 }
638
639 // nontrivial difference
640 sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
641 sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
642 A = A_;
643 B = B_;
644 M = distance(A.begin(), A.end());
645 N = distance(B.begin(), B.end());
646 delta = N - M;
647 offset = M + 1;
648 delete[] fp;
649 fp = new long long[M + N + 3];
650 fill(&fp[0], &fp[M + N + 3], -1);
651 fill(path.begin(), path.end(), -1);
652 return false;
653 }
654 return true;
655 }
656
660 void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
661 while(idx < length){
662 ses.addSequence(*it, idx, 0, et);
663 ++it;
664 ++idx;
665 ++editDistance;
666 }
667 ses.addSequence(*it, idx, 0, et);
668 ++editDistance;
669 }
670
674 void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
675 if (!s2.empty()) {
676 for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
677 s1.push_back(*vit);
678 }
679 }
680 }
681
685 bool inline wasSwapped () const {
686 return swapped;
687 }
688
689 };
690}
691
692#endif // DTL_DIFF_H
void enableTrivial() const
Definition: Diff.hpp:165
static Ses< elem > composeSesFromStream(stream &st)
Definition: Diff.hpp:507
void printUnifiedFormat(stream &out) const
Definition: Diff.hpp:348
bool hugeEnabled() const
Definition: Diff.hpp:149
dtl_typedefs(elem, sequence) sequence A
void disableHuge()
Definition: Diff.hpp:157
comparator cmp
Definition: Diff.hpp:69
editPath path
Definition: Diff.hpp:62
bool isHuge() const
Definition: Diff.hpp:120
void onUnserious()
Definition: Diff.hpp:136
size_t N
Definition: Diff.hpp:55
Diff(const sequence &a, const sequence &b, const comparator &comp)
Definition: Diff.hpp:84
bool editDistanceOnly
Definition: Diff.hpp:67
void printSES(ostream &out=cout) const
Definition: Diff.hpp:318
size_t delta
Definition: Diff.hpp:56
Diff(const sequence &a, const sequence &b, bool deletesFirst)
Definition: Diff.hpp:78
static void printUnifiedFormat(const uniHunkVec &hunks, stream &out)
Definition: Diff.hpp:360
void editDistanceOnlyEnabled()
Definition: Diff.hpp:173
Diff()
Definition: Diff.hpp:71
size_t offset
Definition: Diff.hpp:57
void recordOddSequence(long long idx, long long length, sequence_const_iter it, const edit_t et)
Definition: Diff.hpp:660
void composeUnifiedHunks()
Definition: Diff.hpp:371
long long * fp
Definition: Diff.hpp:58
void disableTrivial()
Definition: Diff.hpp:169
bool huge
Definition: Diff.hpp:65
sequence patch(const sequence &seq) const
Definition: Diff.hpp:229
void compose()
Definition: Diff.hpp:258
long long getEditDistance() const
Definition: Diff.hpp:99
void init()
Definition: Diff.hpp:535
void offUnserious()
Definition: Diff.hpp:140
size_t M
Definition: Diff.hpp:54
bool wasSwapped() const
Definition: Diff.hpp:685
Ses< elem > ses
Definition: Diff.hpp:61
static void printSES(const Ses< elem > &s, stream &out)
Definition: Diff.hpp:326
sequence B
Definition: Diff.hpp:53
elemVec getLcsVec() const
Definition: Diff.hpp:107
Diff(const sequence &a, const sequence &b, bool deleteFirst, const comparator &comp)
Definition: Diff.hpp:90
Lcs< elem > lcs
Definition: Diff.hpp:60
sequence uniPatch(const sequence &seq)
Definition: Diff.hpp:180
editPathCordinates pathCordinates
Definition: Diff.hpp:63
bool trivial
Definition: Diff.hpp:66
void printSES(stream &out) const
Definition: Diff.hpp:313
void printSES(stream &out) const
Definition: Diff.hpp:339
Lcs< elem > getLcs() const
Definition: Diff.hpp:103
Diff(const sequence &a, const sequence &b)
Definition: Diff.hpp:73
void offHuge()
Definition: Diff.hpp:128
void printUnifiedFormat(ostream &out=cout) const
Definition: Diff.hpp:352
~Diff()
Definition: Diff.hpp:97
bool recordSequence(const editPathCordinates &v)
Definition: Diff.hpp:577
uniHunkVec getUniHunks() const
Definition: Diff.hpp:115
bool trivialEnabled() const
Definition: Diff.hpp:161
Ses< elem > getSes() const
Definition: Diff.hpp:111
bool isUnserious() const
Definition: Diff.hpp:132
bool swapped
Definition: Diff.hpp:64
long long snake(const long long &k, const long long &above, const long long &below)
Definition: Diff.hpp:557
static void printUnifiedFormat(const uniHunkVec &hunks, ostream &out=cout)
Definition: Diff.hpp:364
long long editDistance
Definition: Diff.hpp:59
void joinSesVec(sesElemVec &s1, sesElemVec &s2) const
Definition: Diff.hpp:674
uniHunkVec uniHunks
Definition: Diff.hpp:68
static void printSES(const Ses< elem > &s, ostream &out=cout)
Definition: Diff.hpp:331
void enableHuge()
Definition: Diff.hpp:153
void onHuge()
Definition: Diff.hpp:124
void onOnlyEditDistance()
Definition: Diff.hpp:144
Definition: Lcs.hpp:48
Definition: Ses.hpp:48
sesElemVec getSequence() const
Definition: Ses.hpp:119
void addSequence(elem e, long long beforeIdx, long long afterIdx, const edit_t type)
Definition: Ses.hpp:83
Definition: Diff.hpp:41
int edit_t
Definition: variables.hpp:71
const edit_t SES_ADD
Definition: variables.hpp:74
const long long DTL_CONTEXT_SIZE
Definition: variables.hpp:96
const unsigned long long MAX_CORDINATES_SIZE
Definition: variables.hpp:110
const long long DTL_SEPARATE_SIZE
Definition: variables.hpp:95
const edit_t SES_COMMON
Definition: variables.hpp:73
const edit_t SES_DELETE
Definition: variables.hpp:72
vector< long long > editPath
Definition: variables.hpp:112
vector< P > editPathCordinates
Definition: variables.hpp:113
#define max(a, b)
Definition: resampler.cpp:30
long long x
Definition: variables.hpp:102
long long k
Definition: variables.hpp:104
long long y
Definition: variables.hpp:103
long long afterIdx
Definition: variables.hpp:88
long long beforeIdx
Definition: variables.hpp:87
vector< sesElem > change
Definition: variables.hpp:122
long long b
Definition: variables.hpp:120
long long d
Definition: variables.hpp:120
long long inc_dec_count
Definition: variables.hpp:123
long long c
Definition: variables.hpp:120
long long a
Definition: variables.hpp:120
vector< sesElem > common[2]
Definition: variables.hpp:121
#define SES_MARK_DELETE
Definition: variables.hpp:79
#define SES_MARK_ADD
Definition: variables.hpp:81
#define SES_MARK_COMMON
Definition: variables.hpp:80