1 package org.catacomb.util;
2
3
4 /*
5 * $Log: Diff.java,v $
6 * Revision 1.2 2010-05-31 11:01:45 rcc
7 * rodrigos version
8 *
9 * Revision 1.1 2009-06-23 03:37:26 rcc
10 * first commit
11 *
12 * Revision 1.2 2007-08-31 20:01:54 rcc
13 * misc
14 *
15 * Revision 1.1 2007-08-13 13:20:39 rcc
16 * first
17 *
18 * Revision 1.6 2003/03/06 22:51:32 stuart
19 * Convert to CVS
20 *
21 * Revision 1.5 2002/07/19 19:14:40 stuart
22 * fix reverseScript, make change ctor public, update docs
23 *
24 * Revision 1.4 2002/04/09 17:53:39 stuart
25 * More flexible interface for diff() function.
26 *
27 * Revision 1.3 2000/03/03 21:58:03 stuart
28 * move discard_confusing_lines and shift_boundaries to class file_data
29 *
30 * Revision 1.2 2000/03/02 16:37:38 stuart
31 * Add GPL and copyright
32 *
33 */
34
35 import java.util.Hashtable;
36
37 /** A class to compare vectors of objects. The result of comparison
38 is a list of <code>change</code> objects which form an
39 edit script. The objects compared are traditionally lines
40 of text from two files. Comparison options such as "ignore
41 whitespace" are implemented by modifying the <code>equals</code>
42 and <code>hashcode</code> methods for the objects compared.
43 <p>
44 The basic algorithm is described in: </br>
45 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
46 Algorithmica Vol. 1 No. 2, 1986, p 251.
47 <p>
48 This class outputs different results from GNU diff 1.15 on some
49 inputs. Our results are actually better (smaller change list, smaller
50 total size of changes), but it would be nice to know why. Perhaps
51 there is a memory overwrite bug in GNU diff 1.15.
52
53 @author Stuart D. Gathman, translated from GNU diff 1.15
54 Copyright (C) 2000 Business Management Systems, Inc.
55 <p>
56 This program is free software; you can redistribute it and/or modify
57 it under the terms of the GNU General Public License as published by
58 the Free Software Foundation; either version 1, or (at your option)
59 any later version.
60 <p>
61 This program is distributed in the hope that it will be useful,
62 but WITHOUT ANY WARRANTY; without even the implied warranty of
63 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
64 GNU General Public License for more details.
65 <p>
66 You should have received a copy of the <a href=COPYING.txt>
67 GNU General Public License</a>
68 along with this program; if not, write to the Free Software
69 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
70
71 */
72
73 public class Diff {
74
75 /** Prepare to find differences between two arrays. Each element of
76 the arrays is translated to an "equivalence number" based on
77 the result of <code>equals</code>. The original Object arrays
78 are no longer needed for computing the differences. They will
79 be needed again later to print the results of the comparison as
80 an edit script, if desired.
81 */
82 public Diff(Object[] a,Object[] b) {
83 Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
84 filevec[0] = new file_data(a,h);
85 filevec[1] = new file_data(b,h);
86 }
87
88 /** 1 more than the maximum equivalence value used for this or its
89 sibling file. */
90 int equiv_max = 1;
91
92 /** When set to true, the comparison uses a heuristic to speed it up.
93 With this heuristic, for files with a constant small density
94 of changes, the algorithm is linear in the file size. */
95 public boolean heuristic = false;
96
97 /** When set to true, the algorithm returns a guarranteed minimal
98 set of changes. This makes things slower, sometimes much slower. */
99 public boolean no_discards = false;
100
101 private int[] xvec, yvec; /* Vectors being compared. */
102 private int[] fdiag; /* Vector, indexed by diagonal, containing
103 the X coordinate of the point furthest
104 along the given diagonal in the forward
105 search of the edit matrix. */
106 private int[] bdiag; /* Vector, indexed by diagonal, containing
107 the X coordinate of the point furthest
108 along the given diagonal in the backward
109 search of the edit matrix. */
110 private int fdiagoff, bdiagoff;
111 private final file_data[] filevec = new file_data[2];
112 private int cost;
113
114 /** Find the midpoint of the shortest edit script for a specified
115 portion of the two files.
116
117 We scan from the beginnings of the files, and simultaneously from the ends,
118 doing a breadth-first search through the space of edit-sequence.
119 When the two searches meet, we have found the midpoint of the shortest
120 edit sequence.
121
122 The value returned is the number of the diagonal on which the midpoint lies.
123 The diagonal number equals the number of inserted lines minus the number
124 of deleted lines (counting only lines before the midpoint).
125 The edit cost is stored into COST; this is the total number of
126 lines inserted or deleted (counting only lines before the midpoint).
127
128 This function assumes that the first lines of the specified portions
129 of the two files do not match, and likewise that the last lines do not
130 match. The caller must trim matching lines from the beginning and end
131 of the portions it is going to specify.
132
133 Note that if we return the "wrong" diagonal value, or if
134 the value of bdiag at that diagonal is "wrong",
135 the worst this can do is cause suboptimal diff output.
136 It cannot cause incorrect diff output. */
137
138 private int diag(int xoff, int xlim, int yoff, int ylim) {
139 final int[] fd = fdiag; // Give the compiler a chance.
140 final int[] bd = bdiag; // Additional help for the compiler.
141 final int[] xv = xvec; // Still more help for the compiler.
142 final int[] yv = yvec; // And more and more . . .
143 final int dmin = xoff - ylim; // Minimum valid diagonal.
144 final int dmax = xlim - yoff; // Maximum valid diagonal.
145 final int fmid = xoff - yoff; // Center diagonal of top-down search.
146 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
147 int fmin = fmid, fmax = fmid; // Limits of top-down search.
148 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
149 /* True if southeast corner is on an odd
150 diagonal with respect to the northwest. */
151 final boolean odd = (fmid - bmid & 1) != 0;
152
153 fd[fdiagoff + fmid] = xoff;
154 bd[bdiagoff + bmid] = xlim;
155
156 for (int c = 1;; ++c)
157 {
158 int d; /* Active diagonal. */
159 boolean big_snake = false;
160
161 /* Extend the top-down search by an edit step in each diagonal. */
162 if (fmin > dmin)
163 fd[fdiagoff + --fmin - 1] = -1;
164 else
165 ++fmin;
166 if (fmax < dmax)
167 fd[fdiagoff + ++fmax + 1] = -1;
168 else
169 --fmax;
170 for (d = fmax; d >= fmin; d -= 2)
171 {
172 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
173
174 if (tlo >= thi)
175 x = tlo + 1;
176 else
177 x = thi;
178 oldx = x;
179 y = x - d;
180 while (x < xlim && y < ylim && xv[x] == yv[y]) {
181 ++x;
182 ++y;
183 }
184 if (x - oldx > 20)
185 big_snake = true;
186 fd[fdiagoff + d] = x;
187 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
188 {
189 cost = 2 * c - 1;
190 return d;
191 }
192 }
193
194 /* Similar extend the bottom-up search. */
195 if (bmin > dmin)
196 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
197 else
198 ++bmin;
199 if (bmax < dmax)
200 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
201 else
202 --bmax;
203 for (d = bmax; d >= bmin; d -= 2)
204 {
205 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
206
207 if (tlo < thi)
208 x = tlo;
209 else
210 x = thi - 1;
211 oldx = x;
212 y = x - d;
213 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
214 --x;
215 --y;
216 }
217 if (oldx - x > 20)
218 big_snake = true;
219 bd[bdiagoff + d] = x;
220 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
221 {
222 cost = 2 * c;
223 return d;
224 }
225 }
226
227 /* Heuristic: check occasionally for a diagonal that has made
228 lots of progress compared with the edit distance.
229 If we have any such, find the one that has made the most
230 progress and return it as if it had succeeded.
231
232 With this heuristic, for files with a constant small density
233 of changes, the algorithm is linear in the file size. */
234
235 if (c > 200 && big_snake && heuristic)
236 {
237 int best = 0;
238 int bestpos = -1;
239
240 for (d = fmax; d >= fmin; d -= 2)
241 {
242 int dd = d - fmid;
243 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd)))
244 {
245 if (fd[fdiagoff + d] * 2 - dd > best
246 && fd[fdiagoff + d] - xoff > 20
247 && fd[fdiagoff + d] - d - yoff > 20)
248 {
249 int k;
250 int x = fd[fdiagoff + d];
251
252 /* We have a good enough best diagonal;
253 now insist that it end with a significant snake. */
254 for (k = 1; k <= 20; k++)
255 if (xvec[x - k] != yvec[x - d - k])
256 break;
257
258 if (k == 21)
259 {
260 best = fd[fdiagoff + d] * 2 - dd;
261 bestpos = d;
262 }
263 }
264 }
265 }
266 if (best > 0)
267 {
268 cost = 2 * c - 1;
269 return bestpos;
270 }
271
272 best = 0;
273 for (d = bmax; d >= bmin; d -= 2)
274 {
275 int dd = d - bmid;
276 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd)))
277 {
278 if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
279 && xlim - bd[bdiagoff + d] > 20
280 && ylim - (bd[bdiagoff + d] - d) > 20)
281 {
282 /* We have a good enough best diagonal;
283 now insist that it end with a significant snake. */
284 int k;
285 int x = bd[bdiagoff + d];
286
287 for (k = 0; k < 20; k++)
288 if (xvec[x + k] != yvec[x - d + k])
289 break;
290 if (k == 20)
291 {
292 best = (xlim - bd[bdiagoff + d]) * 2 + dd;
293 bestpos = d;
294 }
295 }
296 }
297 }
298 if (best > 0)
299 {
300 cost = 2 * c - 1;
301 return bestpos;
302 }
303 }
304 }
305 }
306
307 /** Compare in detail contiguous subsequences of the two files
308 which are known, as a whole, to match each other.
309
310 The results are recorded in the vectors filevec[N].changed_flag, by
311 storing a 1 in the element for each line that is an insertion or deletion.
312
313 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
314
315 Note that XLIM, YLIM are exclusive bounds.
316 All line numbers are origin-0 and discarded lines are not counted. */
317
318 @SuppressWarnings("all")
319 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
320 /* Slide down the bottom initial diagonal. */
321 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
322 ++xoff;
323 ++yoff;
324 }
325 /* Slide up the top initial diagonal. */
326 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
327 --xlim;
328 --ylim;
329 }
330
331 /* Handle simple cases. */
332 if (xoff == xlim)
333 while (yoff < ylim)
334 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
335 else if (yoff == ylim)
336 while (xoff < xlim)
337 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
338 else
339 {
340 /* Find a point of correspondence in the middle of the files. */
341
342 int d = diag(xoff, xlim, yoff, ylim);
343 int c = cost;
344
345 int b = bdiag[bdiagoff + d];
346
347 if (c == 1)
348 {
349 /* This should be impossible, because it implies that
350 one of the two subsequences is empty,
351 and that case was handled above without calling `diag'.
352 Let's verify that this is true. */
353 throw new IllegalArgumentException("Empty subsequence");
354 }
355
356
357 /* Use that point to split this problem into two subproblems. */
358 compareseq(xoff, b, yoff, b - d);
359 /* This used to use f instead of b,
360 but that is incorrect!
361 It is not necessarily the case that diagonal d
362 has a snake from b to f. */
363 compareseq(b, xlim, b - d, ylim);
364
365 }
366 }
367
368 /** Discard lines from one file that have no matches in the other file.
369 */
370
371 private void discard_confusing_lines() {
372 filevec[0].discard_confusing_lines(filevec[1]);
373 filevec[1].discard_confusing_lines(filevec[0]);
374 }
375
376 private boolean inhibit = false;
377
378 /** Adjust inserts/deletes of blank lines to join changes
379 as much as possible.
380 */
381
382 private void shift_boundaries() {
383 if (inhibit)
384 return;
385 filevec[0].shift_boundaries(filevec[1]);
386 filevec[1].shift_boundaries(filevec[0]);
387 }
388
389 public interface ScriptBuilder {
390 /** Scan the tables of which lines are inserted and deleted,
391 producing an edit script.
392 @param changed0 true for lines in first file which do not match 2nd
393 @param len0 number of lines in first file
394 @param changed1 true for lines in 2nd file which do not match 1st
395 @param len1 number of lines in 2nd file
396 @return a linked list of changes - or null
397 */
398 public change build_script(
399 boolean[] changed0,int len0,
400 boolean[] changed1,int len1
401 );
402 }
403
404 /** Scan the tables of which lines are inserted and deleted,
405 producing an edit script in reverse order. */
406
407 static class ReverseScript implements ScriptBuilder {
408 public change build_script(
409 final boolean[] changed0,int len0,
410 final boolean[] changed1,int len1)
411 {
412 change script = null;
413 int i0 = 0, i1 = 0;
414 while (i0 < len0 || i1 < len1) {
415 if (changed0[1+i0] || changed1[1+i1]) {
416 int line0 = i0, line1 = i1;
417
418 /* Find # lines changed here in each file. */
419 while (changed0[1+i0]) ++i0;
420 while (changed1[1+i1]) ++i1;
421
422 /* Record this change. */
423 script = new change(line0, line1, i0 - line0, i1 - line1, script);
424 }
425
426 /* We have reached lines in the two files that match each other. */
427 i0++;
428 i1++;
429 }
430
431 return script;
432 }
433 }
434
435 static class ForwardScript implements ScriptBuilder {
436 /** Scan the tables of which lines are inserted and deleted,
437 producing an edit script in forward order. */
438 public change build_script(
439 final boolean[] changed0,int len0,
440 final boolean[] changed1,int len1)
441 {
442 change script = null;
443 int i0 = len0, i1 = len1;
444
445 while (i0 >= 0 || i1 >= 0)
446 {
447 if (changed0[i0] || changed1[i1])
448 {
449 int line0 = i0, line1 = i1;
450
451 /* Find # lines changed here in each file. */
452 while (changed0[i0]) --i0;
453 while (changed1[i1]) --i1;
454
455 /* Record this change. */
456 script = new change(i0, i1, line0 - i0, line1 - i1, script);
457 }
458
459 /* We have reached lines in the two files that match each other. */
460 i0--;
461 i1--;
462 }
463
464 return script;
465 }
466 }
467
468 /** Standard ScriptBuilders. */
469 public final static ScriptBuilder
470 forwardScript = new ForwardScript(),
471 reverseScript = new ReverseScript();
472
473 /* Report the differences of two files. DEPTH is the current directory
474 depth. */
475 public final change diff_2(final boolean reverse) {
476 return diff(reverse ? reverseScript : forwardScript);
477 }
478
479 /** Get the results of comparison as an edit script. The script
480 is described by a list of changes. The standard ScriptBuilder
481 implementations provide for forward and reverse edit scripts.
482 Alternate implementations could, for instance, list common elements
483 instead of differences.
484 @param bld an object to build the script from change flags
485 @return the head of a list of changes
486 */
487 public change diff(final ScriptBuilder bld) {
488
489 /* Some lines are obviously insertions or deletions
490 because they don't match anything. Detect them now,
491 and avoid even thinking about them in the main comparison algorithm. */
492
493 discard_confusing_lines();
494
495 /* Now do the main comparison algorithm, considering just the
496 undiscarded lines. */
497
498 xvec = filevec[0].undiscarded;
499 yvec = filevec[1].undiscarded;
500
501 int diags =
502 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
503 fdiag = new int[diags];
504 fdiagoff = filevec[1].nondiscarded_lines + 1;
505 bdiag = new int[diags];
506 bdiagoff = filevec[1].nondiscarded_lines + 1;
507
508 compareseq(0, filevec[0].nondiscarded_lines,
509 0, filevec[1].nondiscarded_lines);
510 fdiag = null;
511 bdiag = null;
512
513 /* Modify the results slightly to make them prettier
514 in cases where that can validly be done. */
515
516 shift_boundaries();
517
518 /* Get the results of comparison in the form of a chain
519 of `struct change's -- an edit script. */
520 return bld.build_script(
521 filevec[0].changed_flag,
522 filevec[0].buffered_lines,
523 filevec[1].changed_flag,
524 filevec[1].buffered_lines
525 );
526
527 }
528
529 /** The result of comparison is an "edit script": a chain of change objects.
530 Each change represents one place where some lines are deleted
531 and some are inserted.
532
533 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
534 DELETED is the number of lines deleted here from file 0.
535 INSERTED is the number of lines inserted here in file 1.
536
537 If DELETED is 0 then LINE0 is the number of the line before
538 which the insertion was done; vice versa for INSERTED and LINE1. */
539
540 public static class change {
541 /** Previous or next edit command. */
542 public change link;
543 /** # lines of file 1 changed here. */
544 public final int inserted;
545 /** # lines of file 0 changed here. */
546 public final int deleted;
547 /** Line number of 1st deleted line. */
548 public final int line0;
549 /** Line number of 1st inserted line. */
550 public final int line1;
551
552 /** Cons an additional entry onto the front of an edit script OLD.
553 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
554 DELETED is the number of lines deleted here from file 0.
555 INSERTED is the number of lines inserted here in file 1.
556
557 If DELETED is 0 then LINE0 is the number of the line before
558 which the insertion was done; vice versa for INSERTED and LINE1. */
559 public change(int line0, int line1, int deleted, int inserted, change old) {
560 this.line0 = line0;
561 this.line1 = line1;
562 this.inserted = inserted;
563 this.deleted = deleted;
564 this.link = old;
565 //System.err.println(line0+","+line1+","+inserted+","+deleted);
566 }
567 }
568
569 /** Data on one input file being compared.
570 */
571
572 class file_data {
573
574 /** Allocate changed array for the results of comparison. */
575 void clear() {
576 /* Allocate a flag for each line of each file, saying whether that line
577 is an insertion or deletion.
578 Allocate an extra element, always zero, at each end of each vector.
579 */
580 changed_flag = new boolean[buffered_lines + 2];
581 }
582
583 /** Return equiv_count[I] as the number of lines in this file
584 that fall in equivalence class I.
585 @return the array of equivalence class counts.
586 */
587 int[] equivCount() {
588 int[] equiv_count = new int[equiv_max];
589 for (int i = 0; i < buffered_lines; ++i)
590 ++equiv_count[equivs[i]];
591 return equiv_count;
592 }
593
594 /** Discard lines that have no matches in another file.
595
596 A line which is discarded will not be considered by the actual
597 comparison algorithm; it will be as if that line were not in the file.
598 The file's `realindexes' table maps virtual line numbers
599 (which don't count the discarded lines) into real line numbers;
600 this is how the actual comparison algorithm produces results
601 that are comprehensible when the discarded lines are counted.
602 <p>
603 When we discard a line, we also mark it as a deletion or insertion
604 so that it will be printed in the output.
605 @param f the other file
606 */
607 void discard_confusing_lines(file_data f) {
608 clear();
609 /* Set up table of which lines are going to be discarded. */
610 final byte[] discarded = discardable(f.equivCount());
611
612 /* Don't really discard the provisional lines except when they occur
613 in a run of discardables, with nonprovisionals at the beginning
614 and end. */
615 filterDiscards(discarded);
616
617 /* Actually discard the lines. */
618 discard(discarded);
619 }
620
621 /** Mark to be discarded each line that matches no line of another file.
622 If a line matches many lines, mark it as provisionally discardable.
623 @see equivCount()
624 @param counts The count of each equivalence number for the other file.
625 @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
626 for each line
627 */
628
629 private byte[] discardable(final int[] counts) {
630 final int end = buffered_lines;
631 final byte[] discards = new byte[end];
632 //final int[] equivs = this.equivs;
633 int many = 5;
634 int tem = end / 64;
635
636 /* Multiply MANY by approximate square root of number of lines.
637 That is the threshold for provisionally discardable lines. */
638 while ((tem = tem >> 2) > 0)
639 many *= 2;
640
641 for (int i = 0; i < end; i++)
642 {
643 int nmatch;
644 if (equivs[i] == 0)
645 continue;
646 nmatch = counts[equivs[i]];
647 if (nmatch == 0)
648 discards[i] = 1;
649 else if (nmatch > many)
650 discards[i] = 2;
651 }
652 return discards;
653 }
654
655 /** Don't really discard the provisional lines except when they occur
656 in a run of discardables, with nonprovisionals at the beginning
657 and end. */
658
659 private void filterDiscards(final byte[] discards) {
660 final int end = buffered_lines;
661
662 for (int i = 0; i < end; i++)
663 {
664 /* Cancel provisional discards not in middle of run of discards. */
665 if (discards[i] == 2)
666 discards[i] = 0;
667 else if (discards[i] != 0)
668 {
669 /* We have found a nonprovisional discard. */
670 int j;
671 int length;
672 int provisional = 0;
673
674 /* Find end of this run of discardable lines.
675 Count how many are provisionally discardable. */
676 for (j = i; j < end; j++)
677 {
678 if (discards[j] == 0)
679 break;
680 if (discards[j] == 2)
681 ++provisional;
682 }
683
684 /* Cancel provisional discards at end, and shrink the run. */
685 while (j > i && discards[j - 1] == 2) {
686 discards[--j] = 0;
687 --provisional;
688 }
689
690 /* Now we have the length of a run of discardable lines
691 whose first and last are not provisional. */
692 length = j - i;
693
694 /* If 1/4 of the lines in the run are provisional,
695 cancel discarding of all provisional lines in the run. */
696 if (provisional * 4 > length)
697 {
698 while (j > i)
699 if (discards[--j] == 2)
700 discards[j] = 0;
701 }
702 else
703 {
704 int consec;
705 int minimum = 1;
706 int tem = length / 4;
707
708 /* MINIMUM is approximate square root of LENGTH/4.
709 A subrun of two or more provisionals can stand
710 when LENGTH is at least 16.
711 A subrun of 4 or more can stand when LENGTH >= 64. */
712 while ((tem = tem >> 2) > 0)
713 minimum *= 2;
714 minimum++;
715
716 /* Cancel any subrun of MINIMUM or more provisionals
717 within the larger run. */
718 for (j = 0, consec = 0; j < length; j++)
719 if (discards[i + j] != 2)
720 consec = 0;
721 else if (minimum == ++consec)
722 /* Back up to start of subrun, to cancel it all. */
723 j -= consec;
724 else if (minimum < consec)
725 discards[i + j] = 0;
726
727 /* Scan from beginning of run
728 until we find 3 or more nonprovisionals in a row
729 or until the first nonprovisional at least 8 lines in.
730 Until that point, cancel any provisionals. */
731 for (j = 0, consec = 0; j < length; j++)
732 {
733 if (j >= 8 && discards[i + j] == 1)
734 break;
735 if (discards[i + j] == 2) {
736 consec = 0;
737 discards[i + j] = 0;
738 }
739 else if (discards[i + j] == 0)
740 consec = 0;
741 else
742 consec++;
743 if (consec == 3)
744 break;
745 }
746
747 /* I advances to the last line of the run. */
748 i += length - 1;
749
750 /* Same thing, from end. */
751 for (j = 0, consec = 0; j < length; j++)
752 {
753 if (j >= 8 && discards[i - j] == 1)
754 break;
755 if (discards[i - j] == 2) {
756 consec = 0;
757 discards[i - j] = 0;
758 }
759 else if (discards[i - j] == 0)
760 consec = 0;
761 else
762 consec++;
763 if (consec == 3)
764 break;
765 }
766 }
767 }
768 }
769 }
770
771 /** Actually discard the lines.
772 @param discards flags lines to be discarded
773 */
774 private void discard(final byte[] discards) {
775 final int end = buffered_lines;
776 int j = 0;
777 for (int i = 0; i < end; ++i)
778 if (no_discards || discards[i] == 0)
779 {
780 undiscarded[j] = equivs[i];
781 realindexes[j++] = i;
782 }
783 else
784 changed_flag[1+i] = true;
785 nondiscarded_lines = j;
786 }
787
788 file_data(Object[] data,Hashtable<Object, Integer> h) {
789 buffered_lines = data.length;
790
791 equivs = new int[buffered_lines];
792 undiscarded = new int[buffered_lines];
793 realindexes = new int[buffered_lines];
794
795 for (int i = 0; i < data.length; ++i) {
796 Integer ir = h.get(data[i]);
797 if (ir == null)
798 h.put(data[i], new Integer(equivs[i] = equiv_max++));
799 else
800 equivs[i] = ir.intValue();
801 }
802 }
803
804 /** Adjust inserts/deletes of blank lines to join changes
805 as much as possible.
806
807 We do something when a run of changed lines include a blank
808 line at one end and have an excluded blank line at the other.
809 We are free to choose which blank line is included.
810 `compareseq' always chooses the one at the beginning,
811 but usually it is cleaner to consider the following blank line
812 to be the "change". The only exception is if the preceding blank line
813 would join this change to other changes.
814 @param f the file being compared against
815 */
816
817 void shift_boundaries(file_data f) {
818 final boolean[] changed = changed_flag;
819 final boolean[] other_changed = f.changed_flag;
820 int i = 0;
821 int j = 0;
822 int i_end = buffered_lines;
823 int preceding = -1;
824 int other_preceding = -1;
825
826 for (;;)
827 {
828 int start, end, other_start;
829
830 /* Scan forwards to find beginning of another run of changes.
831 Also keep track of the corresponding point in the other file. */
832
833 while (i < i_end && !changed[1+i])
834 {
835 while (other_changed[1+j++])
836 /* Non-corresponding lines in the other file
837 will count as the preceding batch of changes. */
838 other_preceding = j;
839 i++;
840 }
841
842 if (i == i_end)
843 break;
844
845 start = i;
846 other_start = j;
847
848 for (;;)
849 {
850 /* Now find the end of this run of changes. */
851
852 while (i < i_end && changed[1+i]) i++;
853 end = i;
854
855 /* If the first changed line matches the following unchanged one,
856 and this run does not follow right after a previous run,
857 and there are no lines deleted from the other file here,
858 then classify the first changed line as unchanged
859 and the following line as changed in its place. */
860
861 /* You might ask, how could this run follow right after another?
862 Only because the previous run was shifted here. */
863
864 if (end != i_end
865 && equivs[start] == equivs[end]
866 && !other_changed[1+j]
867 && end != i_end
868 && !((preceding >= 0 && start == preceding)
869 || (other_preceding >= 0
870 && other_start == other_preceding)))
871 {
872 changed[1+end++] = true;
873 changed[1+start++] = false;
874 ++i;
875 /* Since one line-that-matches is now before this run
876 instead of after, we must advance in the other file
877 to keep in synch. */
878 ++j;
879 }
880 else
881 break;
882 }
883
884 preceding = i;
885 other_preceding = j;
886 }
887 }
888
889 /** Number of elements (lines) in this file. */
890 final int buffered_lines;
891
892 /** Vector, indexed by line number, containing an equivalence code for
893 each line. It is this vector that is actually compared with that
894 of another file to generate differences. */
895 private final int[] equivs;
896
897 /** Vector, like the previous one except that
898 the elements for discarded lines have been squeezed out. */
899 final int[] undiscarded;
900
901 /** Vector mapping virtual line numbers (not counting discarded lines)
902 to real ones (counting those lines). Both are origin-0. */
903 final int[] realindexes;
904
905 /** Total number of nondiscarded lines. */
906 int nondiscarded_lines;
907
908 /** Array, indexed by real origin-1 line number,
909 containing true for a line that is an insertion or a deletion.
910 The results of comparison are stored here. */
911 boolean[] changed_flag;
912
913 }
914 }