1 package org.catacomb.util; 2 3 4 /* 5 * $Log: Diff.java,v $ 6 * Revision 1.2 2010-05-31 11:01:45 rcc 7 * rodrigos version 8 * 9 * Revision 1.1 2009-06-23 03:37:26 rcc 10 * first commit 11 * 12 * Revision 1.2 2007-08-31 20:01:54 rcc 13 * misc 14 * 15 * Revision 1.1 2007-08-13 13:20:39 rcc 16 * first 17 * 18 * Revision 1.6 2003/03/06 22:51:32 stuart 19 * Convert to CVS 20 * 21 * Revision 1.5 2002/07/19 19:14:40 stuart 22 * fix reverseScript, make change ctor public, update docs 23 * 24 * Revision 1.4 2002/04/09 17:53:39 stuart 25 * More flexible interface for diff() function. 26 * 27 * Revision 1.3 2000/03/03 21:58:03 stuart 28 * move discard_confusing_lines and shift_boundaries to class file_data 29 * 30 * Revision 1.2 2000/03/02 16:37:38 stuart 31 * Add GPL and copyright 32 * 33 */ 34 35 import java.util.Hashtable; 36 37 /** A class to compare vectors of objects. The result of comparison 38 is a list of <code>change</code> objects which form an 39 edit script. The objects compared are traditionally lines 40 of text from two files. Comparison options such as "ignore 41 whitespace" are implemented by modifying the <code>equals</code> 42 and <code>hashcode</code> methods for the objects compared. 43 <p> 44 The basic algorithm is described in: </br> 45 "An O(ND) Difference Algorithm and its Variations", Eugene Myers, 46 Algorithmica Vol. 1 No. 2, 1986, p 251. 47 <p> 48 This class outputs different results from GNU diff 1.15 on some 49 inputs. Our results are actually better (smaller change list, smaller 50 total size of changes), but it would be nice to know why. Perhaps 51 there is a memory overwrite bug in GNU diff 1.15. 52 53 @author Stuart D. Gathman, translated from GNU diff 1.15 54 Copyright (C) 2000 Business Management Systems, Inc. 55 <p> 56 This program is free software; you can redistribute it and/or modify 57 it under the terms of the GNU General Public License as published by 58 the Free Software Foundation; either version 1, or (at your option) 59 any later version. 60 <p> 61 This program is distributed in the hope that it will be useful, 62 but WITHOUT ANY WARRANTY; without even the implied warranty of 63 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 64 GNU General Public License for more details. 65 <p> 66 You should have received a copy of the <a href=COPYING.txt> 67 GNU General Public License</a> 68 along with this program; if not, write to the Free Software 69 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 70 71 */ 72 73 public class Diff { 74 75 /** Prepare to find differences between two arrays. Each element of 76 the arrays is translated to an "equivalence number" based on 77 the result of <code>equals</code>. The original Object arrays 78 are no longer needed for computing the differences. They will 79 be needed again later to print the results of the comparison as 80 an edit script, if desired. 81 */ 82 public Diff(Object[] a,Object[] b) { 83 Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length); 84 filevec[0] = new file_data(a,h); 85 filevec[1] = new file_data(b,h); 86 } 87 88 /** 1 more than the maximum equivalence value used for this or its 89 sibling file. */ 90 int equiv_max = 1; 91 92 /** When set to true, the comparison uses a heuristic to speed it up. 93 With this heuristic, for files with a constant small density 94 of changes, the algorithm is linear in the file size. */ 95 public boolean heuristic = false; 96 97 /** When set to true, the algorithm returns a guarranteed minimal 98 set of changes. This makes things slower, sometimes much slower. */ 99 public boolean no_discards = false; 100 101 private int[] xvec, yvec; /* Vectors being compared. */ 102 private int[] fdiag; /* Vector, indexed by diagonal, containing 103 the X coordinate of the point furthest 104 along the given diagonal in the forward 105 search of the edit matrix. */ 106 private int[] bdiag; /* Vector, indexed by diagonal, containing 107 the X coordinate of the point furthest 108 along the given diagonal in the backward 109 search of the edit matrix. */ 110 private int fdiagoff, bdiagoff; 111 private final file_data[] filevec = new file_data[2]; 112 private int cost; 113 114 /** Find the midpoint of the shortest edit script for a specified 115 portion of the two files. 116 117 We scan from the beginnings of the files, and simultaneously from the ends, 118 doing a breadth-first search through the space of edit-sequence. 119 When the two searches meet, we have found the midpoint of the shortest 120 edit sequence. 121 122 The value returned is the number of the diagonal on which the midpoint lies. 123 The diagonal number equals the number of inserted lines minus the number 124 of deleted lines (counting only lines before the midpoint). 125 The edit cost is stored into COST; this is the total number of 126 lines inserted or deleted (counting only lines before the midpoint). 127 128 This function assumes that the first lines of the specified portions 129 of the two files do not match, and likewise that the last lines do not 130 match. The caller must trim matching lines from the beginning and end 131 of the portions it is going to specify. 132 133 Note that if we return the "wrong" diagonal value, or if 134 the value of bdiag at that diagonal is "wrong", 135 the worst this can do is cause suboptimal diff output. 136 It cannot cause incorrect diff output. */ 137 138 private int diag(int xoff, int xlim, int yoff, int ylim) { 139 final int[] fd = fdiag; // Give the compiler a chance. 140 final int[] bd = bdiag; // Additional help for the compiler. 141 final int[] xv = xvec; // Still more help for the compiler. 142 final int[] yv = yvec; // And more and more . . . 143 final int dmin = xoff - ylim; // Minimum valid diagonal. 144 final int dmax = xlim - yoff; // Maximum valid diagonal. 145 final int fmid = xoff - yoff; // Center diagonal of top-down search. 146 final int bmid = xlim - ylim; // Center diagonal of bottom-up search. 147 int fmin = fmid, fmax = fmid; // Limits of top-down search. 148 int bmin = bmid, bmax = bmid; // Limits of bottom-up search. 149 /* True if southeast corner is on an odd 150 diagonal with respect to the northwest. */ 151 final boolean odd = (fmid - bmid & 1) != 0; 152 153 fd[fdiagoff + fmid] = xoff; 154 bd[bdiagoff + bmid] = xlim; 155 156 for (int c = 1;; ++c) 157 { 158 int d; /* Active diagonal. */ 159 boolean big_snake = false; 160 161 /* Extend the top-down search by an edit step in each diagonal. */ 162 if (fmin > dmin) 163 fd[fdiagoff + --fmin - 1] = -1; 164 else 165 ++fmin; 166 if (fmax < dmax) 167 fd[fdiagoff + ++fmax + 1] = -1; 168 else 169 --fmax; 170 for (d = fmax; d >= fmin; d -= 2) 171 { 172 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1]; 173 174 if (tlo >= thi) 175 x = tlo + 1; 176 else 177 x = thi; 178 oldx = x; 179 y = x - d; 180 while (x < xlim && y < ylim && xv[x] == yv[y]) { 181 ++x; 182 ++y; 183 } 184 if (x - oldx > 20) 185 big_snake = true; 186 fd[fdiagoff + d] = x; 187 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 188 { 189 cost = 2 * c - 1; 190 return d; 191 } 192 } 193 194 /* Similar extend the bottom-up search. */ 195 if (bmin > dmin) 196 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 197 else 198 ++bmin; 199 if (bmax < dmax) 200 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 201 else 202 --bmax; 203 for (d = bmax; d >= bmin; d -= 2) 204 { 205 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 206 207 if (tlo < thi) 208 x = tlo; 209 else 210 x = thi - 1; 211 oldx = x; 212 y = x - d; 213 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 214 --x; 215 --y; 216 } 217 if (oldx - x > 20) 218 big_snake = true; 219 bd[bdiagoff + d] = x; 220 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) 221 { 222 cost = 2 * c; 223 return d; 224 } 225 } 226 227 /* Heuristic: check occasionally for a diagonal that has made 228 lots of progress compared with the edit distance. 229 If we have any such, find the one that has made the most 230 progress and return it as if it had succeeded. 231 232 With this heuristic, for files with a constant small density 233 of changes, the algorithm is linear in the file size. */ 234 235 if (c > 200 && big_snake && heuristic) 236 { 237 int best = 0; 238 int bestpos = -1; 239 240 for (d = fmax; d >= fmin; d -= 2) 241 { 242 int dd = d - fmid; 243 if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd))) 244 { 245 if (fd[fdiagoff + d] * 2 - dd > best 246 && fd[fdiagoff + d] - xoff > 20 247 && fd[fdiagoff + d] - d - yoff > 20) 248 { 249 int k; 250 int x = fd[fdiagoff + d]; 251 252 /* We have a good enough best diagonal; 253 now insist that it end with a significant snake. */ 254 for (k = 1; k <= 20; k++) 255 if (xvec[x - k] != yvec[x - d - k]) 256 break; 257 258 if (k == 21) 259 { 260 best = fd[fdiagoff + d] * 2 - dd; 261 bestpos = d; 262 } 263 } 264 } 265 } 266 if (best > 0) 267 { 268 cost = 2 * c - 1; 269 return bestpos; 270 } 271 272 best = 0; 273 for (d = bmax; d >= bmin; d -= 2) 274 { 275 int dd = d - bmid; 276 if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd))) 277 { 278 if ((xlim - bd[bdiagoff + d]) * 2 + dd > best 279 && xlim - bd[bdiagoff + d] > 20 280 && ylim - (bd[bdiagoff + d] - d) > 20) 281 { 282 /* We have a good enough best diagonal; 283 now insist that it end with a significant snake. */ 284 int k; 285 int x = bd[bdiagoff + d]; 286 287 for (k = 0; k < 20; k++) 288 if (xvec[x + k] != yvec[x - d + k]) 289 break; 290 if (k == 20) 291 { 292 best = (xlim - bd[bdiagoff + d]) * 2 + dd; 293 bestpos = d; 294 } 295 } 296 } 297 } 298 if (best > 0) 299 { 300 cost = 2 * c - 1; 301 return bestpos; 302 } 303 } 304 } 305 } 306 307 /** Compare in detail contiguous subsequences of the two files 308 which are known, as a whole, to match each other. 309 310 The results are recorded in the vectors filevec[N].changed_flag, by 311 storing a 1 in the element for each line that is an insertion or deletion. 312 313 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 314 315 Note that XLIM, YLIM are exclusive bounds. 316 All line numbers are origin-0 and discarded lines are not counted. */ 317 318 @SuppressWarnings("all") 319 private void compareseq(int xoff, int xlim, int yoff, int ylim) { 320 /* Slide down the bottom initial diagonal. */ 321 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 322 ++xoff; 323 ++yoff; 324 } 325 /* Slide up the top initial diagonal. */ 326 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 327 --xlim; 328 --ylim; 329 } 330 331 /* Handle simple cases. */ 332 if (xoff == xlim) 333 while (yoff < ylim) 334 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true; 335 else if (yoff == ylim) 336 while (xoff < xlim) 337 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true; 338 else 339 { 340 /* Find a point of correspondence in the middle of the files. */ 341 342 int d = diag(xoff, xlim, yoff, ylim); 343 int c = cost; 344 345 int b = bdiag[bdiagoff + d]; 346 347 if (c == 1) 348 { 349 /* This should be impossible, because it implies that 350 one of the two subsequences is empty, 351 and that case was handled above without calling `diag'. 352 Let's verify that this is true. */ 353 throw new IllegalArgumentException("Empty subsequence"); 354 } 355 356 357 /* Use that point to split this problem into two subproblems. */ 358 compareseq(xoff, b, yoff, b - d); 359 /* This used to use f instead of b, 360 but that is incorrect! 361 It is not necessarily the case that diagonal d 362 has a snake from b to f. */ 363 compareseq(b, xlim, b - d, ylim); 364 365 } 366 } 367 368 /** Discard lines from one file that have no matches in the other file. 369 */ 370 371 private void discard_confusing_lines() { 372 filevec[0].discard_confusing_lines(filevec[1]); 373 filevec[1].discard_confusing_lines(filevec[0]); 374 } 375 376 private boolean inhibit = false; 377 378 /** Adjust inserts/deletes of blank lines to join changes 379 as much as possible. 380 */ 381 382 private void shift_boundaries() { 383 if (inhibit) 384 return; 385 filevec[0].shift_boundaries(filevec[1]); 386 filevec[1].shift_boundaries(filevec[0]); 387 } 388 389 public interface ScriptBuilder { 390 /** Scan the tables of which lines are inserted and deleted, 391 producing an edit script. 392 @param changed0 true for lines in first file which do not match 2nd 393 @param len0 number of lines in first file 394 @param changed1 true for lines in 2nd file which do not match 1st 395 @param len1 number of lines in 2nd file 396 @return a linked list of changes - or null 397 */ 398 public change build_script( 399 boolean[] changed0,int len0, 400 boolean[] changed1,int len1 401 ); 402 } 403 404 /** Scan the tables of which lines are inserted and deleted, 405 producing an edit script in reverse order. */ 406 407 static class ReverseScript implements ScriptBuilder { 408 public change build_script( 409 final boolean[] changed0,int len0, 410 final boolean[] changed1,int len1) 411 { 412 change script = null; 413 int i0 = 0, i1 = 0; 414 while (i0 < len0 || i1 < len1) { 415 if (changed0[1+i0] || changed1[1+i1]) { 416 int line0 = i0, line1 = i1; 417 418 /* Find # lines changed here in each file. */ 419 while (changed0[1+i0]) ++i0; 420 while (changed1[1+i1]) ++i1; 421 422 /* Record this change. */ 423 script = new change(line0, line1, i0 - line0, i1 - line1, script); 424 } 425 426 /* We have reached lines in the two files that match each other. */ 427 i0++; 428 i1++; 429 } 430 431 return script; 432 } 433 } 434 435 static class ForwardScript implements ScriptBuilder { 436 /** Scan the tables of which lines are inserted and deleted, 437 producing an edit script in forward order. */ 438 public change build_script( 439 final boolean[] changed0,int len0, 440 final boolean[] changed1,int len1) 441 { 442 change script = null; 443 int i0 = len0, i1 = len1; 444 445 while (i0 >= 0 || i1 >= 0) 446 { 447 if (changed0[i0] || changed1[i1]) 448 { 449 int line0 = i0, line1 = i1; 450 451 /* Find # lines changed here in each file. */ 452 while (changed0[i0]) --i0; 453 while (changed1[i1]) --i1; 454 455 /* Record this change. */ 456 script = new change(i0, i1, line0 - i0, line1 - i1, script); 457 } 458 459 /* We have reached lines in the two files that match each other. */ 460 i0--; 461 i1--; 462 } 463 464 return script; 465 } 466 } 467 468 /** Standard ScriptBuilders. */ 469 public final static ScriptBuilder 470 forwardScript = new ForwardScript(), 471 reverseScript = new ReverseScript(); 472 473 /* Report the differences of two files. DEPTH is the current directory 474 depth. */ 475 public final change diff_2(final boolean reverse) { 476 return diff(reverse ? reverseScript : forwardScript); 477 } 478 479 /** Get the results of comparison as an edit script. The script 480 is described by a list of changes. The standard ScriptBuilder 481 implementations provide for forward and reverse edit scripts. 482 Alternate implementations could, for instance, list common elements 483 instead of differences. 484 @param bld an object to build the script from change flags 485 @return the head of a list of changes 486 */ 487 public change diff(final ScriptBuilder bld) { 488 489 /* Some lines are obviously insertions or deletions 490 because they don't match anything. Detect them now, 491 and avoid even thinking about them in the main comparison algorithm. */ 492 493 discard_confusing_lines(); 494 495 /* Now do the main comparison algorithm, considering just the 496 undiscarded lines. */ 497 498 xvec = filevec[0].undiscarded; 499 yvec = filevec[1].undiscarded; 500 501 int diags = 502 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; 503 fdiag = new int[diags]; 504 fdiagoff = filevec[1].nondiscarded_lines + 1; 505 bdiag = new int[diags]; 506 bdiagoff = filevec[1].nondiscarded_lines + 1; 507 508 compareseq(0, filevec[0].nondiscarded_lines, 509 0, filevec[1].nondiscarded_lines); 510 fdiag = null; 511 bdiag = null; 512 513 /* Modify the results slightly to make them prettier 514 in cases where that can validly be done. */ 515 516 shift_boundaries(); 517 518 /* Get the results of comparison in the form of a chain 519 of `struct change's -- an edit script. */ 520 return bld.build_script( 521 filevec[0].changed_flag, 522 filevec[0].buffered_lines, 523 filevec[1].changed_flag, 524 filevec[1].buffered_lines 525 ); 526 527 } 528 529 /** The result of comparison is an "edit script": a chain of change objects. 530 Each change represents one place where some lines are deleted 531 and some are inserted. 532 533 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 534 DELETED is the number of lines deleted here from file 0. 535 INSERTED is the number of lines inserted here in file 1. 536 537 If DELETED is 0 then LINE0 is the number of the line before 538 which the insertion was done; vice versa for INSERTED and LINE1. */ 539 540 public static class change { 541 /** Previous or next edit command. */ 542 public change link; 543 /** # lines of file 1 changed here. */ 544 public final int inserted; 545 /** # lines of file 0 changed here. */ 546 public final int deleted; 547 /** Line number of 1st deleted line. */ 548 public final int line0; 549 /** Line number of 1st inserted line. */ 550 public final int line1; 551 552 /** Cons an additional entry onto the front of an edit script OLD. 553 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 554 DELETED is the number of lines deleted here from file 0. 555 INSERTED is the number of lines inserted here in file 1. 556 557 If DELETED is 0 then LINE0 is the number of the line before 558 which the insertion was done; vice versa for INSERTED and LINE1. */ 559 public change(int line0, int line1, int deleted, int inserted, change old) { 560 this.line0 = line0; 561 this.line1 = line1; 562 this.inserted = inserted; 563 this.deleted = deleted; 564 this.link = old; 565 //System.err.println(line0+","+line1+","+inserted+","+deleted); 566 } 567 } 568 569 /** Data on one input file being compared. 570 */ 571 572 class file_data { 573 574 /** Allocate changed array for the results of comparison. */ 575 void clear() { 576 /* Allocate a flag for each line of each file, saying whether that line 577 is an insertion or deletion. 578 Allocate an extra element, always zero, at each end of each vector. 579 */ 580 changed_flag = new boolean[buffered_lines + 2]; 581 } 582 583 /** Return equiv_count[I] as the number of lines in this file 584 that fall in equivalence class I. 585 @return the array of equivalence class counts. 586 */ 587 int[] equivCount() { 588 int[] equiv_count = new int[equiv_max]; 589 for (int i = 0; i < buffered_lines; ++i) 590 ++equiv_count[equivs[i]]; 591 return equiv_count; 592 } 593 594 /** Discard lines that have no matches in another file. 595 596 A line which is discarded will not be considered by the actual 597 comparison algorithm; it will be as if that line were not in the file. 598 The file's `realindexes' table maps virtual line numbers 599 (which don't count the discarded lines) into real line numbers; 600 this is how the actual comparison algorithm produces results 601 that are comprehensible when the discarded lines are counted. 602 <p> 603 When we discard a line, we also mark it as a deletion or insertion 604 so that it will be printed in the output. 605 @param f the other file 606 */ 607 void discard_confusing_lines(file_data f) { 608 clear(); 609 /* Set up table of which lines are going to be discarded. */ 610 final byte[] discarded = discardable(f.equivCount()); 611 612 /* Don't really discard the provisional lines except when they occur 613 in a run of discardables, with nonprovisionals at the beginning 614 and end. */ 615 filterDiscards(discarded); 616 617 /* Actually discard the lines. */ 618 discard(discarded); 619 } 620 621 /** Mark to be discarded each line that matches no line of another file. 622 If a line matches many lines, mark it as provisionally discardable. 623 @see equivCount() 624 @param counts The count of each equivalence number for the other file. 625 @return 0=nondiscardable, 1=discardable or 2=provisionally discardable 626 for each line 627 */ 628 629 private byte[] discardable(final int[] counts) { 630 final int end = buffered_lines; 631 final byte[] discards = new byte[end]; 632 //final int[] equivs = this.equivs; 633 int many = 5; 634 int tem = end / 64; 635 636 /* Multiply MANY by approximate square root of number of lines. 637 That is the threshold for provisionally discardable lines. */ 638 while ((tem = tem >> 2) > 0) 639 many *= 2; 640 641 for (int i = 0; i < end; i++) 642 { 643 int nmatch; 644 if (equivs[i] == 0) 645 continue; 646 nmatch = counts[equivs[i]]; 647 if (nmatch == 0) 648 discards[i] = 1; 649 else if (nmatch > many) 650 discards[i] = 2; 651 } 652 return discards; 653 } 654 655 /** Don't really discard the provisional lines except when they occur 656 in a run of discardables, with nonprovisionals at the beginning 657 and end. */ 658 659 private void filterDiscards(final byte[] discards) { 660 final int end = buffered_lines; 661 662 for (int i = 0; i < end; i++) 663 { 664 /* Cancel provisional discards not in middle of run of discards. */ 665 if (discards[i] == 2) 666 discards[i] = 0; 667 else if (discards[i] != 0) 668 { 669 /* We have found a nonprovisional discard. */ 670 int j; 671 int length; 672 int provisional = 0; 673 674 /* Find end of this run of discardable lines. 675 Count how many are provisionally discardable. */ 676 for (j = i; j < end; j++) 677 { 678 if (discards[j] == 0) 679 break; 680 if (discards[j] == 2) 681 ++provisional; 682 } 683 684 /* Cancel provisional discards at end, and shrink the run. */ 685 while (j > i && discards[j - 1] == 2) { 686 discards[--j] = 0; 687 --provisional; 688 } 689 690 /* Now we have the length of a run of discardable lines 691 whose first and last are not provisional. */ 692 length = j - i; 693 694 /* If 1/4 of the lines in the run are provisional, 695 cancel discarding of all provisional lines in the run. */ 696 if (provisional * 4 > length) 697 { 698 while (j > i) 699 if (discards[--j] == 2) 700 discards[j] = 0; 701 } 702 else 703 { 704 int consec; 705 int minimum = 1; 706 int tem = length / 4; 707 708 /* MINIMUM is approximate square root of LENGTH/4. 709 A subrun of two or more provisionals can stand 710 when LENGTH is at least 16. 711 A subrun of 4 or more can stand when LENGTH >= 64. */ 712 while ((tem = tem >> 2) > 0) 713 minimum *= 2; 714 minimum++; 715 716 /* Cancel any subrun of MINIMUM or more provisionals 717 within the larger run. */ 718 for (j = 0, consec = 0; j < length; j++) 719 if (discards[i + j] != 2) 720 consec = 0; 721 else if (minimum == ++consec) 722 /* Back up to start of subrun, to cancel it all. */ 723 j -= consec; 724 else if (minimum < consec) 725 discards[i + j] = 0; 726 727 /* Scan from beginning of run 728 until we find 3 or more nonprovisionals in a row 729 or until the first nonprovisional at least 8 lines in. 730 Until that point, cancel any provisionals. */ 731 for (j = 0, consec = 0; j < length; j++) 732 { 733 if (j >= 8 && discards[i + j] == 1) 734 break; 735 if (discards[i + j] == 2) { 736 consec = 0; 737 discards[i + j] = 0; 738 } 739 else if (discards[i + j] == 0) 740 consec = 0; 741 else 742 consec++; 743 if (consec == 3) 744 break; 745 } 746 747 /* I advances to the last line of the run. */ 748 i += length - 1; 749 750 /* Same thing, from end. */ 751 for (j = 0, consec = 0; j < length; j++) 752 { 753 if (j >= 8 && discards[i - j] == 1) 754 break; 755 if (discards[i - j] == 2) { 756 consec = 0; 757 discards[i - j] = 0; 758 } 759 else if (discards[i - j] == 0) 760 consec = 0; 761 else 762 consec++; 763 if (consec == 3) 764 break; 765 } 766 } 767 } 768 } 769 } 770 771 /** Actually discard the lines. 772 @param discards flags lines to be discarded 773 */ 774 private void discard(final byte[] discards) { 775 final int end = buffered_lines; 776 int j = 0; 777 for (int i = 0; i < end; ++i) 778 if (no_discards || discards[i] == 0) 779 { 780 undiscarded[j] = equivs[i]; 781 realindexes[j++] = i; 782 } 783 else 784 changed_flag[1+i] = true; 785 nondiscarded_lines = j; 786 } 787 788 file_data(Object[] data,Hashtable<Object, Integer> h) { 789 buffered_lines = data.length; 790 791 equivs = new int[buffered_lines]; 792 undiscarded = new int[buffered_lines]; 793 realindexes = new int[buffered_lines]; 794 795 for (int i = 0; i < data.length; ++i) { 796 Integer ir = h.get(data[i]); 797 if (ir == null) 798 h.put(data[i], new Integer(equivs[i] = equiv_max++)); 799 else 800 equivs[i] = ir.intValue(); 801 } 802 } 803 804 /** Adjust inserts/deletes of blank lines to join changes 805 as much as possible. 806 807 We do something when a run of changed lines include a blank 808 line at one end and have an excluded blank line at the other. 809 We are free to choose which blank line is included. 810 `compareseq' always chooses the one at the beginning, 811 but usually it is cleaner to consider the following blank line 812 to be the "change". The only exception is if the preceding blank line 813 would join this change to other changes. 814 @param f the file being compared against 815 */ 816 817 void shift_boundaries(file_data f) { 818 final boolean[] changed = changed_flag; 819 final boolean[] other_changed = f.changed_flag; 820 int i = 0; 821 int j = 0; 822 int i_end = buffered_lines; 823 int preceding = -1; 824 int other_preceding = -1; 825 826 for (;;) 827 { 828 int start, end, other_start; 829 830 /* Scan forwards to find beginning of another run of changes. 831 Also keep track of the corresponding point in the other file. */ 832 833 while (i < i_end && !changed[1+i]) 834 { 835 while (other_changed[1+j++]) 836 /* Non-corresponding lines in the other file 837 will count as the preceding batch of changes. */ 838 other_preceding = j; 839 i++; 840 } 841 842 if (i == i_end) 843 break; 844 845 start = i; 846 other_start = j; 847 848 for (;;) 849 { 850 /* Now find the end of this run of changes. */ 851 852 while (i < i_end && changed[1+i]) i++; 853 end = i; 854 855 /* If the first changed line matches the following unchanged one, 856 and this run does not follow right after a previous run, 857 and there are no lines deleted from the other file here, 858 then classify the first changed line as unchanged 859 and the following line as changed in its place. */ 860 861 /* You might ask, how could this run follow right after another? 862 Only because the previous run was shifted here. */ 863 864 if (end != i_end 865 && equivs[start] == equivs[end] 866 && !other_changed[1+j] 867 && end != i_end 868 && !((preceding >= 0 && start == preceding) 869 || (other_preceding >= 0 870 && other_start == other_preceding))) 871 { 872 changed[1+end++] = true; 873 changed[1+start++] = false; 874 ++i; 875 /* Since one line-that-matches is now before this run 876 instead of after, we must advance in the other file 877 to keep in synch. */ 878 ++j; 879 } 880 else 881 break; 882 } 883 884 preceding = i; 885 other_preceding = j; 886 } 887 } 888 889 /** Number of elements (lines) in this file. */ 890 final int buffered_lines; 891 892 /** Vector, indexed by line number, containing an equivalence code for 893 each line. It is this vector that is actually compared with that 894 of another file to generate differences. */ 895 private final int[] equivs; 896 897 /** Vector, like the previous one except that 898 the elements for discarded lines have been squeezed out. */ 899 final int[] undiscarded; 900 901 /** Vector mapping virtual line numbers (not counting discarded lines) 902 to real ones (counting those lines). Both are origin-0. */ 903 final int[] realindexes; 904 905 /** Total number of nondiscarded lines. */ 906 int nondiscarded_lines; 907 908 /** Array, indexed by real origin-1 line number, 909 containing true for a line that is an insertion or a deletion. 910 The results of comparison are stored here. */ 911 boolean[] changed_flag; 912 913 } 914 }