View Javadoc

1   package org.incava.util.diff;
2   
3   import java.util.*;
4   
5   /**
6    * Compares two collections, returning a list of the additions, changes, and
7    * deletions between them. A <code>Comparator</code> may be passed as an
8    * argument to the constructor, and will thus be used. If not provided, the
9    * initial value in the <code>a</code> ("from") collection will be looked at to
10   * see if it supports the <code>Comparable</code> interface. If so, its
11   * <code>equals</code> and <code>compareTo</code> methods will be invoked on the
12   * instances in the "from" and "to" collections; otherwise, for speed, hash
13   * codes from the objects will be used instead for comparison.
14   *
15   * <p>The file FileDiff.java shows an example usage of this class, in an
16   * application similar to the Unix "diff" program.</p>
17   */
18  public class Diff<T> {
19      /**
20       * The source array, AKA the "from" values.
21       */
22      protected T[] a;
23  
24      /**
25       * The target array, AKA the "to" values.
26       */
27      protected T[] b;
28  
29      /**
30       * The list of differences, as <code>Difference</code> instances.
31       */
32      protected List<Difference> diffs = new ArrayList<Difference>();
33  
34      /**
35       * The pending, uncommitted difference.
36       */
37      private Difference pending;
38  
39      /**
40       * The comparator used, if any.
41       */
42      private Comparator<T> comparator;
43  
44      /**
45       * The thresholds.
46       */
47      private TreeMap<Integer, Integer> thresh;
48  
49      /**
50       * Constructs the Diff object for the two arrays, using the given comparator.
51       */
52      public Diff(T[] a, T[] b, Comparator<T> comp) {
53          this.a = a;
54          this.b = b;
55          this.comparator = comp;
56          this.thresh = null; // created in getLongestCommonSubsequences
57      }
58  
59      /**
60       * Constructs the Diff object for the two arrays, using the default
61       * comparison mechanism between the objects, such as <code>equals</code> and
62       * <code>compareTo</code>.
63       */
64      public Diff(T[] a, T[] b) {
65          this(a, b, null);
66      }
67  
68      /**
69       * Constructs the Diff object for the two collections, using the given
70       * comparator.
71       */
72      @SuppressWarnings("unchecked")
73      public Diff(Collection<T> a, Collection<T> b, Comparator<T> comp) {
74          this((T[]) a.toArray(), (T[]) b.toArray(), comp);
75      }
76  
77      /**
78       * Constructs the Diff object for the two collections, using the default
79       * comparison mechanism between the objects, such as <code>equals</code> and
80       * <code>compareTo</code>.
81       */
82      public Diff(Collection<T> a, Collection<T> b) {
83          this(a, b, null);
84      }
85  
86      /**
87       * Runs diff and returns the results.
88       */
89      public List<Difference> diff() {
90          traverseSequences();
91  
92          // add the last difference, if pending:
93          if (pending != null) {
94              diffs.add(pending);
95          }
96  
97          return diffs;
98      }
99  
100     /**
101      * Traverses the sequences, seeking the longest common subsequences,
102      * invoking the methods <code>finishedA</code>, <code>finishedB</code>,
103      * <code>onANotB</code>, and <code>onBNotA</code>.
104      */
105     protected void traverseSequences() {
106         Integer[] matches = getLongestCommonSubsequences();
107 
108         int lastA = a.length - 1;
109         int lastB = b.length - 1;
110         int bi = 0;
111         int ai;
112 
113         int lastMatch = matches.length - 1;
114 
115         for (ai = 0; ai <= lastMatch; ++ai) {
116             Integer bLine = matches[ai];
117 
118             if (bLine == null) {
119                 onANotB(ai, bi);
120             } else {
121                 while (bi < bLine.intValue()) {
122                     onBNotA(ai, bi++);
123                 }
124 
125                 onMatch(ai, bi++);
126             }
127         }
128 
129         boolean calledFinishA = false;
130         boolean calledFinishB = false;
131 
132         while (ai <= lastA || bi <= lastB) {
133 
134             // last A?
135             if (ai == lastA + 1 && bi <= lastB) {
136                 if (!calledFinishA && callFinishedA()) {
137                     finishedA(lastA);
138                     calledFinishA = true;
139                 } else {
140                     while (bi <= lastB) {
141                         onBNotA(ai, bi++);
142                     }
143                 }
144             }
145 
146             // last B?
147             if (bi == lastB + 1 && ai <= lastA) {
148                 if (!calledFinishB && callFinishedB()) {
149                     finishedB(lastB);
150                     calledFinishB = true;
151                 } else {
152                     while (ai <= lastA) {
153                         onANotB(ai++, bi);
154                     }
155                 }
156             }
157 
158             if (ai <= lastA) {
159                 onANotB(ai++, bi);
160             }
161 
162             if (bi <= lastB) {
163                 onBNotA(ai, bi++);
164             }
165         }
166     }
167 
168     /**
169      * Override and return true in order to have <code>finishedA</code> invoked
170      * at the last element in the <code>a</code> array.
171      */
172     protected boolean callFinishedA() {
173         return false;
174     }
175 
176     /**
177      * Override and return true in order to have <code>finishedB</code> invoked
178      * at the last element in the <code>b</code> array.
179      */
180     protected boolean callFinishedB() {
181         return false;
182     }
183 
184     /**
185      * Invoked at the last element in <code>a</code>, if
186      * <code>callFinishedA</code> returns true.
187      */
188     protected void finishedA(int lastA) {
189     }
190 
191     /**
192      * Invoked at the last element in <code>b</code>, if
193      * <code>callFinishedB</code> returns true.
194      */
195     protected void finishedB(int lastB) {
196     }
197 
198     /**
199      * Invoked for elements in <code>a</code> and not in <code>b</code>.
200      */
201     protected void onANotB(int ai, int bi) {
202         if (pending == null) {
203             pending = new Difference(ai, ai, bi, -1);
204         } else {
205             pending.setDeleted(ai);
206         }
207     }
208 
209     /**
210      * Invoked for elements in <code>b</code> and not in <code>a</code>.
211      */
212     protected void onBNotA(int ai, int bi) {
213         if (pending == null) {
214             pending = new Difference(ai, -1, bi, bi);
215         } else {
216             pending.setAdded(bi);
217         }
218     }
219 
220     /**
221      * Invoked for elements matching in <code>a</code> and <code>b</code>.
222      */
223     protected void onMatch(int ai, int bi) {
224         if (pending == null) {
225             // no current pending
226         } else {
227             diffs.add(pending);
228             pending = null;
229         }
230     }
231 
232     /**
233      * Compares the two objects, using the comparator provided with the
234      * constructor, if any.
235      */
236     protected boolean equals(T x, T y) {
237         return comparator == null ? x.equals(y) : comparator.compare(x, y) == 0;
238     }
239 
240     /**
241      * Returns an array of the longest common subsequences.
242      */
243     public Integer[] getLongestCommonSubsequences() {
244         int aStart = 0;
245         int aEnd = a.length - 1;
246 
247         int bStart = 0;
248         int bEnd = b.length - 1;
249 
250         TreeMap<Integer, Integer> matches = new TreeMap<Integer, Integer>();
251 
252         while (aStart <= aEnd && bStart <= bEnd && equals(a[aStart], b[bStart])) {
253             matches.put(new Integer(aStart++), new Integer(bStart++));
254         }
255 
256         while (aStart <= aEnd && bStart <= bEnd && equals(a[aEnd], b[bEnd])) {
257             matches.put(new Integer(aEnd--), new Integer(bEnd--));
258         }
259 
260         Map<T, List<Integer>> bMatches = null;
261         if (comparator == null) {
262             if (a.length > 0 && a[0] instanceof Comparable) {
263                 // this uses the Comparable interface
264                 bMatches = new TreeMap<T, List<Integer>>();
265             } else {
266                 // this just uses hashCode()
267                 bMatches = new HashMap<T, List<Integer>>();
268             }
269         } else {
270             // we don't really want them sorted, but this is the only Map
271             // implementation (as of JDK 1.4) that takes a comparator.
272             bMatches = new TreeMap<T, List<Integer>>(comparator);
273         }
274 
275         for (int bi = bStart; bi <= bEnd; ++bi) {
276             T element = b[bi];
277             T key = element;
278             List<Integer> positions = (List<Integer>) bMatches.get(key);
279             if (positions == null) {
280                 positions = new ArrayList<Integer>();
281                 bMatches.put(key, positions);
282             }
283             positions.add(new Integer(bi));
284         }
285 
286         thresh = new TreeMap<Integer, Integer>();
287         Map<Integer, Object[]> links = new HashMap<Integer, Object[]>();
288 
289         for (int i = aStart; i <= aEnd; ++i) {
290             Object aElement = a[i]; // keygen here.
291             List<Integer> positions = bMatches.get(aElement);
292 
293             if (positions != null) {
294                 Integer k = new Integer(0);
295                 ListIterator<Integer> pit = positions.listIterator(positions
296                         .size());
297                 while (pit.hasPrevious()) {
298                     Integer j = pit.previous();
299 
300                     k = insert(j, k);
301 
302                     if (k == null) {
303                         // nothing
304                     } else {
305                         Object value = k.intValue() > 0 ? links
306                                 .get(new Integer(k.intValue() - 1)) : null;
307                         links.put(k, new Object[] { value, new Integer(i), j });
308                     }
309                 }
310             }
311         }
312 
313         if (thresh.size() > 0) {
314             Integer ti = thresh.lastKey();
315             Object[] link = links.get(ti);
316             while (link != null) {
317                 Integer x = (Integer) link[1];
318                 Integer y = (Integer) link[2];
319                 matches.put(x, y);
320                 link = (Object[]) link[0];
321             }
322         }
323 
324         return toArray(matches);
325     }
326 
327     /**
328      * Converts the map (indexed by java.lang.Integers) into an array.
329      */
330     protected static Integer[] toArray(TreeMap<Integer, Integer> map) {
331         int size = map.size() == 0 ? 0 : 1 + ((Integer) map.lastKey())
332                 .intValue();
333         Integer[] ary = new Integer[size];
334         Iterator<Integer> it = map.keySet().iterator();
335 
336         while (it.hasNext()) {
337             Integer idx = it.next();
338             Integer val = map.get(idx);
339             ary[idx.intValue()] = val;
340         }
341         return ary;
342     }
343 
344     /**
345      * Returns whether the integer is not zero (including if it is not null).
346      */
347     protected static boolean isNonzero(Integer i) {
348         return i != null && i.intValue() != 0;
349     }
350 
351     /**
352      * Returns whether the value in the map for the given index is greater than
353      * the given value.
354      */
355     protected boolean isGreaterThan(Integer index, Integer val) {
356         Integer lhs = (Integer) thresh.get(index);
357         return lhs != null && val != null && lhs.compareTo(val) > 0;
358     }
359 
360     /**
361      * Returns whether the value in the map for the given index is less than
362      * the given value.
363      */
364     protected boolean isLessThan(Integer index, Integer val) {
365         Integer lhs = (Integer) thresh.get(index);
366         return lhs != null && (val == null || lhs.compareTo(val) < 0);
367     }
368 
369     /**
370      * Returns the value for the greatest key in the map.
371      */
372     protected Integer getLastValue() {
373         return (Integer) thresh.get(thresh.lastKey());
374     }
375 
376     /**
377      * Adds the given value to the "end" of the threshold map, that is, with the
378      * greatest index/key.
379      */
380     protected void append(Integer value) {
381         Integer addIdx = null;
382         if (thresh.size() == 0) {
383             addIdx = new Integer(0);
384         } else {
385             Integer lastKey = (Integer) thresh.lastKey();
386             addIdx = new Integer(lastKey.intValue() + 1);
387         }
388         thresh.put(addIdx, value);
389     }
390 
391     /**
392      * Inserts the given values into the threshold map.
393      */
394     protected Integer insert(Integer j, Integer k) {
395         if (isNonzero(k) && isGreaterThan(k, j)
396                 && isLessThan(new Integer(k.intValue() - 1), j)) {
397             thresh.put(k, j);
398         } else {
399             int hi = -1;
400 
401             if (isNonzero(k)) {
402                 hi = k.intValue();
403             } else if (thresh.size() > 0) {
404                 hi = ((Integer) thresh.lastKey()).intValue();
405             }
406 
407             // off the end?
408             if (hi == -1 || j.compareTo(getLastValue()) > 0) {
409                 append(j);
410                 k = new Integer(hi + 1);
411             } else {
412                 // binary search for insertion point:
413                 int lo = 0;
414 
415                 while (lo <= hi) {
416                     int index = (hi + lo) / 2;
417                     Integer val = (Integer) thresh.get(new Integer(index));
418                     int cmp = j.compareTo(val);
419 
420                     if (cmp == 0) {
421                         return null;
422                     } else if (cmp > 0) {
423                         lo = index + 1;
424                     } else {
425                         hi = index - 1;
426                     }
427                 }
428 
429                 thresh.put(new Integer(lo), j);
430                 k = new Integer(lo);
431             }
432         }
433 
434         return k;
435     }
436 
437 }