Finish adapting code to commons-text, adding missing header and fixing Javadocs
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a7e88eef Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a7e88eef Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a7e88eef Branch: refs/heads/myers-algo Commit: a7e88eef1a77649648a7fc6e9015448eb97c4572 Parents: cca1f19 Author: Bruno P. Kinoshita <ki...@apache.org> Authored: Thu Feb 5 00:33:14 2015 -0200 Committer: Bruno P. Kinoshita <ki...@apache.org> Committed: Thu Feb 5 00:33:14 2015 -0200 ---------------------------------------------------------------------- .../commons/text/diff/CommandVisitor.java | 12 +- .../apache/commons/text/diff/DeleteCommand.java | 6 +- .../apache/commons/text/diff/EditCommand.java | 9 +- .../apache/commons/text/diff/EditScript.java | 7 +- .../apache/commons/text/diff/InsertCommand.java | 6 +- .../apache/commons/text/diff/KeepCommand.java | 6 +- .../commons/text/diff/ReplacementsFinder.java | 10 +- .../commons/text/diff/ReplacementsHandler.java | 3 +- .../commons/text/diff/StringsComparator.java | 137 +++++++++++++++---- .../apache/commons/text/diff/package-info.java | 3 + 10 files changed, 147 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/CommandVisitor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java index ae3833c..d73dde8 100644 --- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java +++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java @@ -28,14 +28,17 @@ package org.apache.commons.text.diff; * it will perform the loop over all commands in the script and the * proper methods of the user class will be called as the commands are * encountered. + * </p> * <p> * The implementation of the user visitor class will depend on the * need. Here are two examples. + * </p> * <p> * The first example is a visitor that build the longest common * subsequence: + * </p> * <pre> - * import org.apache.commons.collections4.comparators.sequence.CommandVisitor; + * import org.apache.commons.text.diff.CommandVisitor; * * import java.util.ArrayList; * @@ -67,7 +70,7 @@ package org.apache.commons.text.diff; * The second example is a visitor that shows the commands and the way * they transform the first sequence into the second one: * <pre> - * import org.apache.commons.collections4.comparators.sequence.CommandVisitor; + * import org.apache.commons.text.diff.CommandVisitor; * * import java.util.Arrays; * import java.util.ArrayList; @@ -97,7 +100,7 @@ package org.apache.commons.text.diff; * } * * private void display(String commandName, Object object) { - * System.out.println(commandName + " " + object + " ->" + this); + * System.out.println(commandName + " " + object + ": " + this); * } * * public String toString() { @@ -114,8 +117,7 @@ package org.apache.commons.text.diff; * } * </pre> * - * @since 4.0 - * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public interface CommandVisitor<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/DeleteCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java index 0de3a43..7494002 100644 --- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java +++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java @@ -24,12 +24,12 @@ package org.apache.commons.text.diff; * transforming the first sequence into the second sequence uses an instance of * this class to represent the deletion of this object. The objects embedded in * these type of commands always come from the first sequence. + * </p> * - * @see SequencesComparator + * @see StringsComparator * @see EditScript * - * @since 4.0 - * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public class DeleteCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java index 7c5ff76..627db4d 100644 --- a/src/main/java/org/apache/commons/text/diff/EditCommand.java +++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java @@ -21,9 +21,10 @@ package org.apache.commons.text.diff; * into another one. * <p> * When two objects sequences are compared through the - * {@link SequencesComparator#getScript SequencesComparator.getScript} method, + * {@link StringsComparator#getScript StringsComparator.getScript} method, * the result is provided has a {@link EditScript script} containing the commands * that progressively transform the first sequence into the second one. + * </p> * <p> * There are only three types of commands, all of which are subclasses of this * abstract class. Each command is associated with one object belonging to at @@ -38,12 +39,12 @@ package org.apache.commons.text.diff; * the object passed as an argument comes from the second sequence) ; this can * be important if subclassing is used for some elements in the first sequence * and the <code>equals</code> method is specialized. + * </p> * - * @see SequencesComparator + * @see StringsComparator * @see EditScript * - * @since 4.0 - * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public abstract class EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditScript.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java index ab7b04e..641d60b 100644 --- a/src/main/java/org/apache/commons/text/diff/EditScript.java +++ b/src/main/java/org/apache/commons/text/diff/EditScript.java @@ -25,7 +25,7 @@ import java.util.List; * <p> * An edit script is the most general view of the differences between two * sequences. It is built as the result of the comparison between two sequences - * by the {@link SequencesComparator SequencesComparator} class. The user can + * by the {@link StringsComparator StringsComparator} class. The user can * walk through it using the <em>visitor</em> design pattern. * <p> * It is guaranteed that the objects embedded in the {@link InsertCommand insert @@ -35,13 +35,12 @@ import java.util.List; * is used for some elements in the first sequence and the <code>equals</code> * method is specialized. * - * @see SequencesComparator + * @see StringsComparator * @see EditCommand * @see CommandVisitor * @see ReplacementsHandler * - * @since 4.0 - * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public class EditScript<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/InsertCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java index 9ec9c6a..9a365d0 100644 --- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java +++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java @@ -24,12 +24,12 @@ package org.apache.commons.text.diff; * transforming the first sequence into the second sequence uses an instance of * this class to represent the insertion of this object. The objects embedded in * these type of commands always come from the second sequence. + * </p> * - * @see SequencesComparator + * @see StringsComparator * @see EditScript * - * @since 4.0 - * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public class InsertCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/KeepCommand.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java index a511bfe..687f7e7 100644 --- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java +++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java @@ -24,12 +24,12 @@ package org.apache.commons.text.diff; * transforming the first sequence into the second sequence uses an instance of * this class to represent the keeping of this object. The objects embedded in * these type of commands always come from the first sequence. + * </p> * - * @see SequencesComparator + * @see StringsComparator * @see EditScript * - * @since 4.0 - * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public class KeepCommand<T> extends EditCommand<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java index 361a26d..52e4112 100644 --- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java @@ -34,25 +34,26 @@ import java.util.List; * {@link DeleteCommand delete}, {@link KeepCommand keep} and * {@link InsertCommand insert} commands by fewer replacements acting on * complete sub-sequences. + * </p> * <p> * This class is devoted to perform this interpretation. It visits an * {@link EditScript edit script} (because it implements the * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied * handler implementing the {@link ReplacementsHandler ReplacementsHandler} * interface to process the sub-sequences. + * </p> * * @see ReplacementsHandler * @see EditScript - * @see SequencesComparator + * @see StringsComparator * - * @since 4.0 - * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $ + * @since 1.0 */ public class ReplacementsFinder<T> implements CommandVisitor<T> { private final List<T> pendingInsertions; private final List<T> pendingDeletions; - private int skipped; + private int skipped; /** Handler to call when synchronized sequences are found. */ private final ReplacementsHandler<T> handler; @@ -83,6 +84,7 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> { * <p> * When a synchronization object is identified, the pending insertions and * pending deletions sets are provided to the user handler as subsequences. + * </p> * * @param object synchronization object detected */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java index fe99a03..d5d61a4 100644 --- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java +++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java @@ -22,8 +22,7 @@ import java.util.List; * This interface is devoted to handle synchronized replacement sequences. * * @see ReplacementsFinder - * @since 4.0 - * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $ + * @since 1.0 */ public interface ReplacementsHandler<T> { http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/StringsComparator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java index c59f89f..c2fbfab 100644 --- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java +++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java @@ -1,25 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.commons.text.diff; - - +/** + * <p> + * It is guaranteed that the comparisons will always be done as + * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first + * sequence and <code>o2</code> belongs to the second sequence. This can + * be important if subclassing is used for some elements in the first + * sequence and the <code>equals</code> method is specialized. + * </p> + * <p> + * Comparison can be seen from two points of view: either as giving the smallest + * modification allowing to transform the first sequence into the second one, or + * as giving the longest sequence which is a subsequence of both initial + * sequences. The <code>equals</code> method is used to compare objects, so any + * object can be put into sequences. Modifications include deleting, inserting + * or keeping one object, starting from the beginning of the first sequence. + * </p> + * <p> + * This class implements the comparison algorithm, which is the very efficient + * algorithm from Eugene W. Myers + * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps"> + * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces + * the shortest possible {@link EditScript edit script} containing all the + * {@link EditCommand commands} needed to transform the first sequence into + * the second one. + * + * <p> + * This class has been adapted from the commons-collections implementation. + * </p> + * + * @see EditScript + * @see EditCommand + * @see CommandVisitor + * + * @since 1.0 + */ public class StringsComparator { - + + /** + * First character sequence. + */ private final String left; + /** + * Second character sequence. + */ private final String right; - + /** Temporary variables. */ private final int[] vDown; private final int[] vUp; - + + /** + * Simple constructor. + * <p> + * Creates a new instance of StringsComparator. + * </p> + * <p> + * It is <em>guaranteed</em> that the comparisons will always be done as + * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first + * sequence and <code>o2</code> belongs to the second sequence. This can be + * important if subclassing is used for some elements in the first sequence + * and the <code>equals</code> method is specialized. + * </p> + * + * @param left first character sequence to be compared + * @param right second character sequence to be compared + */ public StringsComparator(String left, String right) { this.left = left; this.right = right; - + final int size = left.length() + right.length() + 2; vDown = new int[size]; vUp = new int[size]; } - + /** * Get the {@link EditScript} object. * <p> @@ -29,12 +100,13 @@ public class StringsComparator { * {@link KeepCommand keep commands} come from the first sequence. This can * be important if subclassing is used for some elements in the first * sequence and the <code>equals</code> method is specialized. + * </p> * * @return the edit script resulting from the comparison of the two * sequences */ - public EditScript getScript() { - final EditScript script = new EditScript(); + public EditScript<Character> getScript() { + final EditScript<Character> script = new EditScript<Character>(); buildScript(0, left.length(), 0, right.length(), script); return script; } @@ -49,7 +121,7 @@ public class StringsComparator { * @param script the edited script */ private void buildScript(final int start1, final int end1, final int start2, final int end2, - final EditScript script) { + final EditScript<Character> script) { final Snake middle = getMiddleSnake(start1, end1, start2, end2); if (middle == null @@ -60,15 +132,15 @@ public class StringsComparator { int j = start2; while (i < end1 || j < end2) { if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) { - script.append(new KeepCommand(left.charAt(i))); + script.append(new KeepCommand<Character>(left.charAt(i))); ++i; ++j; } else { if (end1 - start1 > end2 - start2) { - script.append(new DeleteCommand(left.charAt(i))); + script.append(new DeleteCommand<Character>(left.charAt(i))); ++i; } else { - script.append(new InsertCommand(right.charAt(j))); + script.append(new InsertCommand<Character>(right.charAt(j))); ++j; } } @@ -80,16 +152,33 @@ public class StringsComparator { start2, middle.getStart() - middle.getDiag(), script); for (int i = middle.getStart(); i < middle.getEnd(); ++i) { - script.append(new KeepCommand(left.charAt(i))); + script.append(new KeepCommand<Character>(left.charAt(i))); } buildScript(middle.getEnd(), end1, middle.getEnd() - middle.getDiag(), end2, script); } } - + + /** + * Get the middle snake corresponding to two subsequences of the + * main sequences. + * <p> + * The snake is found using the MYERS Algorithm (this algorithms has + * also been implemented in the GNU diff program). This algorithm is + * explained in Eugene Myers article: + * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps"> + * An O(ND) Difference Algorithm and Its Variations</a>. + * </p> + * + * @param start1 the begin of the first sequence to be compared + * @param end1 the end of the first sequence to be compared + * @param start2 the begin of the second sequence to be compared + * @param end2 the end of the second sequence to be compared + * @return the middle snake + */ private Snake getMiddleSnake(int start1, int end1, int start2, int end2) { - // Myers Algorithm + // Myers Algorithm // Initialisations final int m = end1 - start1; final int n = end2 - start2; @@ -160,7 +249,7 @@ public class StringsComparator { // this should not happen throw new RuntimeException("Internal Error"); } - + /** * Build a snake. * @@ -235,17 +324,17 @@ public class StringsComparator { return diag; } } - + public static void main(String[] args) { StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil ."); - EditScript es = sc.getScript(); - es.visit(new CommandVisitor() { - + EditScript<Character> es = sc.getScript(); + es.visit(new CommandVisitor<Character>() { + boolean nlAdd = true; boolean nlRemove = true; @Override - public void visitInsertCommand(Object object) { + public void visitInsertCommand(Character object) { if (nlAdd) { System.out.println(); System.out.print("> "); @@ -255,7 +344,7 @@ public class StringsComparator { } @Override - public void visitKeepCommand(Object object) { + public void visitKeepCommand(Character object) { if (!nlAdd) { nlAdd = true; } @@ -267,7 +356,7 @@ public class StringsComparator { } @Override - public void visitDeleteCommand(Object object) { + public void visitDeleteCommand(Character object) { if (nlRemove) { System.out.println(); System.out.print("< "); http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/package-info.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java index 7a565d4..92fde86 100644 --- a/src/main/java/org/apache/commons/text/diff/package-info.java +++ b/src/main/java/org/apache/commons/text/diff/package-info.java @@ -17,6 +17,9 @@ /** * <p>Provides algorithms for diff between strings.</p> * + * <p>The initial implementation of the Myers algorithm was adapted from the + * commons-collections sequence package.</p> + * * @since 1.0 */ package org.apache.commons.text.diff;