aherbert commented on a change in pull request #233: URL: https://github.com/apache/commons-text/pull/233#discussion_r640025272
########## File path: src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java ########## @@ -58,7 +58,48 @@ public Integer apply(final CharSequence left, final CharSequence right) { if (left == null || right == null) { throw new IllegalArgumentException("Inputs must not be null"); } - return longestCommonSubsequence(left, right).length(); + // Find lengths of two strings + final int leftSz = left.length(); + final int rightSz = right.length(); + + // Check if we can save even more space + if (leftSz < rightSz) { + return calculateLCSLength(right, rightSz, left, leftSz); + } + return calculateLCSLength(left, leftSz, right, rightSz); + } + + // Assuming the sequence "left" is of size m and the sequence "right" is of size n, + // this method calculates the length of LCS the two sequences in O(m*n) time and O(n) space. + // Since LCS(S1, S2) = LCS(S2, S1), it is preferable to pass shorter sequence as "right," + // so that we can save even more space. + // This is an implementation of Hirschberg's algorithm: + // D. S. Hirschberg, "A Linear Space Algorithm for Computing Maximal Common Subsequences," Communications of the ACM, vol. 18, no. 6, pp. 341--343, 1975 + static int calculateLCSLength(final CharSequence left, final int m, + final CharSequence right, final int n) { + // We only need to keep track of last two rows + final int[][] dp = new int[2][1 + n]; + + for (int i = 0; i <= m; i++) { + // We avoid calling virtual function charAt within nested loop + // for the sake of efficiency + final int leftCh = i > 0 ? left.charAt(i - 1) : -1; + // Binary index, used to index the current row + // The previous row can be calculated by subtracting currentRow from 1 + final int currentRow = i % 2; Review comment: Given the index is positive then you can avoid the modulus and use a mask: ```java final int currentRow = i & 0x1; ``` It may still be better to just swap the rows as pointed out. The mask may be useful elsewhere to avoid modulus. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org