Jkroll has uploaded a new change for review.
( https://gerrit.wikimedia.org/r/404293 )

Change subject: [WiP] Use character runs as an additional measure for change vs 
add+delete
......................................................................

[WiP] Use character runs as an additional measure for change vs add+delete

Change-Id: I2dafeca326dee2a594f7565d68f05128cf32acef
---
M DiffEngine.h
M Wikidiff2.h
2 files changed, 16 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/php/wikidiff2 refs/changes/93/404293/1

diff --git a/DiffEngine.h b/DiffEngine.h
index 38dbe0e..f32b1a7 100644
--- a/DiffEngine.h
+++ b/DiffEngine.h
@@ -31,7 +31,7 @@
 
 // helper function to calculate similarity of text lines, based on existing 
diff code.
 // used in DiffEngine and Wikidiff2.
-double calculateSimilarity(TextUtil::WordVector& words1, TextUtil::WordVector& 
words2, long long bailoutComplexity, int *opCountPtr = nullptr);
+double calculateSimilarity(TextUtil::WordVector& words1, TextUtil::WordVector& 
words2, long long bailoutComplexity, int *runCountPtr = nullptr, int 
*opCountPtr = nullptr);
 
 /**
  * Diff operation
@@ -195,7 +195,9 @@
        TextUtil::WordVector words1, words2;
        TextUtil::explodeWords(del, words1);
        TextUtil::explodeWords(add, words2);
-       return calculateSimilarity(words1, words2, bailoutComplexity) > 
looksLikeChangeThreshold();
+       int runCount;
+       double similarity = calculateSimilarity(words1, words2, 
bailoutComplexity, &runCount);
+       return similarity > looksLikeChangeThreshold();
 }
 
 // go through list of changed lines. if they are too dissimilar, convert to 
del+add.
@@ -677,7 +679,7 @@
        engine.diff(from_lines, to_lines, *this, bailoutComplexity);
 }
 
-inline double calculateSimilarity(TextUtil::WordVector& words1, 
TextUtil::WordVector& words2, long long bailoutComplexity, int *opCountPtr /* = 
nullptr*/)
+inline double calculateSimilarity(TextUtil::WordVector& words1, 
TextUtil::WordVector& words2, long long bailoutComplexity, int *runCountPtr /* 
= nullptr */, int *opCountPtr /* = nullptr*/)
 {
        typedef Diff<Word> WordDiff;
        WordDiff diff(words1, words2, bailoutComplexity);
@@ -689,6 +691,8 @@
                        return a + (b->suffixEnd - b->bodyStart);
                });
        };
+       int runCount = 0;
+       int lastOp = -1;
        for (int i = 0; i < diff.size(); ++i) {
                int op = diff[i].op;
                int charCount;
@@ -706,6 +710,10 @@
                }
                opCharCount[op] += charCount;
                charsTotal += charCount;
+               if(op != lastOp) {
+                       runCount++;
+                       lastOp = op;
+               }
        }
        if (opCharCount[DiffOp<Word>::copy] == 0) {
                similarity = 0.0;
@@ -723,6 +731,9 @@
                }
        }
 
+       if (runCountPtr)
+               *runCountPtr = runCount;
+
        return similarity;
 }
 
diff --git a/Wikidiff2.h b/Wikidiff2.h
index 185bfb6..91d0c03 100644
--- a/Wikidiff2.h
+++ b/Wikidiff2.h
@@ -40,6 +40,7 @@
                struct DiffMapEntry
                {
                        double similarity;
+                       int runCount;   // number of ChangeOp sequences 
(character counts).
                        int opCharCount[4] = { 0 };
                        int opIndexFrom, opLineFrom, opIndexTo, opLineTo;
                        bool lhsDisplayed = false, rhsDisplayed = false;
@@ -81,7 +82,7 @@
 inline Wikidiff2::DiffMapEntry::DiffMapEntry(Wikidiff2::WordVector& words1, 
Wikidiff2::WordVector& words2, int opIndexFrom_, int opLineFrom_, int 
opIndexTo_, int opLineTo_):
        opIndexFrom(opIndexFrom_), opLineFrom(opLineFrom_), 
opIndexTo(opIndexTo_), opLineTo(opLineTo_)
 {
-       similarity = calculateSimilarity(words1, words2, 
MAX_WORD_LEVEL_DIFF_COMPLEXITY, opCharCount);
+       similarity = calculateSimilarity(words1, words2, 
MAX_WORD_LEVEL_DIFF_COMPLEXITY, &runCount, opCharCount);
 }
 
 inline bool Wikidiff2::AllowPrintMovedLineDiff::operator () (StringDiff & 
linediff, int maxMovedLines)

-- 
To view, visit https://gerrit.wikimedia.org/r/404293
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2dafeca326dee2a594f7565d68f05128cf32acef
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/php/wikidiff2
Gerrit-Branch: master
Gerrit-Owner: Jkroll <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to