Jkroll has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/398094 )
Change subject: Make change detection threshold configurable via ini setting
......................................................................
Make change detection threshold configurable via ini setting
Change 356582 introduced a similarity threshold: a DiffOp::change whose old and
new lines are not more similar than the threshold is converted to add+del ops.
This patch makes the hardcoded threshold (0.25) configurable using the
wikidiff2.change_threshold ini setting. We want to use this to find a better
threshold.
Bug: T181404
Change-Id: I771698222b61ef5fa6f6f502eca928f280ebf4d2
---
M DiffEngine.h
M Wikidiff2.h
M hhvm_wikidiff2.cpp
M php_wikidiff2.cpp
4 files changed, 38 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/php/wikidiff2
refs/changes/94/398094/1
diff --git a/DiffEngine.h b/DiffEngine.h
index 76d0c0b..e6054a3 100644
--- a/DiffEngine.h
+++ b/DiffEngine.h
@@ -23,6 +23,9 @@
#include "Wikidiff2.h"
#include "Word.h"
#include "textutil.h"
+#ifdef HHVM_BUILD_DSO
+#include "hphp/runtime/base/ini-setting.h"
+#endif //HHVM_BUILD_DSO
// helper function to calculate similarity of text lines, based on existing
diff code.
// used in DiffEngine and Wikidiff2.
@@ -146,6 +149,7 @@
int lcs;
bool done;
enum {MAX_CHUNKS=8};
+ double looksLikeChangeThreshold();
void detectDissimilarChanges(PointerVector& del, PointerVector&
add, Diff<T>& diff, long long bailoutComplexity);
bool looksLikeChange(const T& del, const T& add, long long
bailoutComplexity);
};
@@ -167,6 +171,21 @@
done = false;
}
+template<typename T>
+inline double DiffEngine<T>::looksLikeChangeThreshold()
+{
+#ifdef HHVM_BUILD_DSO
+ // HHVM module
+ HPHP::Variant value(0.25);
+ HPHP::IniSetting::Get(std::string("wikidiff2.change_threshold"), value);
+ return value.toDouble();
+#else
+ // Zend module
+ double ret = INI_FLT("wikidiff2.change_threshold");
+ return ret;
+#endif //HHVM_BUILD_DSO
+}
+
// for a DiffOp::change, decide whether it should be treated as a successive
add and delete based on similarity.
template<typename T>
inline bool DiffEngine<T>::looksLikeChange(const T& del, const T& add, long
long bailoutComplexity)
@@ -174,7 +193,7 @@
TextUtil::WordVector words1, words2;
TextUtil::explodeWords(del, words1);
TextUtil::explodeWords(add, words2);
- return calculateSimilarity(words1, words2, bailoutComplexity) > 0.25;
+ return calculateSimilarity(words1, words2, bailoutComplexity) >
looksLikeChangeThreshold();
}
// go through list of changed lines. if they are too dissimilar, convert to
del+add.
diff --git a/Wikidiff2.h b/Wikidiff2.h
index 244109f..ae4fb11 100644
--- a/Wikidiff2.h
+++ b/Wikidiff2.h
@@ -16,7 +16,8 @@
#include <set>
#include <memory>
-#define WIKIDIFF2_VERSION_STRING "1.5.3"
+#define WIKIDIFF2_VERSION_STRING "1.5.3"
+#define WIKIDIFF2_CHANGE_THRESHOLD_DEFAULT "0.25"
class Wikidiff2 {
public:
diff --git a/hhvm_wikidiff2.cpp b/hhvm_wikidiff2.cpp
index 957f950..c727ea3 100644
--- a/hhvm_wikidiff2.cpp
+++ b/hhvm_wikidiff2.cpp
@@ -70,6 +70,11 @@
return version;
}
+// ini settings settable anywhere (PHP_INI_ALL)
+thread_local struct {
+ double changeThreshold;
+} s_ini;
+
static class Wikidiff2Extension : public Extension {
public:
Wikidiff2Extension() : Extension("wikidiff2",
WIKIDIFF2_VERSION_STRING) {}
@@ -79,6 +84,9 @@
HHVM_FE(wikidiff2_version);
loadSystemlib();
}
+ void threadInit() override {
+ IniSetting::Bind(this, IniSetting::PHP_INI_ALL,
"wikidiff2.change_threshold", WIKIDIFF2_CHANGE_THRESHOLD_DEFAULT,
&s_ini.changeThreshold);
+ }
} s_wikidiff2_extension;
HHVM_GET_MODULE(wikidiff2)
diff --git a/php_wikidiff2.cpp b/php_wikidiff2.cpp
index 52179ca..1a87341 100644
--- a/php_wikidiff2.cpp
+++ b/php_wikidiff2.cpp
@@ -45,17 +45,25 @@
STANDARD_MODULE_PROPERTIES
};
+/* {{{ INI Settings */
+PHP_INI_BEGIN()
+ PHP_INI_ENTRY("wikidiff2.change_threshold",
WIKIDIFF2_CHANGE_THRESHOLD_DEFAULT, PHP_INI_ALL, NULL)
+PHP_INI_END()
+/* }}} */
+
#ifdef COMPILE_DL_WIKIDIFF2
ZEND_GET_MODULE(wikidiff2)
#endif
PHP_MINIT_FUNCTION(wikidiff2)
{
+ REGISTER_INI_ENTRIES();
return SUCCESS;
}
PHP_MSHUTDOWN_FUNCTION(wikidiff2)
{
+ UNREGISTER_INI_ENTRIES();
return SUCCESS;
}
--
To view, visit https://gerrit.wikimedia.org/r/398094
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I771698222b61ef5fa6f6f502eca928f280ebf4d2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/php/wikidiff2
Gerrit-Branch: master
Gerrit-Owner: Jkroll <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits