Groovier1 has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403622 )
Change subject: Adding WikimediaEvents module for logging behaviour data ...................................................................... Adding WikimediaEvents module for logging behaviour data This change adds a module in WikimediaEvents extension for logging key press and mouse movement data statistics. This data is for use in https://phabricator.wikimedia.org/project/view/3137/ for the task T183869. The data collected will be used to train a machine learning classifier to detect bots registering accounts. Feature: T183869 Change-Id: I90a6704e78009d06a1399b306fb46b53624bbee1 --- M WikimediaEventsHooks.php M extension.json A modules/aiCaptcha/ext.wikimediaEvents.aiCaptcha.js 3 files changed, 470 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikimediaEvents refs/changes/22/403622/1 diff --git a/WikimediaEventsHooks.php b/WikimediaEventsHooks.php index 260a8ab..3913d53 100644 --- a/WikimediaEventsHooks.php +++ b/WikimediaEventsHooks.php @@ -21,6 +21,10 @@ if ( $out->getUser()->isLoggedIn() ) { $out->addModules( 'ext.wikimediaEvents.loggedin' ); } + + if( $out->getTitle()->isSpecial('CreateAccount') ) { + $out->addModules( 'ext.wikimediaEvents.aiCaptcha' ); + } } /** @@ -266,13 +270,14 @@ public static function onResourceLoaderGetConfigVars( &$vars ) { global $wgWMEStatsdBaseUri, $wgWMEReadingDepthSamplingRate, $wgWMEReadingDepthEnabled, $wgWMEPrintSamplingRate, - $wgWMEPrintEnabled; + $wgWMEPrintEnabled, $wgWMEAICaptchaEnabled; $vars['wgWMEStatsdBaseUri'] = $wgWMEStatsdBaseUri; $vars['wgWMEReadingDepthSamplingRate'] = $wgWMEReadingDepthSamplingRate; $vars['wgWMEReadingDepthEnabled'] = $wgWMEReadingDepthEnabled; $vars['wgWMEPrintSamplingRate'] = $wgWMEPrintSamplingRate; $vars['wgWMEPrintEnabled'] = $wgWMEPrintEnabled; + $vars['wgWMEAICaptchaEnabled'] = $wgWMEAICaptchaEnabled; } /** diff --git a/extension.json b/extension.json index d800aa4..abfd3d6 100644 --- a/extension.json +++ b/extension.json @@ -145,6 +145,11 @@ "schema": "RecentChangesTopLinks", "revision": 16732249 }, + "schema.AiCaptcha": { + "class": "ResourceLoaderSchemaModule", + "schema": "AiCaptcha", + "revision": 0 + }, "ext.wikimediaEvents": { "scripts": [ "all/ext.wikimediaEvents.events.js", @@ -185,6 +190,19 @@ "mediawiki.user", "mediawiki.Uri" ] + }, + "ext.wikimediaEvents.aiCaptcha": { + "scripts": [ + "aiCaptcha/ext.wikimediaEvents.aiCaptcha.js" + ], + "targets": [ + "desktop", + "mobile" + ], + "dependencies": [ + "mediawiki.user", + "mediawiki.Uri" + ] } }, "ResourceFileModulePaths": { @@ -199,7 +217,8 @@ "WMEStatsdBaseUri": false, "WMESearchRelevancePages": { "_merge_strategy": "array_plus" - } + }, + "WMEAICaptchaEnabled": false }, "manifest_version": 1 } diff --git a/modules/aiCaptcha/ext.wikimediaEvents.aiCaptcha.js b/modules/aiCaptcha/ext.wikimediaEvents.aiCaptcha.js new file mode 100644 index 0000000..506cbe3 --- /dev/null +++ b/modules/aiCaptcha/ext.wikimediaEvents.aiCaptcha.js @@ -0,0 +1,444 @@ +( function ( $, track, config ) { + + var dwellTimings = {}, flightTimings = {}, + upTime = {}, downTime = {}, + mousePositions = [], mouseClickTimings = [], + passwordInFocus = false; + + /** + Statistics library start + **/ + + function sum(x) { + + // If the array is empty, we needn't bother computing its sum + if (x.length === 0) { + return 0; + } + + // Initializing the sum as the first number in the array + var sum = x[0]; + + // Keeping track of the floating-point error correction + var correction = 0; + + var transition; + + for (var i = 1; i < x.length; i++) { + transition = sum + x[i]; + + // Here we need to update the correction in a different fashion + // if the new absolute value is greater than the absolute sum + if (Math.abs(sum) >= Math.abs(x[i])) { + correction += ((sum - transition) + x[i]); + } + else { + correction += ((x[i] - transition) + sum); + } + + sum = transition; + } + + // Returning the corrected sum + return sum + correction; + } + + function mean(x) { + // The mean of no numbers is null + if (x.length === 0) { + return 0; + } + + return sum(x) / x.length; + } + + function sumNthPowerDeviations(x, n) { + var meanValue = mean(x), + sum = 0, + tempValue, + i; + + // This is an optimization: when n is 2 (we're computing a number squared), + // multiplying the number by itself is significantly faster than using + // the Math.pow method. + if (n === 2) { + for (i = 0; i < x.length; i++) { + tempValue = x[i] - meanValue; + sum += tempValue * tempValue; + } + } else { + for (i = 0; i < x.length; i++) { + sum += Math.pow(x[i] - meanValue, n); + } + } + + return sum; + } + + function variance(x) { + // The variance of no numbers is null + if (x.length === 0) { + return 0; + } + + // Find the mean of squared deviations between the + // mean value and each value. + return sumNthPowerDeviations(x, 2) / x.length; + } + + function kurtosis(x) { + + var n = x.length; + + if (n < 4) { + return 0; + } + + var meanValue = mean(x); + var tempValue; + var secondCentralMoment = 0; + var fourthCentralMoment = 0; + + for (var i = 0; i < n; i++) { + tempValue = x[i] - meanValue; + secondCentralMoment += tempValue * tempValue; + fourthCentralMoment += tempValue * tempValue * tempValue * tempValue; + } + + return (n - 1) / ((n - 2) * (n - 3)) * + (n * (n + 1) * fourthCentralMoment / (secondCentralMoment * secondCentralMoment) - 3 * (n - 1)); + } + + function skewness(x) { + + if (x.length < 3) { + return 0; + } + + var meanValue = mean(x); + var tempValue; + var sumSquaredDeviations = 0; + var sumCubedDeviations = 0; + + for (var i = 0; i < x.length; i++) { + tempValue = x[i] - meanValue; + sumSquaredDeviations += tempValue * tempValue; + sumCubedDeviations += tempValue * tempValue * tempValue; + } + + // this is Bessels' Correction: an adjustment made to sample statistics + // that allows for the reduced degree of freedom entailed in calculating + // values from samples rather than complete populations. + var besselsCorrection = x.length - 1; + + // Find the mean value of that list + var theSampleStandardDeviation = Math.sqrt(sumSquaredDeviations / besselsCorrection); + + var n = x.length, + cubedS = Math.pow(theSampleStandardDeviation, 3); + + return n * sumCubedDeviations / ((n - 1) * (n - 2) * cubedS); + } + + function quantile(x, p) { + var copy = x.slice(); + + if (Array.isArray(p)) { + // rearrange elements so that each element corresponding to a requested + // quantile is on a place it would be if the array was fully sorted + multiQuantileSelect(copy, p); + // Initialize the result array + var results = []; + // For each requested quantile + for (var i = 0; i < p.length; i++) { + results[i] = quantileSorted(copy, p[i]); + } + return results; + } else { + var idx = quantileIndex(copy.length, p); + quantileSelect(copy, idx, 0, copy.length - 1); + return quantileSorted(copy, p); + } + } + + function quantileSorted(x /*: Array<number> */, p /*: number */)/*: number */ { + var idx = x.length * p; + if (x.length === 0) { + throw new Error('quantile requires at least one data point.'); + } else if (p < 0 || p > 1) { + throw new Error('quantiles must be between 0 and 1'); + } else if (p === 1) { + // If p is 1, directly return the last element + return x[x.length - 1]; + } else if (p === 0) { + // If p is 0, directly return the first element + return x[0]; + } else if (idx % 1 !== 0) { + // If p is not integer, return the next element in array + return x[Math.ceil(idx) - 1]; + } else if (x.length % 2 === 0) { + // If the list has even-length, we'll take the average of this number + // and the next value, if there is one + return (x[idx - 1] + x[idx]) / 2; + } else { + // Finally, in the simple case of an integer value + // with an odd-length list, return the x value at the index. + return x[idx]; + } + } + + function quantileSelect(arr, k, left, right) { + if (k % 1 === 0) { + quickselect(arr, k, left, right); + } else { + k = Math.floor(k); + quickselect(arr, k, left, right); + quickselect(arr, k + 1, k + 1, right); + } + } + + function quickselect(arr/*: Array<number> */, k/*: number */, left/*: ?number */, right/*: ?number */)/*: void */ { + left = left || 0; + right = right || (arr.length - 1); + + while (right > left) { + // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time + if (right - left > 600) { + var n = right - left + 1; + var m = k - left + 1; + var z = Math.log(n); + var s = 0.5 * Math.exp(2 * z / 3); + var sd = 0.5 * Math.sqrt(z * s * (n - s) / n); + if (m - n / 2 < 0) sd *= -1; + var newLeft = Math.max(left, Math.floor(k - m * s / n + sd)); + var newRight = Math.min(right, Math.floor(k + (n - m) * s / n + sd)); + quickselect(arr, k, newLeft, newRight); + } + + var t = arr[k]; + var i = left; + var j = right; + + swap(arr, left, k); + if (arr[right] > t) swap(arr, left, right); + + while (i < j) { + swap(arr, i, j); + i++; + j--; + while (arr[i] < t) i++; + while (arr[j] > t) j--; + } + + if (arr[left] === t) swap(arr, left, j); + else { + j++; + swap(arr, j, right); + } + + if (j <= k) left = j + 1; + if (k <= j) right = j - 1; + } + } + + function swap(arr, i, j) { + var tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; + } + + function multiQuantileSelect(arr, p) { + var indices = [0]; + for (var i = 0; i < p.length; i++) { + indices.push(quantileIndex(arr.length, p[i])); + } + indices.push(arr.length - 1); + indices.sort(compare); + + var stack = [0, indices.length - 1]; + + while (stack.length) { + var r = Math.ceil(stack.pop()); + var l = Math.floor(stack.pop()); + if (r - l <= 1) continue; + + var m = Math.floor((l + r) / 2); + quantileSelect(arr, indices[m], indices[l], indices[r]); + + stack.push(l, m, m, r); + } + } + + function compare(a, b) { + return a - b; + } + + function quantileIndex(len /*: number */, p /*: number */)/*:number*/ { + var idx = len * p; + if (p === 1) { + // If p is 1, directly return the last index + return len - 1; + } else if (p === 0) { + // If p is 0, directly return the first index + return 0; + } else if (idx % 1 !== 0) { + // If index is not integer, return the next index in array + return Math.ceil(idx) - 1; + } else if (len % 2 === 0) { + // If the list has even-length, we'll return the middle of two indices + // around quantile to indicate that we need an average value of the two + return idx - 0.5; + } else { + // Finally, in the simple case of an integer index + // with an odd-length list, return the index + return idx; + } + } + + function interQuartileRange(x) { + // Interquartile range is the span between the upper quartile, + // at `0.75`, and lower quartile, `0.25` + if(x.length<4) { + return 0; + } + + var q1 = quantile(x, 0.75), + q2 = quantile(x, 0.25); + + if (typeof q1 === 'number' && typeof q2 === 'number') { + return q1 - q2; + } + } + + /** + Statistics library end + **/ + + function getMouseStats() { + var speeds = [], prevSlope = 0, prevSpeed = 0, curvatures = [], + accelerations = [], mouseClickDeltas = []; + for (var i=1; i<mousePositions.length; i++) { + var point1 = mousePositions[i-1]; + var point2 = mousePositions[i]; + var deltaX = 1 + Math.abs(point2['x'] - point1['x']); + var deltaY = Math.abs(point2['y'] - point1['y']); + var dist = Math.sqrt(deltaX**2 + deltaY**2); + var deltaT = 1 + (point2['t'] - point1['t']); + var speed = dist / deltaT; + speeds.push(speed); + var acceleration = (speed - prevSpeed) / deltaT; + accelerations.push(acceleration); + var slope = deltaY / deltaX; + var curvature = (slope - prevSlope) / deltaX; + curvatures.push(curvature); + prevSlope = slope; + prevSpeed = speed; + } + for (var i=1; i<mouseClickTimings.length; i++) { + mouseClickDeltas.push(mouseClickTimings[i]-mouseClickTimings[i-1]); + } + return { + "averageMouseSpeed": mean(speeds), + "averageMouseCurvature": mean(curvatures), + "averageMouseAcceleration": mean(accelerations), + "averageDeltaClickTime": mean(mouseClickDeltas), + "mouseSpeedVariance": variance(speeds), + "mouseCurvatureVariance": variance(curvatures), + "mouseAccelerationVariance": variance(accelerations), + "deltaClickTimeVariance": variance(mouseClickDeltas), + "mouseSpeedSkewness": skewness(speeds), + "mouseCurvatureSkewness": skewness(curvatures), + "mouseAccelerationSkewness": skewness(accelerations), + "deltaClickTimeSkewness": skewness(mouseClickDeltas), + "mouseSpeedKurtosis": kurtosis(speeds), + "mouseCurvatureKurtosis": kurtosis(curvatures), + "mouseAccelerationKurtosis": kurtosis(accelerations), + "deltaClickTimeKurtosis": kurtosis(mouseClickDeltas), + "mouseSpeedInterQuartileRange": interQuartileRange(speeds), + "mouseCurvatureInterQuartileRange": interQuartileRange(curvatures), + "mouseAccelerationInterQuartileRange": interQuartileRange(accelerations), + "deltaClickTimeInterQuartileRange": interQuartileRange(mouseClickDeltas) + }; + } + + function getKeyPressStats() { + var dwellTimes=[], flightTimes=[]; + Object.keys(dwellTimings).forEach((field) => { + dwellTimes = dwellTimes.concat(dwellTimings[field]); + }); + Object.keys(flightTimings).forEach((field) => { + flightTimes = flightTimes.concat(flightTimings[field]); + }); + var deltaDwellTimes=[], deltaFlightTimes=[]; + for (var i=1; i<dwellTimes.length; i++) { + deltaDwellTimes.push(dwellTimes[i]-dwellTimes[i-1]); + } + for (var i=1; i<flightTimes.length; i++) { + deltaFlightTimes.push(flightTimes[i]-flightTimes[i-1]); + } + return { + "averageDeltaDwellTime": mean(deltaDwellTimes), + "averageDeltaFlightTime": mean(deltaFlightTimes), + "averageDwellTime": mean(dwellTimes), + "averageFlightTime": mean(flightTimes), + "dwellTimeVariance": variance(dwellTimes), + "flightTimeVariance": variance(flightTimes), + "dwellTimeSkewness": skewness(dwellTimes), + "flightTimeSkewness": skewness(flightTimes), + "dwellTimeKurtosis": kurtosis(dwellTimes), + "flightTimeKurtosis": kurtosis(flightTimes), + "dwellTimeInterQuartileRange": interQuartileRange(dwellTimes), + "flightTimeInterQuartileRange": interQuartileRange(flightTimes) + }; + } + + if ( config.get( 'wgWMEAICaptchaEnabled' ) ) { + + $.each( [ 'wpName2', 'wpEmail' ], function ( _, field ) { + $( '#' + field ).keyup( function () { + var now = new Date().getTime(); + var elapsed = now - (downTime[field] || now); + dwellTimings[field] = dwellTimings[field] || []; + dwellTimings[field].push(elapsed); + upTime[field] = now; + } ); + + $( '#' + field ).keydown( function () { + var now = new Date().getTime(); + var elapsed = now - (upTime[field] || now); + flightTimings[field] = flightTimings[field] || []; + flightTimings[field].push(elapsed); + downTime[field] = now; + } ); + } ); + + $.each( [ 'wpPassword2', 'wpRetype' ], function ( _, field ) { + $( '#' + field ).focus( function() { + passwordInFocus = true; + } ); + } ); + + $(document).mousemove( function(event) { + if ( !passwordInFocus ) { + mousePositions.push({x:event.pageX, y:event.pageY, t:new Date().getTime()}); + } + }); + + $(document).click( function(event) { + if ( !passwordInFocus ) { + mouseClickTimings.push(new Date().getTime()); + } + }); + + $('#wpCreateaccount').click( function(){ + var mouseStats = getMouseStats(); + var keyPressStats = getKeyPressStats(); + var combinedStats = {}; + Object.keys(mouseStats).forEach((key) => combinedStats[key] = mouseStats[key]); + Object.keys(keyPressStats).forEach((key) => combinedStats[key] = keyPressStats[key]); + // var username = $('#wpName2').val(); + // combinedStats["username"] = username; + track('event.AiCaptcha',combinedStats); + }); + } +}( jQuery, mediaWiki.track, mediaWiki.config ) ); -- To view, visit https://gerrit.wikimedia.org/r/403622 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I90a6704e78009d06a1399b306fb46b53624bbee1 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/WikimediaEvents Gerrit-Branch: master Gerrit-Owner: Groovier1 <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
