jenkins-bot has submitted this change and it was merged.
Change subject: Optimize and compress the tries as nodes are added
......................................................................
Optimize and compress the tries as nodes are added
When adding a prefix, only uncompressed branch nodes and terminal nodes
were used. Only after all prefixes were added would recursive subnet
merging and depth compression happen. These extra passes can be avoided
by changing addCidr() in three main ways:
* Adjacent subnets (e.g. 192.0.2.0/25 and 192.0.2.128/25) can be
checked while descending through the trie. The highest node having
only adjacent subnets below it can be set to true (match-success).
* A compressed branch node can be added instead of eight uncompressed
branch nodes whenever match-failure (false) is encountered, the
first bit starts a byte, and at least seven more bits follow.
* This does mean compressed branch nodes must be handled properly when
other prefixes are added. Fortunately, this is not too difficult.
Whenever a compressed branch node is encountered, it can be checked
against the value of the current byte, and if the bytes do not match
(or the prefix is too short), the node can be replaced with eight
uncompressed branch nodes.
This can yield a small speedup (e.g. ~30% for the tests) and avoids
problems with xdebug's default recursion depth limit of 100 (or 256
in version 2.3 and newer), without having to mess with ini_set().
Bug: T126495
Change-Id: I176d4b13faacb93f52d9541c3924080d85a30032
---
M src/IPSet.php
M tests/IPSetTest.php
2 files changed, 49 insertions(+), 109 deletions(-)
Approvals:
BBlack: Looks good to me, approved
jenkins-bot: Verified
diff --git a/src/IPSet.php b/src/IPSet.php
index 249d84e..d8a417f 100644
--- a/src/IPSet.php
+++ b/src/IPSet.php
@@ -80,11 +80,11 @@
* a net loss in my test scenarios due to additional match complexity)
*/
class IPSet {
- /** @var array $root4 The root of the IPv4 matching tree */
- private $root4 = array( false, false );
+ /** @var array|bool $root4 The root of the IPv4 matching tree */
+ private $root4 = false;
- /** @var array $root6 The root of the IPv6 matching tree */
- private $root6 = array( false, false );
+ /** @var array|bool $root6 The root of the IPv6 matching tree */
+ private $root6 = false;
/**
* Instantiate the object from an array of CIDR specs
@@ -98,11 +98,6 @@
foreach ( $cfg as $cidr ) {
$this->addCidr( $cidr );
}
-
- self::recOptimize( $this->root4 );
- self::recCompress( $this->root4, 0, 24 );
- self::recOptimize( $this->root6 );
- self::recCompress( $this->root6, 0, 120 );
}
/**
@@ -140,29 +135,56 @@
}
$rawOrd = array_map( 'ord', str_split( $raw ) );
- // special-case: zero mask overwrites the whole tree with a
pair of terminal successes
- if ( $mask == 0 ) {
- $node = array( true, true );
- return;
- }
-
// iterate the bits of the address while walking the tree
structure for inserts
+ // at the end, $snode will point to the highest node that could
only lead to a
+ // successful match (and thus can be set to true)
+ $snode =& $node;
$curBit = 0;
while ( 1 ) {
- $maskShift = 7 - ( $curBit & 7 );
- $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 <<
$maskShift ) ) >> $maskShift];
- ++$curBit;
if ( $node === true ) {
// already added a larger supernet, no need to
go deeper
return;
} elseif ( $curBit == $mask ) {
// this may wipe out deeper subnets from earlier
- $node = true;
+ $snode = true;
return;
} elseif ( $node === false ) {
// create new subarray to go deeper
- $node = array( false, false );
+ if ( !( $curBit & 7 ) && $curBit <= $mask - 8 )
{
+ $node = array( 'comp' =>
$rawOrd[$curBit >> 3], 'next' => false );
+ } else {
+ $node = array( false, false );
+ }
}
+
+ if ( isset( $node['comp'] ) ) {
+ $comp = $node['comp'];
+ if ( $rawOrd[$curBit >> 3] == $comp && $curBit
<= $mask - 8 ) {
+ // whole byte matches, skip over the
compressed node
+ $node =& $node['next'];
+ $snode =& $node;
+ $curBit += 8;
+ continue;
+ } else {
+ // have to decompress the node and
check individual bits
+ $unode = $node['next'];
+ for ( $i = 0; $i < 8; ++$i ) {
+ $unode = ( $comp & ( 1 << $i ) )
+ ? array( false, $unode )
+ : array( $unode, false
);
+ }
+ $node = $unode;
+ }
+ }
+
+ $maskShift = 7 - ( $curBit & 7 );
+ $index = ( $rawOrd[$curBit >> 3] & ( 1 << $maskShift )
) >> $maskShift;
+ if ( $node[$index ^ 1] !== true ) {
+ // no adjacent subnet, can't form a supernet at
this level
+ $snode =& $node[$index];
+ }
+ $node =& $node[$index];
+ ++$curBit;
}
}
@@ -188,7 +210,7 @@
}
$curBit = 0;
- while ( 1 ) {
+ while ( $node !== true && $node !== false ) {
if ( isset( $node['comp'] ) ) {
// compressed node, matches 1 whole byte on a
byte boundary
if ( $rawOrd[$curBit >> 3] != $node['comp'] ) {
@@ -202,87 +224,8 @@
$node =& $node[( $rawOrd[$curBit >> 3] & ( 1 <<
$maskShift ) ) >> $maskShift];
++$curBit;
}
-
- if ( $node === true || $node === false ) {
- return $node;
- }
- }
- // Unreachable, but fixes missing return statement
- return null;
- }
-
- /**
- * Recursively merges adjacent nets into larger supernets
- *
- * @param array &$node Tree node to optimize, by-reference
- *
- * e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7
- *
- * @return bool
- */
- private static function recOptimize( &$node ) {
- if ( $node[0] !== false && $node[0] !== true &&
self::recOptimize( $node[0] ) ) {
- $node[0] = true;
- }
- if ( $node[1] !== false && $node[1] !== true &&
self::recOptimize( $node[1] ) ) {
- $node[1] = true;
- }
- if ( $node[0] === true && $node[1] === true ) {
- return true;
- }
- return false;
- }
-
- /**
- * Recursively compresses a tree
- *
- * @param array &$node Tree node to compress, by-reference
- * @param integer $curBit current depth in the tree
- * @param integer $maxCompStart maximum depth at which compression can
start, family-specific
- *
- * This is a very simplistic compression scheme: if we go through a
whole
- * byte of address starting at a byte boundary with no real branching
- * other than immediate false-vs-(node|true), compress that subtree
down to a single
- * byte-matching node.
- * The $maxCompStart check elides recursing the final 7 levels of depth
(family-dependent)
- */
- private static function recCompress( &$node, $curBit, $maxCompStart ) {
- if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8
single path(s)
- $byte = 0;
- $cnode =& $node;
- $i = 8;
- while ( $i-- ) {
- if ( $cnode[0] === false ) {
- $byte |= 1 << $i;
- $cnode =& $cnode[1];
- } elseif ( $cnode[1] === false ) {
- $cnode =& $cnode[0];
- } else {
- // partial-byte branching, give up
- break;
- }
- }
- if ( $i == -1 ) { // means we did not exit the while()
via break
- $node = array(
- 'comp' => $byte,
- 'next' => &$cnode,
- );
- $curBit += 8;
- if ( $cnode !== true ) {
- self::recCompress( $cnode, $curBit,
$maxCompStart );
- }
- return;
- }
}
- ++$curBit;
- if ( $curBit <= $maxCompStart ) {
- if ( $node[0] !== false && $node[0] !== true ) {
- self::recCompress( $node[0], $curBit,
$maxCompStart );
- }
- if ( $node[1] !== false && $node[1] !== true ) {
- self::recCompress( $node[1], $curBit,
$maxCompStart );
- }
- }
+ return $node;
}
}
diff --git a/tests/IPSetTest.php b/tests/IPSetTest.php
index 0c8014f..d90a802 100644
--- a/tests/IPSetTest.php
+++ b/tests/IPSetTest.php
@@ -29,14 +29,6 @@
*/
class IPSetTest extends \PHPUnit_Framework_TestCase {
- protected function setUp() {
- parent::setUp();
-
- // The IPSet::recOptimize method curses over 100 times.
- // This setting defaults to 100.
- ini_set( 'xdebug.max_nesting_level', 1000 );
- }
-
/**
* Provides test cases for IPSetTest::testIPSet
*
@@ -252,6 +244,8 @@
'ffff:ffff:ffff:ffff:ffff:ffff:ffe0:0/110',
'ffff:ffff:ffff:ffff:ffff:ffff:ffc0:0/107',
'ffff:ffff:ffff:ffff:ffff:ffff:ffa0:0/107',
+
'ffff:ffff:ffff:ffff:ffff:ffff:fe00:0/112',
+
'ffff:ffff:ffff:ffff:ffff:ffff:fe00:0/111',
),
array(
'0.0.0.0' => false,
@@ -264,6 +258,9 @@
'ffff:ffff:ffff:ffff:ffff:ffff:fff4:4444' => true,
'ffff:ffff:ffff:ffff:ffff:ffff:fff9:8080' => true,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' => true,
+ 'ffff:ffff:ffff:ffff:ffff:ffff:fe00:0'
=> true,
+ 'ffff:ffff:ffff:ffff:ffff:ffff:fe01:0'
=> true,
+ 'ffff:ffff:ffff:ffff:ffff:ffff:fe02:0'
=> false,
),
),
);
--
To view, visit https://gerrit.wikimedia.org/r/270301
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I176d4b13faacb93f52d9541c3924080d85a30032
Gerrit-PatchSet: 1
Gerrit-Project: IPSet
Gerrit-Branch: master
Gerrit-Owner: PleaseStand <[email protected]>
Gerrit-Reviewer: BBlack <[email protected]>
Gerrit-Reviewer: Gergő Tisza <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: PleaseStand <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits