bearophile wrote:
Robert Fraser:
I implemented a version of the TST you posted using Tango + D1... Here
are my results:
Nice.
Is your TST a map? (not a set).
Yes, it's a map (for my use case).
Are you willing to show me your TST code? Your code is surely better than mine,
and there can be few little things that can be tuned.
And are you also willing to add it to Phobos? (I'll port it to Phobos1, it's
probably easy to do)?
Attached, if you don't mind NO comments.
I really want to port this:
http://dasnar.sdf-eu.org/res/ctst-README.html ... The automatic
balancing might make it a lot better if elements are inserted in order.
It might be very easy by replacing the malloc/free calls with
gc_malloc/gc_free calls.
The big winner here, though, appears to be
D's built-in associative arrays. I thought they were supposed to be very
slow, but Tango's implementation, at least, looks pretty good (without
any re-hashing).
Try comparing the performance of built-in D AAs with Psyco+Python dicts, or a
good C++ STL hash set/hash map.
D AAs are slow probably also because Phobos is statically compiled, they aren't
a template.
I think Phobos1's AAs were pretty bad, but have you tried
Tango's/druntimes? They're quite optimized (they're implemented as a
hash with trees used for collisions).
/*******************************************************************************
* Copyright (c) 2008 Robert Fraser
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*******************************************************************************/
module candy.util.array;
import tango.core.BitManip : bsr;
import tango.core.Traits : isReferenceType;
import tango.core.Memory : GC;
import tango.stdc.string : memcpy, memset;
public struct Array(T, size_t START_SIZE = 16, bool doScan =
isReferenceType!(T),
alias Realloc = GC.realloc, alias Free = GC.free)
{
T* arr = null;
size_t len = 0;
size_t cap = 0;
public void opCatAssign(T v)
{
len++;
if(len >= cap)
grow(cap ? cap * 2 : START_SIZE);
arr[len - 1] = v;
}
public void opCatAssign(T[] v)
{
int newlen = len + v.length;
if(newlen > cap)
{
if(newlen < START_SIZE)
grow(START_SIZE);
else
grow(2 << bsr(newlen + 1)); // Next power of 2
}
memcpy(arr + len, v.ptr, (newlen - len) * T.sizeof);
len = newlen;
}
public void opCatAssign(Array!(T) v)
{
opCatAssign(v.toArray());
}
public T[] toArray()
{
return arr[0 .. len];
}
public size_t length()
{
return len;
}
public T opIndex(size_t n)
{
assert(n < len);
return arr[n];
}
public void opIndexAssign(T v, size_t n)
{
assert(n < len);
arr[n] = v;
}
public T[] opSlice()
{
return toArray();
}
public T[] opSlice(size_t low, size_t high)
{
assert(low <= high);
assert(high < len);
return arr[low .. high];
}
public void opSliceAssign(T v)
{
arr[0 .. len] = v;
}
public void opSliceAssign(T v, size_t low, size_t high)
{
assert(low <= high);
assert(high < len);
arr[low .. high] = v;
}
public void opSliceAssign(T[] v, size_t low, size_t high)
{
assert(low <= high);
assert(high < len);
assert(v.length == high - low);
memcpy(arr + low, v.ptr, v.length * T.sizeof);
}
public void opSliceAssign(Array!(T) v, size_t low, size_t high)
{
opSliceAssign(v.toArray(), low, high);
}
public bool isEmpty()
{
return len == 0;
}
public void clear()
{
len = 0;
}
public void free()
{
if(arr)
{
Free(arr);
arr = null;
}
len = 0;
}
public void zero()
{
if(arr)
{
memset(arr, 0, cap * T.sizeof);
Free(arr);
arr = null;
}
len = 0;
}
public bool contains(T elem)
{
for(int i = 0; i < len; i++)
if(elem == arr[i])
return true;
return false;
}
public void addNull()
{
static if(!isReferenceType!(T))
assert(false);
len++;
if(len >= cap)
grow(cap ? cap * 2 : START_SIZE);
}
static if(doScan)
private static const BlkAttr = 0;
else
private static const BlkAttr = GC.BlkAttr.NO_SCAN;
private void grow(size_t sz)
{
arr = cast(T*) Realloc(arr, sz * T.sizeof, BlkAttr);
static if(isReferenceType!(T))
memset(arr + cap, 0, (sz - cap) * T.sizeof);
cap = sz;
}
}
public struct Stack(T, size_t START_SIZE = 16, bool doScan =
isReferenceType!(T), bool zeroOnPop = false,
alias ArrayType = Array, ArrayArgs...)
{
private ArrayType!(T, START_SIZE, doScan, ArrayArgs) arr;
public void push(T v)
{
arr ~= v;
}
public T pop()
{
assert(arr.len > 0);
T ret = arr.arr[arr.len - 1];
static if(zeroOnPop)
arr.arr[arr.len - 1] = T.init;
arr.len--;
return ret;
}
public T peek()
{
assert(arr.len > 0);
return arr.arr[arr.len - 1];
}
public bool isEmpty()
{
return arr.isEmpty();
}
public void clear()
{
static if(zeroOnPop)
arr.zero();
else
arr.clear();
}
public bool contains(T elem)
{
return arr.contains(elem);
}
public T[] toArray()
{
return arr.toArray();
}
}
module candy.util.memory;
import tango.core.Memory : GC;
import tango.stdc.string : memset;
import candy.util.array;
/**
* Provides a pool of GCed memory to allocate things from a block.
* This maintains cache coherency for related types (i.e. tree nodes).
* It doesn't garuntee any ordering, though, the array struct should be
* used for that. Also, everything has to be freed at once, freeing one
* portion of this has no effect.
*
* Based on a similar concept posted by bearophile at:
*
http://www.digitalmars.com/webnews/newsgroups.php?art_group=digitalmars.D&article_id=88227
*/
public struct MemPool(size_t BLOCK_SIZE = 1 << 14)
{
private struct Block
{
private void[BLOCK_SIZE] data = void;
}
private void* next; // Next available block
private void* end; // End of the current block
private Array!(Block*) blocks;
public void* alloc(size_t ALLOC_UNIT)()
{
if (next >= end)
{
Block* blk = new Block;
memset(blk.data.ptr, 0, BLOCK_SIZE);
next = blk.data.ptr;
end = next + BLOCK_SIZE;
}
void* ret = next;
next += ALLOC_UNIT;
return ret;
}
public void* allocAligned(size_t ALLOC_UNIT)
{
if (next >= end)
{
Block* blk = new Block;
memset(blk.data.ptr, 0, BLOCK_SIZE);
next = blk.data.ptr;
end = next + BLOCK_SIZE;
}
void* ret = next;
next += ((ALLOC_UNIT + 7) & ~7); // Next multiple of 8 if this isn't a
multiple of 8
return ret;
}
public void free()
{
blocks.zero();
next = null;
end = null;
}
}
/**
* Wrapper for MemPool that allocates the given struct
*/
public struct StructPool(T, size_t BLOCK_SIZE = 1 << 14)
{
private MemPool!(BLOCK_SIZE) mem;
public T* alloc() { return cast(T*) mem.alloc!(T.sizeof)(); }
public void free() { mem.free(); }
}
/**
* Placement new mixin for allocating from a memory pool. If a class type is
* going to be created a _lot_ this *might* be better than regular D new.
* Also, it can be inlined.
*/
public template MemPoolNew(size_t BLOCK_SIZE = 1 << 14)
{
private static MemPool!(BLOCK_SIZE) _memPool;
public new(uint size) { return _memPool.allocAligned(size); }
public delete(void *p) { } // We can't delete with the pool
public static void deleteAllInstances() { _memPool.free(); }
}
/**
* Simple Ternary Tree port of:
http://bitbucket.org/woadwarrior/trie/src/tip/python/tst.py
*
* License:
* Copyright (c) 2009 Jeethu Rao <[email protected]>
* Port to D Copyright (c) 2009 Robert Fraser <[email protected]>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
module candy.util.TreeNode;
import tango.core.Array : sort;
import candy.util.memory;
import candy.util.array;
public struct TernaryTree(K, V)
{
public static struct SearchResult
{
private TernaryTree* tree;
private TreeNode* result;
public int opApply(int delegate(ref K[], ref V) dg)
{
return tree.apply(result, dg);
}
}
private static struct TreeNode
{
TreeNode* left;
TreeNode* right;
TreeNode* mid;
K split;
K[] key;
V value;
}
private StructPool!(TreeNode) pool;
private TreeNode* root;
public void add(K[] k, V v)
{
assert(k.length);
// NOTE: the compiler wasn't unrolling the tail call, so this is
// presented non-recursively for your enjoyment
TreeNode* node = root;
size_t i = 0;
size_t len = k.length;
if(!node)
{
root = pool.alloc();
root.split = k[i];
node = root;
i++;
goto Lrest;
}
while(i < len)
{
K e = k[i];
K ne = node.split;
if(e < ne)
{
if(node.left)
{
node = node.left;
continue;
}
else
{
node = node.left = pool.alloc();
node.split = e;
i++;
goto Lrest;
}
}
else if(e > ne)
{
if(node.right)
{
node = node.right;
continue;
}
else
{
node = node.right = pool.alloc();
node.split = e;
i++;
goto Lrest;
}
}
i++;
if(i < len)
{
if(node.mid)
{
node = node.mid;
continue;
}
else
{
Lrest:
while(i < len)
{
node = node.mid = pool.alloc();
node.split = k[i];
i++;
}
node.value = v;
node.key = k;
return;
}
}
else
{
node.value = v;
node.key = k;
return;
}
// We should have broken or continued before this point
assert(false);
}
}
public bool get(K[] k, ref V v)
{
TreeNode* node = search(k);
if(node && node.key.length)
{
v = node.value;
return true;
}
return false;
}
public void addAll(K[][] keys, V[] values, bool keysSorted = false)
{
assert(keys.length == values.length);
if(!keys.length)
return;
if(!keysSorted)
sort(keys);
addAll(keys, values, 0, keys.length);
}
public void free()
{
root = null;
pool.free();
}
public SearchResult prefixSearch(K[] k)
{
return SearchResult(this, search(k));
}
public int opApply(int delegate(ref K[], ref V) dg)
{
return apply(root, dg);
}
public bool contains(K[] k)
{
return search(k) !is null;
}
private TreeNode* search(K[] k)
{
assert(k.length);
TreeNode* node = root;
while(node)
{
K e = k[0];
if(e < node.split)
{
node = node.left;
continue;
}
else if(e > node.split)
{
node = node.right;
continue;
}
else
{
k = k[1 .. $];
if(!k.length)
{
return node;
}
else
{
node = node.mid;
continue;
}
}
}
return null;
}
private int apply(TreeNode* node, int delegate(ref K[], ref V) dg)
{
int result = 0;
if(!node)
return result;
Stack!(TreeNode*, 1024) stack;
while(true)
{
if(node.right) stack.push(node.right);
if(node.mid) stack.push(node.mid);
if(node.left) stack.push(node.left);
if(node.key.length)
{
result = dg(node.key, node.value);
if(result)
return result;
}
if(stack.isEmpty)
return result;
node = stack.pop();
}
}
private void addAll(K[][] keys, V[] values, size_t low, size_t high)
{
uint diff = high - low;
if(diff == 1)
{
add(keys[low], values[low]);
return;
}
else if(diff == 2)
{
add(keys[low], values[low]);
add(keys[low+1], values[low+1]);
return;
}
else
{
size_t mid = low + (high / 2);
add(keys[mid], values[mid]);
// OPTIMIZE iterative.. also is 2 is the best cutoff
value here?
addAll(keys, values, mid + 1, high);
addAll(keys, values, low, mid);
}
}
}