The current code omits some possibilities to optimize the tree, leading to a large tree with way to many steps until the leave is reached.
The new code optimizes the tree based on a simple observation: as long as at most half of the nodes at the current level are leaves, tree size is reduced or kept when the children are pulled up one level. Tree depth is halved on average (for some leaves, reduced from 9 to 3), tree size is reduced slightly. The algorithm works as follows: 1. create leaves, each containing min_op_count opcodes 2. create a binary tree 3. pullup children, as long as there are children and tree size is reduced 3.1. do the same for each non-leave child The pullup algorithm has one tunable, "pullup_bias". If set to 0, the algorithm is followed stritly, if set to values larger 0, some more children are pulled up, reducing depth in favor of slightly increased size. Signed-off-by: Stefan Brüns <stefan.bru...@rwth-aachen.de> --- src/mapi/glapi/gen/glX_server_table.py | 76 ++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/src/mapi/glapi/gen/glX_server_table.py b/src/mapi/glapi/gen/glX_server_table.py index 47aa111..5b3ffdc 100644 --- a/src/mapi/glapi/gen/glX_server_table.py +++ b/src/mapi/glapi/gen/glX_server_table.py @@ -65,6 +65,8 @@ class function_table: # Minimum number of opcodes in a leaf node. self.min_op_bits = 3 self.min_op_count = (1 << self.min_op_bits) + + self.pullup_bias = 1 return @@ -84,6 +86,80 @@ class function_table: return + def get_leaves(self): + leaves = [] + + for i in range(0, (1<<self.max_bits), self.min_op_count): + + # M, [children], tree size, depth, type, first opcode + leave = [ 0, [], 1, 0, 'E', i ] + + for j in range(i, i + self.min_op_count): + if self.functions.has_key(j): + leave[4] = 'L' + break; + + leaves.append(leave) + + return leaves + + + def combine_tree(self, nodes): + newnodes = [] + + for i in range(0, len(nodes)-1, 2): + c1 = nodes[i] + c2 = nodes[i+1] + parent = [ 1, [c1, c2], c1[2]+c2[2]+1, 0, 'N', c1[5] ] + if c1[4] == c2[4]: + parent[4] = c1[4] + + newnodes.append(parent) + + if len(newnodes) >= 2: + return self.combine_tree(newnodes) + + return parent + + + def pullup_nodes(self, node): + + children = node[1] + + # Count non-leaves. If at least half of the children + # are non-leaves pulling up saves space. Add a small + # bias to favor shallower trees + count = sum( 1 for child in children if child[4] == 'N' ) + count += self.pullup_bias + + # conditionally replace the children of a node with its + # grandchilren. Grandchildren exist if child size > 1 + if count >= len(children)/2 and children[0][2] > 1: + newnodes = [] + for child in children: + newnodes.append(child[1][0]) + newnodes.append(child[1][1]) + + node[0] += 1 + node[1] = newnodes + node[2] = newnodes[0][2] * len(newnodes) + 1 + self.pullup_nodes(node) + + else: + node[2] = 1 + for child in children: + if child[4] == 'N': + self.pullup_nodes(child) + else: + child[1] = [] + child[2] = 1 + + node[3] = max( child[3] + 1, node[3] ) + node[2] += child[2] + + return + + def divide_group(self, min_opcode, total): """Divide the group starting min_opcode into subgroups. Returns a tuple containing the number of bits consumed by -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev