Hi,
Can a gatekeeper help review this patch? It adds a "bit test"
implementation for some small switch-case statements, whose case values
are within a small range (< word size) and where are different case
values sharing one label. In such cases, it converts a "switch(key)" to
several "if( (1<<(key-MIN_KEY)) & CST)", where MIN_KEY and CST are
integral constants, so that more than one case values which share one
destination label can be tested at one time.
This patch also sets VHO_Switch_Density to a smaller default value
for "-Os" than higher optimization levels, in order to avoid
too large jump tables when optimizing for size.
Attached are the patch and a test file which should cover all new code
included in the patch.
Thanks,
Richard
diff -x .svn -ur orig/osprey/be/vho/vho_lower.cxx work/osprey/be/vho/vho_lower.cxx
--- orig/osprey/be/vho/vho_lower.cxx 2010-09-05 10:28:26.000000000 +0800
+++ work/osprey/be/vho/vho_lower.cxx 2010-10-23 19:37:15.000000000 +0800
@@ -257,6 +257,9 @@
static WN * VHO_Switch_Default_Goto; /* goto switch default label */
static WN * VHO_Switch_Stmt; /* switch statement */
static FB_FREQ VHO_Switch_Default_Freq; /* goto switch default freq */
+static INT32 VHO_Switch_Lowest_Label; /* Lowest label of all jump destinations */
+static INT32 VHO_Switch_Highest_Label; /* Highest label of all jump destinations */
+static WN **VHO_Switch_Labels_Map; /* Map from each label to the real WN node */
#ifdef KEY
static BOOL VHO_In_MP_Region_Pragma = FALSE;
@@ -414,49 +417,10 @@
WN *block, *case_goto, *wn;
INT32 i, j;
INT32 switch_first_label;
- INT32 highest_label_num, lowest_label_num;
- WN **labels_map;
block = WN_CreateBlock();
WN_Set_Linenum(block, srcpos);
- // Find the highest and lowest case label numbers.
- highest_label_num = 0;
- lowest_label_num = INT32_MAX;
- for (i = 0; i < VHO_Switch_Ncases; i++) {
- case_goto = VHO_SWITCH_wn(i);
- INT64 value = WN_label_number(case_goto);
- if (value > highest_label_num)
- highest_label_num = value;
- if (value < lowest_label_num)
- lowest_label_num = value;
- }
-
- // Create case labels map.
- INT32 size = (highest_label_num - lowest_label_num + 1) * sizeof(WN *);
- labels_map = (WN **) alloca(size);
- memset(labels_map, 0, size);
-
- // Map each case label to the first label in the case code. A case code will
- // have multiple labels if it is shared by different case values.
- WN *first_label_in_group = (WN *) NULL;
- for (wn = WN_next(VHO_Switch_Stmt); ; wn = WN_next(wn)) {
- if (WN_operator(wn) == OPR_LABEL) {
- if (first_label_in_group == NULL)
- first_label_in_group = wn;
- // Map only the labels in the switch stmt.
- if (WN_label_number(wn) >= lowest_label_num &&
- WN_label_number(wn) <= highest_label_num) {
- labels_map[WN_label_number(wn)] = first_label_in_group;
- }
- if (WN_label_number(wn) == VHO_Switch_Last_Label)
- break;
- } else {
- // Real code terminates the labels group.
- first_label_in_group = NULL;
- }
- }
-
// Sort the case values.
qsort(VHO_Switch_Case_Table, VHO_Switch_Ncases,
sizeof(VHO_SWITCH_ITEM), VHO_Switch_Compare_Value);
@@ -472,8 +436,8 @@
WN *next_case_goto = VHO_SWITCH_wn(j);
INT64 next_value = WN_const_val(next_case_goto);
if (next_value != last_value + 1 ||
- (labels_map[WN_label_number(next_case_goto)] !=
- labels_map[WN_label_number(case_goto)]))
+ (VHO_Switch_Labels_Map[WN_label_number(next_case_goto) - VHO_Switch_Lowest_Label] !=
+ VHO_Switch_Labels_Map[WN_label_number(case_goto) - VHO_Switch_Lowest_Label]))
break;
last_value = next_value;
}
@@ -528,6 +492,117 @@
/* ============================================================================
*
* static WN *
+ * VHO_Switch_Generate_Bit_Test ( SRCPOS srcpos )
+ *
+ * Generate bit-test sequence for the switch statement.
+ *
+ * This converts "switch(key)" into several "if ((1 << (min - MIN_KEY)) & BITMASK)"
+ * where MIN_KEY and BITMASK and constants.
+ *
+ * ============================================================================
+ */
+
+static WN *
+VHO_Switch_Generate_Bit_Test ( SRCPOS srcpos, INT64 min_key, INT64 max_key )
+{
+ WN * wn;
+ WN * block;
+ WN * case_goto;
+ WN * modkey; // 1 << (key - min_key)
+ WN * test;
+ INT32 i, n, sum;
+ UINT64 bitmask;
+ TYPE_ID mtype_key; // Unsigned type of the switch key
+ TYPE_ID mtype_bitmask; // Type of the bitmask (U4 or U8 depending on range)
+ OPCODE opcode_bitmask_const;
+ OPCODE opcode_bitmask_and;
+ OPCODE opcode_bitmask_eq;
+ FB_FREQ freq;
+
+ block = WN_CreateBlock();
+ WN_Set_Linenum (block, srcpos);
+
+ mtype_key = Promoted_Mtype [WN_rtype(VHO_Switch_Index)];
+ if (mtype_key == MTYPE_I4)
+ mtype_key = MTYPE_U4;
+ else if (mtype_key == MTYPE_I8)
+ mtype_key = MTYPE_U8;
+
+ FmtAssert ((mtype_key==MTYPE_U4 || mtype_key==MTYPE_U8),
+ ("Wrong mtype in VHO_Switch_Generate_Bit_Test"));
+
+ mtype_bitmask = (max_key - min_key >= 32) ? MTYPE_U8 : MTYPE_U4;
+
+ /* If min_key is a small positive integer, we may be able to eliminate the subtraction operation
+ But don't do this if cases are consecutive (in which case we eliminate the last bit-test instead)
+ */
+ if ( min_key>0 && max_key<(mtype_bitmask==MTYPE_U4?32:64) && (max_key - min_key + 1 > VHO_Switch_Ncases ))
+ min_key = 0;
+
+ /* key - min_key */
+ modkey = WN_COPY_Tree (VHO_Switch_Index);
+ if (min_key != 0)
+ modkey = WN_CreateExp2 (VHO_Switch_Sub_Opcode, modkey, WN_CreateIntconst (VHO_Switch_Int_Opcode, min_key));
+
+ /* If the key is out of range, jump to default label. */
+ test = WN_CreateExp2 (OPCODE_make_op (OPR_LT, mtype_key, mtype_key),
+ WN_COPY_Tree (modkey),
+ WN_CreateIntconst (OPCODE_make_op(OPR_INTCONST,mtype_key,MTYPE_V), max_key - min_key + 1));
+ WN_INSERT_BlockAfter (block, WN_last(block), WN_CreateFalsebr (WN_label_number (VHO_Switch_Default_Goto), test));
+
+ opcode_bitmask_const = OPCODE_make_op (OPR_INTCONST, mtype_bitmask, MTYPE_V);
+ opcode_bitmask_and = OPCODE_make_op (OPR_BAND, mtype_bitmask, MTYPE_V);
+ opcode_bitmask_eq = OPCODE_make_op (OPR_EQ, mtype_bitmask, mtype_bitmask);
+
+ modkey = WN_CreateExp2 ( OPCODE_make_op(OPR_SHL,mtype_bitmask,MTYPE_V),
+ WN_CreateIntconst (opcode_bitmask_const, 1), modkey);
+
+ sum = 0; /* Total number of cases processed */
+ i = 0;
+
+ while ( i < VHO_Switch_Ncases ) {
+
+ bitmask = 0;
+ case_goto = VHO_SWITCH_wn(i);
+
+ sum += VHO_SWITCH_count(i);
+ freq = 0;
+ for (n = VHO_SWITCH_count(i); n; --n, ++i) {
+ bitmask |= UINT64(1) << (SWITCH_key(i) - min_key);
+ if (Cur_PU_Feedback)
+ freq += VHO_SWITCH_freq (i);
+ }
+
+ if (sum == max_key - min_key + 1) { /* If cases are consecutive, no need to test the last group */
+ wn = WN_CreateGoto ((ST_IDX)0, WN_label_number(case_goto));
+ }
+ else {
+ test = WN_CreateExp2 (opcode_bitmask_and, WN_COPY_Tree(modkey), WN_CreateIntconst (opcode_bitmask_const, bitmask));
+ test = WN_CreateExp2 (opcode_bitmask_eq, test, WN_CreateIntconst (opcode_bitmask_const, 0));
+ wn = WN_CreateFalsebr ( WN_label_number(case_goto), test );
+ }
+
+ WN_Set_Linenum ( wn, srcpos );
+
+ if ( Cur_PU_Feedback ) {
+ Cur_PU_Feedback->Annot (wn, FB_EDGE_BRANCH_TAKEN, freq);
+ }
+
+ WN_INSERT_BlockAfter ( block, WN_last(block), wn );
+ }
+ if ( Cur_PU_Feedback && (VHO_Switch_Ncases > 0))
+ Cur_PU_Feedback->Annot( wn, FB_EDGE_BRANCH_NOT_TAKEN,
+ VHO_Switch_Default_Freq );
+
+ WN_INSERT_BlockAfter ( block, WN_last(block),
+ WN_COPY_Tree ( VHO_Switch_Default_Goto ) );
+
+ return ( block );
+} /* VHO_Switch_Generate_Bit_Test */
+
+/* ============================================================================
+ *
+ * static WN *
* VHO_Switch_Generate_If_Else ( SRCPOS srcpos )
*
* Generate if-else sequence for the switch statement.
@@ -1032,8 +1107,13 @@
INT32 i;
INT32 j;
SRCPOS srcpos;
- INT32 count;
+ INT32 count = 0;
+ INT32 uniq = 0; /* Number of "unique" cases. */
+ UINT64 max_key, min_key; /* Used as either UINT64 or INT64 depending on the actual signedness */
+ UINT64 key_range; /* max_key - min_key */
WN * conv_wn = NULL;
+ INT32 switch_first_label;
+ WN * first_label_in_group;
LABEL_IDX last_label;
@@ -1120,30 +1200,96 @@
VHO_Switch_Cluster_Table =
TYPE_MEM_POOL_ALLOC_N(INT32, MEM_local_pool_ptr, (VHO_Switch_Ncases+1));
+ // Find the highest and lowest case label numbers.
+ VHO_Switch_Highest_Label = 0;
+ VHO_Switch_Lowest_Label = INT32_MAX;
+ for (case_goto = WN_first(block); case_goto; case_goto = WN_next(case_goto)) {
+ INT32 label = WN_label_number(case_goto);
+ if (label > VHO_Switch_Highest_Label)
+ VHO_Switch_Highest_Label = label;
+ if (label < VHO_Switch_Lowest_Label)
+ VHO_Switch_Lowest_Label = label;
+ }
+
+ if (VHO_Switch_Lowest_Label > VHO_Switch_Highest_Label)
+ VHO_Switch_Lowest_Label = VHO_Switch_Highest_Label = 0;
+
+ // Create case labels map.
+ VHO_Switch_Labels_Map = TYPE_MEM_POOL_ALLOC_N(WN*, MEM_local_pool_ptr,
+ (VHO_Switch_Highest_Label-VHO_Switch_Lowest_Label+1));
+ memset(VHO_Switch_Labels_Map, 0, (VHO_Switch_Highest_Label-VHO_Switch_Lowest_Label+1) * sizeof (WN*));
+
+ // Map each case label to the first label in the case code. A case code will
+ // have multiple labels if it is shared by different case values.
+ first_label_in_group = NULL;
+ for (case_goto = WN_next(VHO_Switch_Stmt); case_goto; case_goto = WN_next(case_goto)) {
+ if (WN_operator(case_goto) == OPR_LABEL) {
+ if (first_label_in_group == NULL)
+ first_label_in_group = case_goto;
+ INT32 label = WN_label_number(case_goto);
+ // Map only the labels in the switch stmt.
+ if (label >= VHO_Switch_Lowest_Label && label <= VHO_Switch_Highest_Label) {
+ VHO_Switch_Labels_Map[label - VHO_Switch_Lowest_Label] = first_label_in_group;
+ }
+ if (label == VHO_Switch_Last_Label)
+ break;
+ } else {
+ // Real code terminates the labels group.
+ first_label_in_group = NULL;
+ }
+ }
+
last_label = INT32_MIN;
+ if (VHO_Switch_Signed) {
+ max_key = INT64_MIN;
+ min_key = INT64_MAX;
+ }
+ else {
+ max_key = 0;
+ min_key = UINT64_MAX;
+ }
for ( i = 0, case_goto = WN_first(block);
i < VHO_Switch_Ncases;
i++, case_goto = WN_next(case_goto) ) {
+ if (VHO_Switch_Signed) {
+ INT64 key = WN_const_val (case_goto);
+ if (key < (INT64)min_key)
+ min_key = key;
+ if (key > (INT64)max_key)
+ max_key = key;
+ }
+ else {
+ UINT64 key = WN_const_val (case_goto);
+ if (key < (UINT64)min_key)
+ min_key = key;
+ if (key > (UINT64)max_key)
+ max_key = key;
+ }
+
VHO_SWITCH_wn(i) = case_goto;
if ( Cur_PU_Feedback )
VHO_SWITCH_freq(i) = Cur_PU_Feedback->Query( wn, FB_EDGE_SWITCH( i ) );
- if ( WN_label_number(case_goto) == last_label )
+ if ((last_label >= VHO_Switch_Lowest_Label) && (last_label <= VHO_Switch_Highest_Label) &&
+ (VHO_Switch_Labels_Map[WN_label_number(case_goto) - VHO_Switch_Lowest_Label] ==
+ VHO_Switch_Labels_Map[last_label - VHO_Switch_Lowest_Label]))
count++;
else {
for ( j = i - 1; j >= 0; --j ) {
- if ( last_label == WN_label_number(VHO_SWITCH_wn(j)) )
+ if ( VHO_Switch_Labels_Map[last_label - VHO_Switch_Lowest_Label] ==
+ VHO_Switch_Labels_Map[WN_label_number(VHO_SWITCH_wn(j)) - VHO_Switch_Lowest_Label] )
VHO_SWITCH_count(j) = count;
else
break;
}
+ ++uniq;
count = 1;
last_label = WN_label_number(case_goto);
}
@@ -1151,7 +1297,8 @@
for ( j = i - 1; j >= 0; --j ) {
- if ( last_label == WN_label_number(VHO_SWITCH_wn(j)) )
+ if ( VHO_Switch_Labels_Map[last_label - VHO_Switch_Lowest_Label] ==
+ VHO_Switch_Labels_Map[WN_label_number(VHO_SWITCH_wn(j)) - VHO_Switch_Lowest_Label] )
VHO_SWITCH_count(j) = count;
else
@@ -1165,6 +1312,17 @@
case_goto = VHO_SWITCH_wn(i);
}
+ key_range = max_key - min_key;
+
+ /* Check if bit-test can be used to implement it */
+ if ((key_range < (Is_Target_64bit () ? 64 : 32)) &&
+ ((uniq == 1 && VHO_Switch_Ncases >= 3) ||
+ (uniq == 2 && VHO_Switch_Ncases >= 5) ||
+ (uniq == 3 && VHO_Switch_Ncases >= 6)
+ )) {
+ wn = VHO_Switch_Generate_Bit_Test (srcpos, min_key, max_key);
+ }
+ else
if ( VHO_Switch_Ncases <= VHO_Switch_If_Else_Limit ) {
#ifdef VHO_DEBUG
@@ -1222,6 +1380,7 @@
}
}
+ MEM_POOL_FREE ( MEM_local_pool_ptr, VHO_Switch_Labels_Map );
MEM_POOL_FREE ( MEM_local_pool_ptr, VHO_Switch_Case_Table );
MEM_POOL_FREE ( MEM_local_pool_ptr, VHO_Switch_Cluster_Table );
if (conv_wn == NULL)
diff -x .svn -ur orig/osprey/common/com/config.cxx work/osprey/common/com/config.cxx
--- orig/osprey/common/com/config.cxx 2010-09-05 10:28:35.000000000 +0800
+++ work/osprey/common/com/config.cxx 2010-10-11 12:26:28.000000000 +0800
@@ -1507,6 +1512,10 @@
/* reduce caller+callee "size" limit for inlining */
INLINE_Max_Pu_Size=1000;
+ /* Do not emit large jump tables when optimizing for size */
+ if (! VHO_Switch_Density_Set )
+ VHO_Switch_Density = 3;
+
#ifdef BACK_END
/* LNO options to be turned off for SPACE */
LNO_Outer_Unroll = 1;
diff -x .svn -ur orig/osprey/common/com/config_vho.cxx work/osprey/common/com/config_vho.cxx
--- orig/osprey/common/com/config_vho.cxx 2010-04-07 14:58:30.000000000 +0800
+++ work/osprey/common/com/config_vho.cxx 2010-10-11 12:26:08.000000000 +0800
@@ -88,6 +88,7 @@
BOOL VHO_Combine_Loads = FALSE;
BOOL VHO_Recycle_Pregs = FALSE;
INT32 VHO_Switch_Density = 12;
+BOOL VHO_Switch_Density_Set = FALSE;
INT32 VHO_Switch_If_Else_Limit = 6;
INT32 VHO_Switch_Compgoto_Limit = 3;
BOOL VHO_Switch_Opt = TRUE;
@@ -172,7 +173,7 @@
{ OVK_INT32, OV_INTERNAL, FALSE, "switch_if_else", "switch_if_else",
6, 1, INT32_MAX, &VHO_Switch_If_Else_Limit, NULL },
{ OVK_INT32, OV_INTERNAL, FALSE, "switch_density", "switch_density",
- 12, 1, INT32_MAX, &VHO_Switch_Density, NULL },
+ 12, 1, INT32_MAX, &VHO_Switch_Density, &VHO_Switch_Density_Set },
{ OVK_INT32, OV_INTERNAL, FALSE, "switch_compgoto", "switch_compgoto",
3, 1, INT32_MAX, &VHO_Switch_Compgoto_Limit, NULL },
{ OVK_INT32, OV_INTERNAL, FALSE, "switch_opt_threshold", "switch_opt_threshold",
diff -x .svn -ur orig/osprey/common/com/config_vho.h work/osprey/common/com/config_vho.h
--- orig/osprey/common/com/config_vho.h 2010-04-07 14:58:30.000000000 +0800
+++ work/osprey/common/com/config_vho.h 2010-06-07 19:44:25.000000000 +0800
@@ -78,6 +78,7 @@
extern BOOL VHO_Recycle_Pregs;
extern BOOL VHO_Combine_Loads;
extern INT32 VHO_Switch_Density;
+extern BOOL VHO_Switch_Density_Set;
extern INT32 VHO_Switch_If_Else_Limit;
extern INT32 VHO_Switch_Compgoto_Limit;
extern BOOL VHO_Switch_Opt;
//CMD:$(CC) $(CFLAGS) $(SOURCE) -o tmp.s
//CMD:$(CROSS_AS) $(ASFLAGS) tmp.s -o $(TARGET)
// This test case targets at our bit-test implementation of switch-case
#include <stdint.h> // This should define __WORDSIZE
#include <stdlib.h>
#define VERIFY(expr) do { if (!(expr)) abort (); } while (0)
// This function tests for non-consecutive case values
static int foo1 (int x)
{
switch (x) {
case 1:
case 2:
case 4:
return x;
case 3:
case 5:
case 8:
return -x;
default:
return -1;
}
}
static void test1 (void)
{
VERIFY (foo1(0) == -1);
VERIFY (foo1(1) == 1);
VERIFY (foo1(2) == 2);
VERIFY (foo1(3) == -3);
VERIFY (foo1(4) == 4);
VERIFY (foo1(7) == -1);
}
// This function tests for consecutive case values
static int64_t foo2 (int64_t x)
{
switch (x) {
case -1:
case 1:
return 0;
case 0:
case 2:
case 3:
return 1;
case -2:
case 4:
return 2;
default:
return x;
}
}
static void test2 (void)
{
VERIFY (foo2(-3) == -3);
VERIFY (foo2(-2) == 2);
VERIFY (foo2(-1) == 0);
VERIFY (foo2(0) == 1);
VERIFY (foo2(4) == 2);
VERIFY (foo2(5) == 5);
}
// This function tests whether the threshold is correct.
// This function should _not_ use bit-test
static int foo3 (int x)
{
switch (x) {
case -1:
case 0:
case 1:
case 2:
case 3:
case 4:
return x;
case 10:
case 11:
case __WORDSIZE - 1:
return -2;
default:
return 0;
}
}
static void test3 (void)
{
VERIFY (foo3(-2) == 0);
VERIFY (foo3(-1) == -1);
VERIFY (foo3(__WORDSIZE - 2) == 0);
VERIFY (foo3(__WORDSIZE - 1) == -2);
VERIFY (foo3(__WORDSIZE) == 0);
}
// This function tests whether the threshold is correct.
// This function should use bit-test
static int foo4 (int x)
{
switch (x) {
case -1:
case 0:
case 1:
case 2:
case 3:
case 4:
return x;
case 10:
case 11:
case __WORDSIZE - 2:
return -2;
default:
return 0;
}
}
static void test4 (void)
{
VERIFY (foo4(-2) == 0);
VERIFY (foo4(-1) == -1);
VERIFY (foo4(__WORDSIZE - 3) == 0);
VERIFY (foo4(__WORDSIZE - 2) == -2);
VERIFY (foo4(__WORDSIZE - 1) == 0);
}
// The frontend accepts such a strange switch statement.
// The compiler should not abort in such cases...
static void test5 (void)
{
switch (1) {
break;
}
}
int main ()
{
test1 ();
test2 ();
test3 ();
test4 ();
test5 ();
return 0;
}
------------------------------------------------------------------------------
Nokia and AT&T present the 2010 Calling All Innovators-North America contest
Create new apps & games for the Nokia N8 for consumers in U.S. and Canada
$10 million total in prizes - $4M cash, 500 devices, nearly $6M in marketing
Develop with Nokia Qt SDK, Web Runtime, or Java and Publish to Ovi Store
http://p.sf.net/sfu/nokia-dev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel