Sorry, I forgot to attach the patch.  Should be attached now.

On Fri, 18 Mar 2022, Elijah Stone wrote:

What was I thinking? i. is much more prestigious than /:

   x=: ?1e8$65536
   y=: 0+x NB. avoid selfie optimizations. The effect is still present, but less pronounced
   x4=: 10 u:x
   y4=: 10 u:y
   x2=: 2 u:x4
   y2=: 2 u:y4
   timex'x i.y'
0.728156
   timex'x4 i.y4'
3.8804
   timex'x2 i.y2'
0.651964

This is probably an oversight from when 4-byte chars were added, made on the theory that 2-byte chars would always get the small-range optimization (which is, as a matter of fact, not the case).

Attached is a patch fixing the problem. It also helps with 4-column matrices of 1-byte chars, etc. (I think there may be reasonably cheap gains to be made across the board for i. on matrices with few columns, but at the very least small power-of-two item sizes deserve to be fast.) It should also improve performance for smaller 2-byte arrays that do not make the small-range cut. With the patch:

   timex'x4 i.y4'
0.613455

 -E
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm
diff --git a/jsrc/viavx.c b/jsrc/viavx.c
index 67a1dc47..f55024ec 100644
--- a/jsrc/viavx.c
+++ b/jsrc/viavx.c
@@ -588,35 +588,40 @@ static __forceinline I icmpeq(I *a, I *w, I n) {
 // jtioq  RAT
 // jtioi1 k==SZI, INT/SBT/char/bool not small-range
 // jtioi  list of >1 INT
+// jtioCk k-sized bool/char
 // jtioc  k!=SZI, bool (must be list of em)/char/INT/SBT
 // jtioc01 intolerant FL atom
 // jtioc0 intolerant FL array
 // jtioz01 intolerant CMPX atom
 // jtioz0 intolerant CMPX array
 
-static IOFX(A,US,jtioax1,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(av[hj])),1  )  /* 
boxed exact 1-element item */   
-static IOFX(A,US,jtioau,, hiau(C(*v)),  !equ(C(*v),C(av[hj])),1  )  /* boxed 
uniform type         */
-static IOFX(X,US,jtiox,,  hix(v),            !eqx(n,v,av+n*hj),               
cn)  /* extended integer           */   
-static IOFX(Q,US,jtioq,,  hiq(v),            !eqq(n,v,av+n*hj),               
cn)  /* rational number            */   
-static IOFX(C,US,jtioc,,  hic(k,(UC*)v),     memcmpne(v,av+k*hj,k),            
 cn)  /* boolean, char, or integer  */
-static IOFX(I,US,jtioi,COMPSETUP,  hici(n,v),            COMPCALL(av),         
 cn  )  // INT array, not float
-static IOFX(I,US,jtioi1,,  hici1(v),           *v!=av[hj],                    
1 )  // len=8, not float
-static IOFX(D,US,jtioc01,, hic01((UIL*)v),    *v!=av[hj],                      
1) // float atom
-static IOFX(Z,US,jtioz01,, hic0(2,(UIL*)v),    
(v[0].re!=av[hj].re)||(v[0].im!=av[hj].im), 1) // complex atom
-static IOFX(D,US,jtioc0,, hic0(n,(UIL*)v),    fcmp0(v,&av[n*hj],n),           
cn) // float array
-static IOFX(Z,US,jtioz0,, hic0(2*n,(UIL*)v),    
fcmp0((D*)v,(D*)&av[n*hj],2*n),  cn) // complex array
-
-static IOFX(A,UI4,jtioax12,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(av[hj])),1  )  
/* boxed exact 1-element item */   
-static IOFX(A,UI4,jtioau2,, hiau(C(*v)),  !equ(C(*v),C(av[hj])),1  )  /* boxed 
uniform type         */
-static IOFX(X,UI4,jtiox2,,  hix(v),            !eqx(n,v,av+n*hj),              
 cn)  /* extended integer           */   
-static IOFX(Q,UI4,jtioq2,,  hiq(v),            !eqq(n,v,av+n*hj),              
 cn)  /* rational number            */   
-static IOFX(C,UI4,jtioc2,,  hic(k,(UC*)v),     memcmpne(v,av+k*hj,k),          
   cn)  /* boolean, char, or integer  */
-static IOFX(I,UI4,jtioi2,COMPSETUP,  hici(n,v),            COMPCALL(av),       
   cn  )  // INT array, not float
-static IOFX(I,UI4,jtioi12,,  hici1(v),           *v!=av[hj],                   
 1 )  // len=8, not float
-static IOFX(D,UI4,jtioc012,, hic01((UIL*)v),    *v!=av[hj],                    
  1) // float atom
-static IOFX(Z,UI4,jtioz012,, hic0(2,(UIL*)v),    
(v[0].re!=av[hj].re)||(v[0].im!=av[hj].im), 1) // complex atom
-static IOFX(D,UI4,jtioc02,, hic0(n,(UIL*)v),    fcmp0(v,&av[n*hj],n),          
 cn) // float array
-static IOFX(Z,UI4,jtioz02,, hic0(2*n,(UIL*)v),    
fcmp0((D*)v,(D*)&av[n*hj],2*n),  cn) // complex array
+static IOFX(A, US,jtioax1,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(av[hj])),1) // 
boxed exact 1-element item
+static IOFX(A, US,jtioau,, hiau(C(*v)),      !equ(C(*v),C(av[hj])),    1) // 
boxed uniform type
+static IOFX(X, US,jtiox,,  hix(v),           !eqx(n,v,av+n*hj),       cn) // 
extended integer
+static IOFX(Q, US,jtioq,,  hiq(v),           !eqq(n,v,av+n*hj),       cn) // 
rational number
+static IOFX(C, US,jtioc,,  hic(k,(UC*)v),    memcmpne(v,av+k*hj,k),   cn) // 
boolean, char, or integer
+static IOFX(I, US,jtioi,COMPSETUP,hici(n,v),COMPCALL(av),             cn) // 
INT array, not float
+static IOFX(C2,US,jtioC2,, hici1((C2*)v),    *v!=av[hj],               1) // 
2-byte (char)
+static IOFX(C4,US,jtioC4,, hici1((C4*)v),    *v!=av[hj],               1) // 
4-byte (char)
+static IOFX(I, US,jtioi1,, hici1(v),         *v!=av[hj],               1) // 
len=8, not float
+static IOFX(D, US,jtioc01,,hic01((UIL*)v),   *v!=av[hj],               1) // 
float atom
+static IOFX(Z, US,jtioz01,,hic0(2,(UIL*)v),  
(v[0].re!=av[hj].re)||(v[0].im!=av[hj].im), 1) // complex atom
+static IOFX(D, US,jtioc0,, hic0(n,(UIL*)v),  fcmp0(v,&av[n*hj],n),    cn) // 
float array
+static IOFX(Z, US,jtioz0,, 
hic0(2*n,(UIL*)v),fcmp0((D*)v,(D*)&av[n*hj],2*n),cn) // complex array
+
+static IOFX(A, UI4,jtioax12,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(av[hj])),1) // 
boxed exact 1-element item
+static IOFX(A, UI4,jtioau2,, hiau(C(*v)),      !equ(C(*v),C(av[hj])),    1) // 
boxed uniform type
+static IOFX(X, UI4,jtiox2,,  hix(v),           !eqx(n,v,av+n*hj),       cn) // 
extended integer
+static IOFX(Q, UI4,jtioq2,,  hiq(v),           !eqq(n,v,av+n*hj),       cn) // 
rational number
+static IOFX(C, UI4,jtioc2,,  hic(k,(UC*)v),    memcmpne(v,av+k*hj,k),   cn) // 
boolean, char, or integer 
+static IOFX(I, UI4,jtioi2,COMPSETUP,hici(n,v),COMPCALL(av),             cn) // 
INT array, not float
+static IOFX(C2,UI4,jtioC22,, hici1((C2*)v),    *v!=av[hj],               1) // 
2-byte (char)
+static IOFX(C4,UI4,jtioC42,, hici1((C4*)v),    *v!=av[hj],               1) // 
4-byte (char)
+static IOFX(I, UI4,jtioi12,, hici1(v),         *v!=av[hj],               1) // 
len=8, not float
+static IOFX(D, UI4,jtioc012,,hic01((UIL*)v),   *v!=av[hj],               1) // 
float atom
+static IOFX(Z, UI4,jtioz012,,hic0(2,(UIL*)v),  
(v[0].re!=av[hj].re)||(v[0].im!=av[hj].im), 1) // complex atom
+static IOFX(D, UI4,jtioc02,, hic0(n,(UIL*)v),  fcmp0(v,&av[n*hj],n),    cn) // 
float array
+static IOFX(Z, UI4,jtioz02,, 
hic0(2*n,(UIL*)v),fcmp0((D*)v,(D*)&av[n*hj],2*n),cn) // complex array
 
 
 // ********************* second class: tolerant comparisons, possibly boxed 
**********************
@@ -984,9 +989,10 @@ static 
IOFT(A,UI4,jtioa12,cthia(ctmask,1.0,C(*v)),TFINDBX,TFINDBY,TFINDBYKEY,!eq
  }
 
 // The verbs to do the work, for different item lengths and hashtable sizes
-static IOFSMALLRANGE(jtio12,UC,US)  static IOFSMALLRANGE(jtio14,UC,UI4)  // 
1-byte items, using small/large hashtable
-static IOFSMALLRANGE(jtio22,US,US)  static IOFSMALLRANGE(jtio24,US,UI4)  // 
2-byte items, using small/large hashtable
-static IOFSMALLRANGE(jtio42,I,US)  static IOFSMALLRANGE(jtio44,I,UI4)  // 
4/8-byte items, using small/large hashtable
+static IOFSMALLRANGE(jtio12,UC, US)  static IOFSMALLRANGE(jtio14,UC, UI4)  // 
1-byte items, using small/large hashtable
+static IOFSMALLRANGE(jtio22,US, US)  static IOFSMALLRANGE(jtio24,US, UI4)  // 
2-byte items, using small/large hashtable
+static IOFSMALLRANGE(jtio42,UI4,US)  static IOFSMALLRANGE(jtio44,UI4,UI4)  // 
4-byte items, using small/large hashtable
+static IOFSMALLRANGE(jtio82,I,  US)  static IOFSMALLRANGE(jtio84,I,  UI4)  // 
SZI-byte items, using small/large hashtable
 
 // ******************* fourth class: sequential comparison 
***************************************
 // implemented only for i. i: e. u/.   - perhaps should revert for other 
compounds
@@ -1331,29 +1337,33 @@ static I jtutype(J jt,A w,I c){A*wv,x;I m,t;
   R h;                                                                         
      \
  }
 
-static IOFXW(A,US,jtiowax1,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(wv[hj])),1  )  
/* boxed exact 1-element item */   
-static IOFXW(A,US,jtiowau,, hiau(C(*v)),  !equ(C(*v),C(wv[hj])),1  )  /* boxed 
uniform type         */
-static IOFXW(X,US,jtiowx,,  hix(v),            !eqx(n,v,wv+n*hj),              
 cn)  /* extended integer           */   
-static IOFXW(Q,US,jtiowq,,  hiq(v),            !eqq(n,v,wv+n*hj),              
 cn)  /* rational number            */   
-static IOFXW(C,US,jtiowc,,  hic(k,(UC*)v),     memcmpne(v,wv+k*hj,k),          
   cn)  /* boolean, char, or integer  */
-static IOFXW(I,US,jtiowi,COMPSETUP,  hici(n,v),            COMPCALL(wv),       
   cn  )  // INT array, not float
-static IOFXW(I,US,jtiowi1,,  hici1(v),           *v!=wv[hj],                   
 1 )  // len=8, not float
-static IOFXW(D,US,jtiowc01,, hic01((UIL*)v),    *v!=wv[hj],                    
  1) // float atom
-static IOFXW(Z,US,jtiowz01,, hic0(2,(UIL*)v),    
(v[0].re!=wv[hj].re)||(v[0].im!=wv[hj].im), 1) // complex atom
-static IOFXW(D,US,jtiowc0,, hic0(n,(UIL*)v),    fcmp0(v,&wv[n*hj],n),          
 cn) // float array
-static IOFXW(Z,US,jtiowz0,, hic0(2*n,(UIL*)v),    
fcmp0((D*)v,(D*)&wv[n*hj],2*n),  cn) // complex array
-
-static IOFXW(A,UI4,jtiowax12,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(wv[hj])),1  ) 
 /* boxed exact 1-element item */   
-static IOFXW(A,UI4,jtiowau2,, hiau(C(*v)),  !equ(C(*v),C(wv[hj])),1  )  /* 
boxed uniform type         */
-static IOFXW(X,UI4,jtiowx2,,  hix(v),            !eqx(n,v,wv+n*hj),            
   cn)  /* extended integer           */   
-static IOFXW(Q,UI4,jtiowq2,,  hiq(v),            !eqq(n,v,wv+n*hj),            
   cn)  /* rational number            */   
-static IOFXW(C,UI4,jtiowc2,,  hic(k,(UC*)v),     memcmpne(v,wv+k*hj,k),        
     cn)  /* boolean, char, or integer  */
-static IOFXW(I,UI4,jtiowi2,COMPSETUP,  hici(n,v),            COMPCALL(wv),     
     cn  )  // INT array, not float
-static IOFXW(I,UI4,jtiowi12,,  hici1(v),           *v!=wv[hj],                 
   1 )  // len=8, not float
-static IOFXW(D,UI4,jtiowc012,, hic01((UIL*)v),    *v!=wv[hj],                  
    1) // float atom
-static IOFXW(Z,UI4,jtiowz012,, hic0(2,(UIL*)v),    
(v[0].re!=wv[hj].re)||(v[0].im!=wv[hj].im), 1) // complex atom
-static IOFXW(D,UI4,jtiowc02,, hic0(n,(UIL*)v),    fcmp0(v,&wv[n*hj],n),        
   cn) // float array
-static IOFXW(Z,UI4,jtiowz02,, hic0(2*n,(UIL*)v),    
fcmp0((D*)v,(D*)&wv[n*hj],2*n),  cn) // complex array
+static IOFXW(A, US,jtiowax1,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(wv[hj])), 1) 
// boxed exact 1-element item
+static IOFXW(A, US,jtiowau,, hiau(C(*v)),      !equ(C(*v),C(wv[hj])),     1) 
// boxed uniform type
+static IOFXW(X, US,jtiowx,,  hix(v),           !eqx(n,v,wv+n*hj),        cn) 
// extended integer
+static IOFXW(Q, US,jtiowq,,  hiq(v),           !eqq(n,v,wv+n*hj),        cn) 
// rational number
+static IOFXW(C, US,jtiowc,,  hic(k,(UC*)v),    memcmpne(v,wv+k*hj,k),    cn) 
// boolean, char, or integer
+static IOFXW(I, US,jtiowi,COMPSETUP,hici(n,v),COMPCALL(wv),              cn) 
// INT array, not float
+static IOFXW(C2,US,jtiow21,, hici1((C2*)v),    *v!=wv[hj],                1) 
// 2-byte (char)
+static IOFXW(C4,US,jtiow41,, hici1((C4*)v),    *v!=wv[hj],                1) 
// 4-byte (char)
+static IOFXW(I, US,jtiowi1,, hici1(v),         *v!=wv[hj],                1) 
// len=8, not float
+static IOFXW(D, US,jtiowc01,,hic01((UIL*)v),   *v!=wv[hj],                1) 
// float atom
+static IOFXW(Z, US,jtiowz01,,hic0(2,(UIL*)v),  
(v[0].re!=wv[hj].re)||(v[0].im!=wv[hj].im), 1) // complex atom
+static IOFXW(D, US,jtiowc0,, hic0(n,(UIL*)v),  fcmp0(v,&wv[n*hj],n),     cn) 
// float array
+static IOFXW(Z, US,jtiowz0,, 
hic0(2*n,(UIL*)v),fcmp0((D*)v,(D*)&wv[n*hj],2*n),cn) // complex array
+
+static IOFXW(A, UI4,jtiowax12,,cthia(~0LL,1.0,C(*v)),!equ(C(*v),C(wv[hj])), 1) 
// boxed exact 1-element item
+static IOFXW(A, UI4,jtiowau2,, hiau(C(*v)),      !equ(C(*v),C(wv[hj])),     1) 
// boxed uniform type
+static IOFXW(X, UI4,jtiowx2,,  hix(v),           !eqx(n,v,wv+n*hj),        cn) 
// extended integer
+static IOFXW(Q, UI4,jtiowq2,,  hiq(v),           !eqq(n,v,wv+n*hj),        cn) 
// rational number
+static IOFXW(C, UI4,jtiowc2,,  hic(k,(UC*)v),    memcmpne(v,wv+k*hj,k),    cn) 
// boolean, char, or integer
+static IOFXW(I, UI4,jtiowi2,COMPSETUP,hici(n,v),COMPCALL(wv),              cn) 
// INT array, not float
+static IOFXW(C2,UI4,jtiow212,, hici1((C2*)v),    *v!=wv[hj],                1) 
// 2-byte (char)
+static IOFXW(C4,UI4,jtiow412,, hici1((C4*)v),    *v!=wv[hj],                1) 
// 4-byte (char)
+static IOFXW(I, UI4,jtiowi12,, hici1(v),         *v!=wv[hj],                1) 
// len=8, not float
+static IOFXW(D, UI4,jtiowc012,,hic01((UIL*)v),   *v!=wv[hj],                1) 
// float atom
+static IOFXW(Z, UI4,jtiowz012,,hic0(2,(UIL*)v),  
(v[0].re!=wv[hj].re)||(v[0].im!=wv[hj].im), 1) // complex atom
+static IOFXW(D, UI4,jtiowc02,, hic0(n,(UIL*)v),  fcmp0(v,&wv[n*hj],n),         
  cn) // float array
+static IOFXW(Z, UI4,jtiowz02,, 
hic0(2*n,(UIL*)v),fcmp0((D*)v,(D*)&wv[n*hj],2*n),  cn) // complex array
 
 
 // *************************** seventh class: small-range processing of w 
***********************
@@ -1707,6 +1717,7 @@ static CR condrange2(US *s,I n,I min,I max,I maxrange){CR 
ret;I i;US x;
 // jtioq  RAT
 // jtioi1 k==SZI, INT/SBT/char/bool not small-range
 // jtioi  list of >1 INT
+// jtioCk k-sized character
 // jtioc  k!=SZI, bool (must be list of em)/char/INT/SBT
 // jtioc01 intolerant FL atom
 // jtioc0 intolerant FL array
@@ -1723,7 +1734,7 @@ static CR condrange2(US *s,I n,I min,I max,I maxrange){CR 
ret;I i;US x;
 #define FNTBLSMALL1 12  // small-range, 1-byte items
 #define FNTBLSMALL2 13  // small-range, 2-byte items
 #define FNTBLSMALL4 14  // small-range, 4-byte items
-#define FNTBLONEINT 15  // hash of single INT-sized exact value
+#define FNTBLSMALLI 15  // small-range, SZI-byte items
 #define FNTBLBOXARRAY 20  // array of boxes, tolerant or not (we just hash on 
shape)
 #define FNTBLBOXINTOLERANT 21  // single box but intolerant
 #define FNTBLBOXUNIFORM 22  // single box, but a and w have uniform contents
@@ -1731,56 +1742,65 @@ static CR condrange2(US *s,I n,I min,I max,I 
maxrange){CR ret;I i;US x;
 #define FNTBLXNUM 24   // hashed xnum
 #define FNTBLRAT 25   // hashed rat
 #define FNTBLBOXSSORT 26  // boxes, handled by sorting and binary search
-#define FNTBLREVERSE 27  // where the reversed hashes start
-#define FNTBLSIZE 54  // number of functions - before the second half
+#define FNTBL2 27 // 2-byte (probably characters)
+#define FNTBL4 28 // 4-byte (probably characters)
+#define FNTBLI 29  // SZI-byte (will be duplicate of 4 on 32-bit, but who 
cares)
+#define FNTBLREVERSE 30  // where the reversed hashes start
+#define FNTBLSIZE 60  // number of functions - before the second half
 static const AF fntbl[]={
 // prefix: routines used without hashtables, flags, etc
  jtiosc,  // sequential comparison (-2) - we pass in extra args
  jtiosfu,   // i.!.1 - sequential file update (-1)
 // US tables
  jtioc,jtioc,jtioc,jtioc,jtioi,jtioi,jtioi,jtioi,  // bool, INT
- jtiod,jtioc0,jtiod1,jtioc01,jtio12,jtio22,jtio42,jtioi1,   // FL (then 
small-range, then ONEINT)
+ jtiod,jtioc0,jtiod1,jtioc01,jtio12,jtio22,jtio42,jtio82,   // FL (then 
small-range)
  jtioz,jtioz0,jtioz1,jtioz01,   // CMPX
 
  jtioa,jtioax1,jtioau,jtioa1,  // atomic types
  jtiox,jtioq,
- jtiobs,
+ jtiobs,jtioC2,jtioC4,jtioi1,
  
  jtiowc,jtiowc,jtiowc,jtiowc,jtiowi,jtiowi,jtiowi,jtiowi,  // bool, INT
- 0,jtiowc0,0,jtiowc01,0,0,jtio42w,jtiowi1,   // FL (then small-range, then 
ONEINT)
+ 0,jtiowc0,0,jtiowc01,0,0,jtio42w,0,   // FL (then small-range)
  0,jtiowz0,0,jtiowz01,   // CMPX
  0,0,0,0,
  0,0,
- 0,
+ 0,jtiow21,jtiow41,jtiowi1,
 
 // UI4 tables
  jtioc2,jtioc2,jtioc2,jtioc2,jtioi2,jtioi2,jtioi2,jtioi2,  // bool, INT
- jtiod2,jtioc02,jtiod12,jtioc012,jtio14,jtio24,jtio44,jtioi12,   // FL (then 
small-range, then ONEINT)
+ jtiod2,jtioc02,jtiod12,jtioc012,jtio14,jtio24,jtio44,jtio84,   // FL (then 
small-range)
  jtioz2,jtioz02,jtioz12,jtioz012,   // CMPX
 
  jtioa2,jtioax12,jtioau2,jtioa12,  // atomic types
  jtiox2,jtioq2,
- jtiobs,
+ jtiobs,jtioC22,jtioC42,jtioi12,
 
  jtiowc2,jtiowc2,jtiowc2,jtiowc2,jtiowi2,jtiowi2,jtiowi2,jtiowi2,  // bool, INT
- 0,jtiowc02,0,jtiowc012,0,0,jtio44w,jtiowi12,   // FL (then small-range, then 
ONEINT)
- 0,jtiowz02,0,jtiowz012   // CMPX
-
+ 0,jtiowc02,0,jtiowc012,0,0,jtio44w,0,   // FL (then small-range)
+ 0,jtiowz02,0,jtiowz012,   // CMPX
+ 0,0,0,0,
+ 0,0,
+ 0,jtiow212,jtiow412,jtiowi12,
 };
 static const S fnflags[]={  // 0 values reserved for small-range.  They turn 
off booladj
  
IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,
  // bool, INT
- IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,0,0,0,IIMODFULL,   // FL (then 
small-range, then ONEINT)
+ IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,0,0,0,0,   // FL (then small-range)
  IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,   // CMPX
 
  IIMODFULL,IIMODFULL,IIMODFULL,IIMODFULL,  // atomic types
  IIMODFULL,IIMODFULL,
  -2,  // 'no hashing' (for box search)
+ IIMODFULL,IIMODFULL,IIMODFULL,
  
 // Reversed hashes, where supported.  IIMODFULL is not needed by the 
reversed-hash code so we continue its use, started above, as a flag to turn off 
booleans
  
IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,IREVERSED|IIMODFULL,
  // bool, INT
- 
IIMODFULL,IREVERSED|IIMODFULL,IIMODFULL,IREVERSED|IIMODFULL,IREVERSED,IREVERSED,IREVERSED,IIMODFULL,
   // FL (then small-range, then ONEINT)
- IIMODFULL,IREVERSED|IIMODFULL,IIMODFULL,IREVERSED|IIMODFULL   // CMPX
-
+ 
IIMODFULL,IREVERSED|IIMODFULL,IIMODFULL,IREVERSED|IIMODFULL,IREVERSED,IREVERSED,IREVERSED,IREVERSED,
   // FL (then small-range)
+ IIMODFULL,IREVERSED|IIMODFULL,IIMODFULL,IREVERSED|IIMODFULL,   // CMPX
+ 0,0,0,0,
+ 0,0,
+ 0,
+ IIMODFULL,IIMODFULL,IIMODFULL,
 };
 
 #define MAXBYTEBOOL 65536  // if p exceeds this, we switch over to packed bits
@@ -1959,23 +1979,28 @@ A jtindexofsub(J jt,I mode,A a,A 
w){F2PREFIP;PROLOG(0079);A h=0;fauxblockINT(zfa
    // result is p (the length of hashtable, as # of entries), datamin (the 
minimum value found, if small-range)
    // If the allocated range includes all the possible values for the input, 
set IIMODFULL to indicate that fact
    if(unlikely(2==k)){
-    // if the actual range of the data exceeds p, we revert to hashing.  All 
2-byte types are exact
-    CR crres = 
condrange2(USAV(a),(AN(a)<<klg)>>LGSZS,-1,0,MIN((UI)(IMAX-5)>>booladj,3*m)<<booladj);
   // get the range
-    if(crres.range){
-      datamin=crres.min;
-      // If the range is close to the max, we should consider widening the 
range to use the faster FULL code.  We do this only for boolean hashes, because
-      // in the current allocation going all the way to 65536 kicks us into 
the longer hashtable (questionable decision).  Otherwise we should just promote
-      // any non-Boolean, because the actual cache footprint won't change.
-      // The cost of promoting a Boolean is 1 store (1 clock) per word 
cleared, for (65536-range)>>booladj bytes (if booladj!=0) [or (65536-range) 
hashtable entries if booladj==0]
-      // The savings is 4 ops (2 clocks) per word searched
-      if(booladj && ((UI)(65536-crres.range)>>booladj) < 
(c<<(LGSZI+1))){p=65536; datamin=0;}else{p=crres.range;}  // this 
underestimates the benefit for prehashes
-      if(p==65536)mode|=IIMODFULL;
-      fnx=FNTBLSMALL2;  // This qualifies for small-range processing
+    if(3*m>=65536>>booladj){datamin=0; p=65536; mode|=IIMODFULL; 
fnx=FNTBLSMALL2;} // will always qualify for small-range, so don't bother 
checking range
+    else if(t!=C2T){fnx=FNTBL2;} // 2-col matrix; range-checking likely 
unproductive
+    else{
+     // if the actual range of the data exceeds p, we revert to hashing.  All 
2-byte types are exact
+     CR crres = 
condrange2(USAV(a),(AN(a)<<klg)>>LGSZS,-1,0,MIN((UI)(IMAX-5)>>booladj,3*m)<<booladj);
   // get the range
+     if(!crres.range){fnx=FNTBL2;}
+     else{
+       datamin=crres.min;
+       // If the range is close to the max, we should consider widening the 
range to use the faster FULL code.  We do this only for boolean hashes, because
+       // in the current allocation going all the way to 65536 kicks us into 
the longer hashtable (questionable decision).  Otherwise we should just promote
+       // any non-Boolean, because the actual cache footprint won't change.
+       // The cost of promoting a Boolean is 1 store (1 clock) per word 
cleared, for (65536-range)>>booladj bytes (if booladj!=0) [or (65536-range) 
hashtable entries if booladj==0]
+       // The savings is 4 ops (2 clocks) per word searched
+       if(booladj && ((UI)(65536-crres.range)>>booladj) < 
(c<<(LGSZI+1))){p=65536; datamin=0;}else{p=crres.range;}  // this 
underestimates the benefit for prehashes
+       if(p==65536)mode|=IIMODFULL;
+       fnx=FNTBLSMALL2;  // This qualifies for small-range processing
+     }
     }
    }
    if(likely(fnx<0)){  // if we don't have it yet, it will be a hash or 
small-range integers.  Decide which one
-    if((k&~(t&FL))==SZI){  // non-float, might be INT or SBT, or characters.  
FL has -0 problem   requires SZI==FL
-     if(likely((t&INT+SBT)!=0)){I fnprov;A rangearg; UI rangearglen;  // same 
here, for I types
+    if(((k&~(t&FL))==SZI)|(4==k)){  // non-float, might be INT or SBT, or 
characters.  FL has -0 problem   requires SZI==FL
+     if(likely((t&INT+SBT+(4==k?C4T:0))!=0)){I fnprov;A rangearg; UI 
rangearglen;  // same here, for I types
       // small-range processing is a possibility, but we need to decide 
whether we are going to do a reversed hash, so we will
       // know which range to check.  For i./i:, we reverse if c is much 
shorter than m; for e., we have to consider whether
       // the forward hash will benefit from bits mode, so we have to estimate 
the size of each hash table
@@ -1983,9 +2008,9 @@ A jtindexofsub(J jt,I mode,A a,A 
w){F2PREFIP;PROLOG(0079);A h=0;fauxblockINT(zfa
       // otherwise (a candidate for reversed hash), if i./i: is set, meaning a 
full hashtable is needed, reverse if a is twice as long as w
       // otherwise (e., which uses bitmasks in the forward hash) calculate 
length of bitmask and reverse if the full table for w is shorter
       // than the bitmask for a.  Note that FORKEY will never cause a reversed 
hash
-      rangearg=a; rangearglen=m; fnprov=FNTBLSMALL4;   // values for forward 
check
+      rangearg=a; rangearglen=m; fnprov=FNTBLSMALL4+(k==SZI);   // values for 
forward check
       if(mode&IIOREPS){  // if reverse check is possible, see if it is desired
-       if((m>>(1))>c){rangearg=w; rangearglen=c; 
fnprov=FNTBLSMALL4+FNTBLREVERSE; }  // booladj?(m>MAXBYTEBOOL?5:2): omitted now
+       if((m>>(1))>c){rangearg=w; rangearglen=c; fnprov+=FNTBLREVERSE; }  // 
booladj?(m>MAXBYTEBOOL?5:2): omitted now
       }
       // we make the small-range decision mostly on length; if the range table 
would be bigger than the hashtable, we use the hash.  Here
       // we invert the calculation to see how big a range we can tolerate 
without exceeding the table size.  The length of the hash, whether small-range
@@ -1994,10 +2019,11 @@ A jtindexofsub(J jt,I mode,A a,A 
w){F2PREFIP;PROLOG(0079);A h=0;fauxblockINT(zfa
       // small-range hash.  The full hash spends more time in lookup than in 
creation, because misses become more likely the
       // fuller the table.  This makes small-range much more valuable when the 
hashes are repeated
       I maxsizemult=mode&IPHCALC?6:4;  // # slots/item to allow in small-range 
table.  More if prehash
-      CR crres = 
condrange(AV(rangearg),((AN(rangearg)<<klg))>>LGSZI,IMAX,IMIN,MIN((UI)(IMAX-5)>>booladj,maxsizemult*rangearglen)<<booladj);
+      CR crres = k==SZI?condrange   
(AV(rangearg),((AN(rangearg)<<klg))>>LGSZI,  
IMAX,IMIN,MIN((UI)(IMAX-5)>>booladj,maxsizemult*rangearglen)<<booladj)
+                       
:condrange4(C4AV(rangearg),((AN(rangearg)<<klg))>>LGSZUI4,IMAX,IMIN,MIN((UI)(IMAX-5)>>booladj,maxsizemult*rangearglen)<<booladj);
       if(crres.range){datamin=crres.min; p=crres.range; fnx=fnprov;  // use 
the selected orientation
-      }else{fnx=FNTBLONEINT;}  // select integer hashing if range too big...
-     }else{fnx=FNTBLONEINT;}   // ... or some other 8-byte length (not float, 
though)
+      }else{fnx=FNTBL4+(k==SZI);}  // select integer hashing if range too 
big...
+     }else{ fnx=FNTBL4+(k==SZI);}  // ... or some other 4/8-byte length (not 
float, though)
     }else{  // it's a hash
      fnx=((t&CMPX+FL+INT))+((n==1)?2:0)+fnx+2;  // index: 
CMPX/FL/n==1/intolerant (~fnx is 1 for tolerant, 0 for intolerant; fnx+2 is the 
reverse)
     }
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm

Reply via email to