Github user davies commented on the pull request:

    https://github.com/apache/spark/pull/11065#issuecomment-180075423
  
    For a join on two int keys, here are the query and the generated code:
    ```
        val dim2 = broadcast(sqlContext.range(1 << 16)
          .selectExpr("cast(id as int) as k1", "cast(id as int) as k2", 
"cast(id as string) as v"))
    
          sqlContext.range(N).join(dim2,
            (col("id") bitwiseAND 60000).cast(IntegerType) === col("k1")
              && (col("id") bitwiseAND 50000).cast(IntegerType) === 
col("k2")).count()
    
    ```
    
    ```
    /* 001 */
    /* 002 */ public Object generate(Object[] references) {
    /* 003 */   return new GeneratedIterator(references);
    /* 004 */ }
    /* 005 */
    /* 006 */ class GeneratedIterator extends 
org.apache.spark.sql.execution.BufferedRowIterator {
    /* 007 */
    /* 008 */   private Object[] references;
    /* 009 */   private boolean agg_initAgg;
    /* 010 */   private boolean agg_bufIsNull;
    /* 011 */   private long agg_bufValue;
    /* 012 */   private org.apache.spark.broadcast.TorrentBroadcast 
bhj_broadcast;
    /* 013 */   private 
org.apache.spark.sql.execution.joins.UniqueLongHashedRelation bhj_relation;
    /* 014 */   private boolean range_initRange;
    /* 015 */   private long range_partitionEnd;
    /* 016 */   private long range_number;
    /* 017 */   private boolean range_overflow;
    /* 018 */   private UnsafeRow agg_result;
    /* 019 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder agg_holder;
    /* 020 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter agg_rowWriter;
    /* 021 */
    /* 022 */   public GeneratedIterator(Object[] references) {
    /* 023 */     this.references = references;
    /* 024 */     agg_initAgg = false;
    /* 025 */
    /* 026 */
    /* 027 */     this.bhj_broadcast = 
(org.apache.spark.broadcast.TorrentBroadcast) references[0];
    /* 028 */
    /* 029 */     bhj_relation = 
(org.apache.spark.sql.execution.joins.UniqueLongHashedRelation) 
bhj_broadcast.value();
    /* 030 */     incPeakExecutionMemory(bhj_relation.getMemorySize());
    /* 031 */
    /* 032 */     range_initRange = false;
    /* 033 */     range_partitionEnd = 0L;
    /* 034 */     range_number = 0L;
    /* 035 */     range_overflow = false;
    /* 036 */     agg_result = new UnsafeRow(1);
    /* 037 */     this.agg_holder = new 
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(agg_result, 0);
    /* 038 */     this.agg_rowWriter = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(agg_holder, 
1);
    /* 039 */   }
    /* 040 */
    /* 041 */
    /* 042 */   private void agg_doAggregateWithoutKey() throws 
java.io.IOException {
    /* 043 */     // initialize aggregation buffer
    /* 044 */
    /* 045 */     agg_bufIsNull = false;
    /* 046 */     agg_bufValue = 0L;
    /* 047 */
    /* 048 */
    /* 049 */
    /* 050 */
    /* 051 */     // initialize Range
    /* 052 */     if (!range_initRange) {
    /* 053 */       range_initRange = true;
    /* 054 */       if (input.hasNext()) {
    /* 055 */         initRange(((InternalRow) input.next()).getInt(0));
    /* 056 */       } else {
    /* 057 */         return;
    /* 058 */       }
    /* 059 */     }
    /* 060 */
    /* 061 */     while (!range_overflow && range_number < range_partitionEnd) {
    /* 062 */       long range_value = range_number;
    /* 063 */       range_number += 1L;
    /* 064 */       if (range_number < range_value ^ 1L < 0) {
    /* 065 */         range_overflow = true;
    /* 066 */       }
    /* 067 */
    /* 068 */       // generate join key
    /* 069 */       /* (shiftleft(cast(cast((input[0, bigint] & 60000) as int) 
as bigint),32) | (cast(cast((input[0, bigint] & 50000) as int) as bigint) & 0)) 
*/
    /* 070 */       /* shiftleft(cast(cast((input[0, bigint] & 60000) as int) 
as bigint),32) */
    /* 071 */       /* cast(cast((input[0, bigint] & 60000) as int) as bigint) 
*/
    /* 072 */       /* cast((input[0, bigint] & 60000) as int) */
    /* 073 */       /* (input[0, bigint] & 60000) */
    /* 074 */       long bhj_value4 = -1L;
    /* 075 */       bhj_value4 = range_value & 60000L;
    /* 076 */       boolean bhj_isNull3 = false;
    /* 077 */       int bhj_value3 = -1;
    /* 078 */       if (!false) {
    /* 079 */         bhj_value3 = (int) bhj_value4;
    /* 080 */       }
    /* 081 */       boolean bhj_isNull2 = bhj_isNull3;
    /* 082 */       long bhj_value2 = -1L;
    /* 083 */       if (!bhj_isNull3) {
    /* 084 */         bhj_value2 = (long) bhj_value3;
    /* 085 */       }
    /* 086 */
    /* 087 */       long bhj_value1 = -1L;
    /* 088 */       bhj_value1 = bhj_value2 << 32;
    /* 089 */       /* (cast(cast((input[0, bigint] & 50000) as int) as bigint) 
& 0) */
    /* 090 */       /* cast(cast((input[0, bigint] & 50000) as int) as bigint) 
*/
    /* 091 */       /* cast((input[0, bigint] & 50000) as int) */
    /* 092 */       /* (input[0, bigint] & 50000) */
    /* 093 */       long bhj_value11 = -1L;
    /* 094 */       bhj_value11 = range_value & 50000L;
    /* 095 */       boolean bhj_isNull10 = false;
    /* 096 */       int bhj_value10 = -1;
    /* 097 */       if (!false) {
    /* 098 */         bhj_value10 = (int) bhj_value11;
    /* 099 */       }
    /* 100 */       boolean bhj_isNull9 = bhj_isNull10;
    /* 101 */       long bhj_value9 = -1L;
    /* 102 */       if (!bhj_isNull10) {
    /* 103 */         bhj_value9 = (long) bhj_value10;
    /* 104 */       }
    /* 105 */
    /* 106 */       long bhj_value8 = -1L;
    /* 107 */       bhj_value8 = bhj_value9 & 0;
    /* 108 */       long bhj_value = -1L;
    /* 109 */       bhj_value = bhj_value1 | bhj_value8;
    /* 110 */       // find matches from HashedRelation
    /* 111 */       UnsafeRow bhj_matched = false ? null: 
(UnsafeRow)bhj_relation.getValue(bhj_value);
    /* 112 */       if (bhj_matched != null) {
    /* 113 */         /* input[0, int] */
    /* 114 */         int bhj_value15 = bhj_matched.getInt(0);
    /* 115 */         /* input[1, int] */
    /* 116 */         int bhj_value16 = bhj_matched.getInt(1);
    /* 117 */
    /* 118 */
    /* 119 */
    /* 120 */
    /* 121 */         // do aggregate
    /* 122 */         /* (input[0, bigint] + 1) */
    /* 123 */         long agg_value1 = -1L;
    /* 124 */         agg_value1 = agg_bufValue + 1L;
    /* 125 */         // update aggregation buffer
    /* 126 */         agg_bufIsNull = false;
    /* 127 */         agg_bufValue = agg_value1;
    /* 128 */
    /* 129 */
    /* 130 */       }
    /* 131 */
    /* 132 */
    /* 133 */       if (shouldStop()) return;
    /* 134 */     }
    /* 135 */
    /* 136 */
    /* 137 */   }
    /* 138 */
    /* 139 */
    /* 140 */   private void initRange(int idx) {
    /* 141 */     java.math.BigInteger index = 
java.math.BigInteger.valueOf(idx);
    /* 142 */     java.math.BigInteger numSlice = 
java.math.BigInteger.valueOf(1L);
    /* 143 */     java.math.BigInteger numElement = 
java.math.BigInteger.valueOf(104857600L);
    /* 144 */     java.math.BigInteger step = java.math.BigInteger.valueOf(1L);
    /* 145 */     java.math.BigInteger start = java.math.BigInteger.valueOf(0L);
    /* 146 */
    /* 147 */     java.math.BigInteger st = 
index.multiply(numElement).divide(numSlice).multiply(step).add(start);
    /* 148 */     if 
(st.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) {
    /* 149 */       range_number = Long.MAX_VALUE;
    /* 150 */     } else if 
(st.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) {
    /* 151 */       range_number = Long.MIN_VALUE;
    /* 152 */     } else {
    /* 153 */       range_number = st.longValue();
    /* 154 */     }
    /* 155 */
    /* 156 */     java.math.BigInteger end = 
index.add(java.math.BigInteger.ONE).multiply(numElement).divide(numSlice)
    /* 157 */     .multiply(step).add(start);
    /* 158 */     if 
(end.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) {
    /* 159 */       range_partitionEnd = Long.MAX_VALUE;
    /* 160 */     } else if 
(end.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) {
    /* 161 */       range_partitionEnd = Long.MIN_VALUE;
    /* 162 */     } else {
    /* 163 */       range_partitionEnd = end.longValue();
    /* 164 */     }
    /* 165 */   }
    /* 166 */
    /* 167 */
    /* 168 */   protected void processNext() throws java.io.IOException {
    /* 169 */     if (!agg_initAgg) {
    /* 170 */       agg_initAgg = true;
    /* 171 */       agg_doAggregateWithoutKey();
    /* 172 */
    /* 173 */       // output the result
    /* 174 */
    /* 175 */
    /* 176 */       agg_rowWriter.zeroOutNullBytes();
    /* 177 */
    /* 178 */
    /* 179 */       if (agg_bufIsNull) {
    /* 180 */         agg_rowWriter.setNullAt(0);
    /* 181 */       } else {
    /* 182 */         agg_rowWriter.write(0, agg_bufValue);
    /* 183 */       }
    /* 184 */       currentRows.add(agg_result.copy());
    /* 185 */     }
    /* 186 */   }
    /* 187 */ }
    /* 188 */
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to