imply-cheddar commented on code in PR #16708:
URL: https://github.com/apache/druid/pull/16708#discussion_r1669759288


##########
processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java:
##########
@@ -46,14 +46,16 @@ public class FixedIndexedWriter<T> implements 
DictionaryWriter<T>
   private final Comparator<T> comparator;
   private final ByteBuffer scratch;
   private final ByteBuffer readBuffer;
-  private int numWritten;
+  private final boolean isSorted;
+  private final int width;
+
+  private int cardinality = 0;

Review Comment:
   Yeah, maybe we didn't need the rename to `cardinality`.  At the same time, 
whether the variable is named `cardinality` or `numWritten`, it's still 
returned from the `getCardinality()` method...



##########
processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java:
##########
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.rowsandcols.column;
+
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.Numbers;
+import org.apache.druid.query.rowsandcols.util.FindResult;
+import org.apache.druid.segment.column.ColumnType;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+public class LongArrayColumn implements Column
+{
+  private final long[] vals;
+
+  public LongArrayColumn(
+      long[] vals
+  )
+  {
+    this.vals = vals;
+  }
+
+  @Nonnull
+  @Override
+  public ColumnAccessor toAccessor()
+  {
+    return new MyColumnAccessor();
+  }
+
+  @Nullable
+  @SuppressWarnings("unchecked")
+  @Override
+  public <T> T as(Class<? extends T> clazz)
+  {
+    if (VectorCopier.class.equals(clazz)) {
+      return (T) (VectorCopier) (into, intoStart) -> {
+        if (Integer.MAX_VALUE - vals.length < intoStart) {
+          throw new ISE(
+              "too many rows!!! intoStart[%,d], vals.length[%,d] combine to 
exceed max_int",
+              intoStart,
+              vals.length
+          );
+        }
+        for (int i = 0; i < vals.length; ++i) {
+          into[intoStart + i] = vals[i];
+        }
+      };
+    }
+    if (ColumnValueSwapper.class.equals(clazz)) {
+      return (T) (ColumnValueSwapper) (lhs, rhs) -> {
+        long tmp = vals[lhs];
+        vals[lhs] = vals[rhs];
+        vals[rhs] = tmp;
+      };
+    }
+    return null;
+  }
+
+  private class MyColumnAccessor implements BinarySearchableAccessor
+  {
+    @Override
+    public ColumnType getType()
+    {
+      return ColumnType.LONG;
+    }
+
+    @Override
+    public int numRows()
+    {
+      return vals.length;
+    }
+
+    @Override
+    public boolean isNull(int rowNum)
+    {
+      return false;
+    }
+
+    @Override
+    public Object getObject(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public double getDouble(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public float getFloat(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public long getLong(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public int getInt(int rowNum)
+    {
+      return (int) vals[rowNum];
+    }
+
+    @Override
+    public int compareRows(int lhsRowNum, int rhsRowNum)
+    {
+      return Long.compare(vals[lhsRowNum], vals[rhsRowNum]);
+    }
+
+
+    @Override
+    public FindResult findNull(int startIndex, int endIndex)
+    {
+      return FindResult.notFound(endIndex);
+    }
+
+    @Override
+    public FindResult findDouble(int startIndex, int endIndex, double val)
+    {
+      return findLong(startIndex, endIndex, (int) val);
+    }
+
+    @Override
+    public FindResult findFloat(int startIndex, int endIndex, float val)
+    {
+      return findLong(startIndex, endIndex, (int) val);
+    }
+
+    @Override
+    public FindResult findLong(int startIndex, int endIndex, long val)
+    {
+      if (vals[startIndex] == val) {
+        int end = startIndex + 1;
+
+        while (end < endIndex && vals[end] == val) {
+          ++end;
+        }
+        return FindResult.found(startIndex, end);
+      }
+
+      int i = Arrays.binarySearch(vals, startIndex, endIndex, val);

Review Comment:
   Fwiw, the whole `BinarySearchableAccessor` is actually only used by an 
experimental join implementation that exists to exercise the operator stuff.  
Your commentary about the oddities is totally valid and belongs against that.  
It's not really a solidified part of the code base though, so we should perhaps 
add some javadocs to the interface itself that talk a bit about why it's 
awkward; then, when/if we get back to it, we can improve it.



##########
processing/src/main/java/org/apache/druid/segment/data/FixedIndexedWriter.java:
##########
@@ -197,13 +202,8 @@ private void readPage()
       {
         iteratorBuffer.clear();
         try {
-          if (numWritten - (pos - startPos) < PAGE_SIZE) {
-            int size = (numWritten - (pos - startPos)) * width;
-            iteratorBuffer.limit(size);
-            valuesOut.readFully((long) (pos - startPos) * width, 
iteratorBuffer);
-          } else {
-            valuesOut.readFully((long) (pos - startPos) * width, 
iteratorBuffer);
-          }
+          iteratorBuffer.limit(Math.min(PAGE_SIZE, (cardinality - pos) * 
width));

Review Comment:
   `hasNext()` is implemented in terms of `totalCount`, which had 1 added to it 
for the null case.  The old logic handled the `null` case by using 
`(pos - startPos)`, which follows what you suggest.  For this line (205), `pos` 
has already been incremented once by the time it gets here in the case that 
there are nulls (the first call to `next()` won't call `readPage()`).



##########
processing/src/main/java/org/apache/druid/query/rowsandcols/concrete/ColumnHolderRACColumn.java:
##########
@@ -91,7 +91,7 @@ public int numRows()
       public boolean isNull(int rowNum)
       {
         offset.set(rowNum);
-        return valueSelector.isNull();
+        return valueSelector.getObject() == null;

Review Comment:
   The contract here is supposed to be abandoning the "numerical null" thing 
that the ValueSelectors follow.  It is explicitly "is it null" rather than 
"will it be null if cast to a primitive type"



##########
processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java:
##########
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.rowsandcols.column;
+
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.Numbers;
+import org.apache.druid.query.rowsandcols.util.FindResult;
+import org.apache.druid.segment.column.ColumnType;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+public class LongArrayColumn implements Column
+{
+  private final long[] vals;
+
+  public LongArrayColumn(
+      long[] vals
+  )
+  {
+    this.vals = vals;
+  }
+
+  @Nonnull
+  @Override
+  public ColumnAccessor toAccessor()
+  {
+    return new MyColumnAccessor();
+  }
+
+  @Nullable
+  @SuppressWarnings("unchecked")
+  @Override
+  public <T> T as(Class<? extends T> clazz)
+  {
+    if (VectorCopier.class.equals(clazz)) {
+      return (T) (VectorCopier) (into, intoStart) -> {
+        if (Integer.MAX_VALUE - vals.length < intoStart) {
+          throw new ISE(
+              "too many rows!!! intoStart[%,d], vals.length[%,d] combine to 
exceed max_int",
+              intoStart,
+              vals.length
+          );
+        }
+        for (int i = 0; i < vals.length; ++i) {
+          into[intoStart + i] = vals[i];
+        }
+      };
+    }
+    if (ColumnValueSwapper.class.equals(clazz)) {
+      return (T) (ColumnValueSwapper) (lhs, rhs) -> {
+        long tmp = vals[lhs];
+        vals[lhs] = vals[rhs];
+        vals[rhs] = tmp;
+      };
+    }
+    return null;
+  }
+
+  private class MyColumnAccessor implements BinarySearchableAccessor
+  {
+    @Override
+    public ColumnType getType()
+    {
+      return ColumnType.LONG;
+    }
+
+    @Override
+    public int numRows()
+    {
+      return vals.length;
+    }
+
+    @Override
+    public boolean isNull(int rowNum)
+    {
+      return false;
+    }
+
+    @Override
+    public Object getObject(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public double getDouble(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public float getFloat(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public long getLong(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public int getInt(int rowNum)
+    {
+      return (int) vals[rowNum];
+    }
+
+    @Override
+    public int compareRows(int lhsRowNum, int rhsRowNum)
+    {
+      return Long.compare(vals[lhsRowNum], vals[rhsRowNum]);
+    }
+
+
+    @Override
+    public FindResult findNull(int startIndex, int endIndex)
+    {
+      return FindResult.notFound(endIndex);
+    }
+
+    @Override
+    public FindResult findDouble(int startIndex, int endIndex, double val)
+    {
+      return findLong(startIndex, endIndex, (int) val);
+    }
+
+    @Override
+    public FindResult findFloat(int startIndex, int endIndex, float val)
+    {
+      return findLong(startIndex, endIndex, (int) val);
+    }
+
+    @Override
+    public FindResult findLong(int startIndex, int endIndex, long val)
+    {
+      if (vals[startIndex] == val) {
+        int end = startIndex + 1;
+
+        while (end < endIndex && vals[end] == val) {
+          ++end;
+        }
+        return FindResult.found(startIndex, end);
+      }
+
+      int i = Arrays.binarySearch(vals, startIndex, endIndex, val);

Review Comment:
   These methods are part of the `BinarySearchableAccessor`, which is 
fundamentally just saying "if you want to binary search and you know it's safe, 
use these methods".  Instead of making the accessor itself implement this, it 
probably would've been better to make it another interface off of the `Column`. 
 That's my bad really.



##########
processing/src/main/java/org/apache/druid/query/rowsandcols/column/LongArrayColumn.java:
##########
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.rowsandcols.column;
+
+import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.Numbers;
+import org.apache.druid.query.rowsandcols.util.FindResult;
+import org.apache.druid.segment.column.ColumnType;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.Arrays;
+
+public class LongArrayColumn implements Column
+{
+  private final long[] vals;
+
+  public LongArrayColumn(
+      long[] vals
+  )
+  {
+    this.vals = vals;
+  }
+
+  @Nonnull
+  @Override
+  public ColumnAccessor toAccessor()
+  {
+    return new MyColumnAccessor();
+  }
+
+  @Nullable
+  @SuppressWarnings("unchecked")
+  @Override
+  public <T> T as(Class<? extends T> clazz)
+  {
+    if (VectorCopier.class.equals(clazz)) {
+      return (T) (VectorCopier) (into, intoStart) -> {
+        if (Integer.MAX_VALUE - vals.length < intoStart) {
+          throw new ISE(
+              "too many rows!!! intoStart[%,d], vals.length[%,d] combine to 
exceed max_int",
+              intoStart,
+              vals.length
+          );
+        }
+        for (int i = 0; i < vals.length; ++i) {
+          into[intoStart + i] = vals[i];
+        }
+      };
+    }
+    if (ColumnValueSwapper.class.equals(clazz)) {
+      return (T) (ColumnValueSwapper) (lhs, rhs) -> {
+        long tmp = vals[lhs];
+        vals[lhs] = vals[rhs];
+        vals[rhs] = tmp;
+      };
+    }
+    return null;
+  }
+
+  private class MyColumnAccessor implements BinarySearchableAccessor
+  {
+    @Override
+    public ColumnType getType()
+    {
+      return ColumnType.LONG;
+    }
+
+    @Override
+    public int numRows()
+    {
+      return vals.length;
+    }
+
+    @Override
+    public boolean isNull(int rowNum)
+    {
+      return false;
+    }
+
+    @Override
+    public Object getObject(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public double getDouble(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public float getFloat(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public long getLong(int rowNum)
+    {
+      return vals[rowNum];
+    }
+
+    @Override
+    public int getInt(int rowNum)
+    {
+      return (int) vals[rowNum];
+    }
+
+    @Override
+    public int compareRows(int lhsRowNum, int rhsRowNum)
+    {
+      return Long.compare(vals[lhsRowNum], vals[rhsRowNum]);
+    }
+
+
+    @Override
+    public FindResult findNull(int startIndex, int endIndex)
+    {
+      return FindResult.notFound(endIndex);
+    }
+
+    @Override
+    public FindResult findDouble(int startIndex, int endIndex, double val)
+    {
+      return findLong(startIndex, endIndex, (int) val);

Review Comment:
   My guess is that it's a copy-and-paste error, good catch.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to