pitrou commented on code in PR #39681: URL: https://github.com/apache/arrow/pull/39681#discussion_r1469872617
########## java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java: ########## @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; +import org.apache.arrow.vector.complex.impl.Float2ReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.Float2Holder; +import org.apache.arrow.vector.holders.NullableFloat2Holder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; + +/** + * Float2Vector implements a fixed width (2 bytes) vector of + * short values which could be null. A validity buffer (bit vector) is + * maintained to track which elements in the vector are null. + */ +public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector { + public static final byte TYPE_WIDTH = 2; + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, BufferAllocator allocator) { + this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param fieldType type of Field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) { + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + } + + @Override + protected FieldReader getReaderImpl() { + return new Float2ReaderImpl(Float2Vector.this); + } + + /** + * Get minor type for this vector. The vector holds values belonging + * to a particular type. + * + * @return {@link MinorType} + */ + @Override + public MinorType getMinorType() { + return MinorType.FLOAT2; + } + + + /*----------------------------------------------------------------* + | | + | vector value retrieval methods | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the element at the given index from the vector. + * + * @param index position of element + * @return element at given index + */ + public short get(int index) throws IllegalStateException { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + throw new IllegalStateException("Value at index is null"); + } + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Get the element at the given index from the vector and + * sets the state in holder. If element at given index + * is null, holder.isSet will be zero. + * + * @param index position of element + */ + public void get(int index, NullableFloat2Holder holder) { + if (isSet(index) == 0) { + holder.isSet = 0; + return; + } + holder.isSet = 1; + holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Same as {@link #get(int)}. + * + * @param index position of element + * @return element at given index + */ + @Override + public Short getObject(int index) { + if (isSet(index) == 0) { + return null; + } else { + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + } + + /** + * Given a data buffer, get the value stored at a particular position + * in the vector. + * + * <p>This method should not be used externally. + * + * @param buffer data buffer + * @param index position of the element. + * @return value stored at the index. + */ + static short get(final ArrowBuf buffer, final int index) { + return buffer.getShort((long) index * TYPE_WIDTH); + } + + @Override + public double getValueAsDouble(int index) { + return this.get(index); + } + + public float getValueAsFloat(int index) { + return Float16.toFloat(this.get(index)); + } + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + private void setValue(int index, short value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, value); + } + + private void setValue(int index, float value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value)); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void set(int index, short value) { Review Comment: If I call `set(index, float)` or `set(index, double)`, what happens? Is this method called implicitly? ########## java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java: ########## @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; +import org.apache.arrow.vector.complex.impl.Float2ReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.Float2Holder; +import org.apache.arrow.vector.holders.NullableFloat2Holder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; + +/** + * Float2Vector implements a fixed width (2 bytes) vector of + * short values which could be null. A validity buffer (bit vector) is + * maintained to track which elements in the vector are null. + */ +public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector { + public static final byte TYPE_WIDTH = 2; + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, BufferAllocator allocator) { + this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param fieldType type of Field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) { + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + } + + @Override + protected FieldReader getReaderImpl() { + return new Float2ReaderImpl(Float2Vector.this); + } + + /** + * Get minor type for this vector. The vector holds values belonging + * to a particular type. + * + * @return {@link MinorType} + */ + @Override + public MinorType getMinorType() { + return MinorType.FLOAT2; + } + + + /*----------------------------------------------------------------* + | | + | vector value retrieval methods | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the element at the given index from the vector. + * + * @param index position of element + * @return element at given index + */ + public short get(int index) throws IllegalStateException { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + throw new IllegalStateException("Value at index is null"); + } + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Get the element at the given index from the vector and + * sets the state in holder. If element at given index + * is null, holder.isSet will be zero. + * + * @param index position of element + */ + public void get(int index, NullableFloat2Holder holder) { + if (isSet(index) == 0) { + holder.isSet = 0; + return; + } + holder.isSet = 1; + holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Same as {@link #get(int)}. + * + * @param index position of element + * @return element at given index + */ + @Override + public Short getObject(int index) { + if (isSet(index) == 0) { + return null; + } else { + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + } + + /** + * Given a data buffer, get the value stored at a particular position + * in the vector. + * + * <p>This method should not be used externally. + * + * @param buffer data buffer + * @param index position of the element. + * @return value stored at the index. + */ + static short get(final ArrowBuf buffer, final int index) { + return buffer.getShort((long) index * TYPE_WIDTH); + } + + @Override + public double getValueAsDouble(int index) { + return this.get(index); Review Comment: So this converts the short into a double directly? Isn't this wrong? ########## java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java: ########## @@ -332,6 +332,204 @@ public void testSizeOfValueBuffer() { } } + @Test + public void testFixedFloat2() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f) + floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f) + floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f) + floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f) + floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f) + + try { + floatVector.set(initialCapacity, (short) 0x141c); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f) + + /* underlying buffer should now be able to store double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + assertEquals((short) 0x141c, floatVector.get(initialCapacity)); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* vector data should be zeroed out */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + + @Test + public void testFixedFloat2WithPossibleTruncate() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048 + floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096 + floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192 + floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384 + floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768 + + try { + floatVector.setWithPossibleTruncate(initialCapacity, 65519.0f); // in f32=65519.000000, out f16=65504 + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsFloat(12), 0); + assertEquals(32768.0f, floatVector.getValueAsFloat(14), 0); Review Comment: Can you please test `getValueAsDouble` as well? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
