mcvsubbu commented on a change in pull request #3624: [PINOT-7370] Return
number of bytes read from the reader interfaces/implementations
URL: https://github.com/apache/incubator-pinot/pull/3624#discussion_r246074869
##########
File path:
pinot-core/src/main/java/com/linkedin/pinot/core/operator/docvalsets/SingleValueSet.java
##########
@@ -56,104 +57,116 @@ public void getIntValues(int[] inDocIds, int inStartPos,
int inDocIdsSize, int[]
} else {
throw new UnsupportedOperationException();
}
+ return inDocIdsSize * Integer.BYTES;
}
@Override
- public void getLongValues(int[] inDocIds, int inStartPos, int inDocIdsSize,
long[] outValues, int outStartPos) {
+ public long getLongValues(int[] inDocIds, int inStartPos, int inDocIdsSize,
long[] outValues, int outStartPos) {
int inEndPos = inStartPos + inDocIdsSize;
ReaderContext context = _reader.createContext();
switch (_dataType) {
case INT:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getInt(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Integer.BYTES;
case LONG:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getLong(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Long.BYTES;
default:
throw new UnsupportedOperationException();
}
}
@Override
- public void getFloatValues(int[] inDocIds, int inStartPos, int inDocIdsSize,
float[] outValues, int outStartPos) {
+ public long getFloatValues(int[] inDocIds, int inStartPos, int inDocIdsSize,
float[] outValues, int outStartPos) {
int inEndPos = inStartPos + inDocIdsSize;
ReaderContext context = _reader.createContext();
switch (_dataType) {
case INT:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getInt(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Integer.BYTES;
case LONG:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getLong(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Long.BYTES;
case FLOAT:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getFloat(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Float.BYTES;
default:
throw new UnsupportedOperationException();
}
}
@Override
- public void getDoubleValues(int[] inDocIds, int inStartPos, int
inDocIdsSize, double[] outValues, int outStartPos) {
+ public long getDoubleValues(int[] inDocIds, int inStartPos, int
inDocIdsSize, double[] outValues, int outStartPos) {
int inEndPos = inStartPos + inDocIdsSize;
ReaderContext context = _reader.createContext();
switch (_dataType) {
case INT:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getInt(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Integer.BYTES;
case LONG:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getLong(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Long.BYTES;
case FLOAT:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getFloat(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Float.BYTES;
case DOUBLE:
for (int i = inStartPos; i < inEndPos; i++) {
outValues[outStartPos++] = _reader.getDouble(inDocIds[i], context);
}
- break;
+ return inDocIdsSize * Double.BYTES;
default:
throw new UnsupportedOperationException();
}
}
@Override
- public void getStringValues(int[] inDocIds, int inStartPos, int
inDocIdsSize, String[] outValues, int outStartPos) {
+ public long getStringValues(int[] inDocIds, int inStartPos, int
inDocIdsSize, String[] outValues, int outStartPos) {
+
+ long bytesRead = 0;
int inEndPos = inStartPos + inDocIdsSize;
ReaderContext context = _reader.createContext();
if (_dataType == DataType.STRING) {
for (int i = inStartPos; i < inEndPos; i++) {
- outValues[outStartPos++] = _reader.getString(inDocIds[i], context);
+ // read as bytes and then decode to string - allows correct estimation
of bytes-read
+ byte[] bytes = _reader.getBytes(inDocIds[i], context);
Review comment:
Calling getBytes() here ends up allocating a new byte array in
VarByteChunkSingleValueReader, right? Whereas calling getString() reuses
the thread-local buffer. Presumably this optimization was made to avoid too
much garbage during queries. Should we introduce a different interface that
returns both the string and the number of bytes?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]