[
https://issues.apache.org/jira/browse/CASSANDRA-13676?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Hao Zhong updated CASSANDRA-13676:
----------------------------------
Description:
When fixing CASSANDRA-2382, Jonathan Ellis complained that some serializers did
(and perhaps still do) depend on Stream-specific methods. The buggy code was as follows:
{code}
public static class EstimatedHistogramSerializer implements ICompactSerializer<EstimatedHistogram>
{
    public void serialize(EstimatedHistogram eh, DataOutputStream dos) throws IOException
    {
        long[] offsets = eh.getBucketOffsets();
        long[] buckets = eh.getBuckets(false);
        dos.writeInt(buckets.length);
        for (int i = 0; i < buckets.length; i++)
        {
            dos.writeLong(offsets[i == 0 ? 0 : i - 1]);
            dos.writeLong(buckets[i]);
        }
    }

    public EstimatedHistogram deserialize(DataInputStream dis) throws IOException
    {
        int size = dis.readInt();
        long[] offsets = new long[size - 1];
        long[] buckets = new long[size];
        for (int i = 0; i < size; i++)
        {
            offsets[i == 0 ? 0 : i - 1] = dis.readLong();
            buckets[i] = dis.readLong();
        }
        return new EstimatedHistogram(offsets, buckets);
    }
}
{code}
The fixed code is:
{code}
public static class EstimatedHistogramSerializer implements ICompactSerializer2<EstimatedHistogram>
{
    public void serialize(EstimatedHistogram eh, DataOutput dos) throws IOException
    {
        long[] offsets = eh.getBucketOffsets();
        long[] buckets = eh.getBuckets(false);
        dos.writeInt(buckets.length);
        for (int i = 0; i < buckets.length; i++)
        {
            dos.writeLong(offsets[i == 0 ? 0 : i - 1]);
            dos.writeLong(buckets[i]);
        }
    }

    public EstimatedHistogram deserialize(DataInput dis) throws IOException
    {
        int size = dis.readInt();
        long[] offsets = new long[size - 1];
        long[] buckets = new long[size];
        for (int i = 0; i < size; i++)
        {
            offsets[i == 0 ? 0 : i - 1] = dis.readLong();
            buckets[i] = dis.readLong();
        }
        return new EstimatedHistogram(offsets, buckets);
    }
}
{code}
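The important change is in the signatures: by accepting the java.io.DataInput and DataOutput interfaces rather than the concrete stream classes, the serializer no longer cares where the bytes come from or go to. As a rough, self-contained illustration (not Cassandra code; the writePayload/readPayload methods below are stand-ins, not the actual serializer), the same interface-based code can be driven either by ordinary streams or by a RandomAccessFile, which implements both interfaces directly:
{code}
// Illustrative sketch only: shows why programming against DataInput/DataOutput
// is more flexible than requiring DataInputStream/DataOutputStream.
import java.io.*;

public class DataInputOutputDemo
{
    // A stand-in serializer that only needs the DataOutput/DataInput interfaces.
    static void writePayload(DataOutput out) throws IOException
    {
        out.writeInt(2);
        out.writeLong(10L);
        out.writeLong(20L);
    }

    static long[] readPayload(DataInput in) throws IOException
    {
        int size = in.readInt();
        long[] values = new long[size];
        for (int i = 0; i < size; i++)
            values[i] = in.readLong();
        return values;
    }

    public static void main(String[] args) throws IOException
    {
        // Works with ordinary streams...
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        writePayload(new DataOutputStream(baos));
        long[] fromStream = readPayload(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));

        // ...and equally with a RandomAccessFile, which implements DataInput and
        // DataOutput itself, with no stream wrapper at all.
        File f = File.createTempFile("demo", ".bin");
        try (RandomAccessFile raf = new RandomAccessFile(f, "rw"))
        {
            writePayload(raf);
            raf.seek(0);
            long[] fromFile = readPayload(raf);
            System.out.println(fromStream.length + " values via stream, " + fromFile.length + " via file");
        }
        f.delete();
    }
}
{code}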
I notice that some serializers still depend on Stream-specific methods. For
example, IndexSummary's deserialize method has the following code:
{code}
public IndexSummary deserialize(DataInputStream in, IPartitioner partitioner, int expectedMinIndexInterval, int maxIndexInterval) throws IOException
{
    int minIndexInterval = in.readInt();
    if (minIndexInterval != expectedMinIndexInterval)
    {
        throw new IOException(String.format("Cannot read index summary because min_index_interval changed from %d to %d.",
                                            minIndexInterval, expectedMinIndexInterval));
    }
    int offsetCount = in.readInt();
    long offheapSize = in.readLong();
    int samplingLevel = in.readInt();
    int fullSamplingSummarySize = in.readInt();
    int effectiveIndexInterval = (int) Math.ceil((BASE_SAMPLING_LEVEL / (double) samplingLevel) * minIndexInterval);
    if (effectiveIndexInterval > maxIndexInterval)
    {
        throw new IOException(String.format("Rebuilding index summary because the effective index interval (%d) is higher than" +
                                            " the current max index interval (%d)", effectiveIndexInterval, maxIndexInterval));
    }

    Memory offsets = Memory.allocate(offsetCount * 4);
    Memory entries = Memory.allocate(offheapSize - offsets.size());
    try
    {
        FBUtilities.copy(in, new MemoryOutputStream(offsets), offsets.size());
        FBUtilities.copy(in, new MemoryOutputStream(entries), entries.size());
    }
    catch (IOException ioe)
    {
        offsets.free();
        entries.free();
        throw ioe;
    }
    // our on-disk representation treats the offsets and the summary data as one contiguous structure,
    // in which the offsets are based from the start of the structure. i.e., if the offsets occupy
    // X bytes, the value of the first offset will be X. In memory we split the two regions up, so that
    // the summary values are indexed from zero, so we apply a correction to the offsets when de/serializing.
    // In this case subtracting X from each of the offsets.
    for (int i = 0 ; i < offsets.size() ; i += 4)
        offsets.setInt(i, (int) (offsets.getInt(i) - offsets.size()));
    return new IndexSummary(partitioner, offsets, offsetCount, entries, entries.size(), fullSamplingSummarySize, minIndexInterval, samplingLevel);
}
{code}
Is it worth replacing the Stream-specific inputs as well?
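For what it's worth, the only Stream-specific part of the method above appears to be the two FBUtilities.copy calls, which take an InputStream. A minimal sketch of how the same copy could be expressed against DataInput (the helper name copyBytes below is hypothetical, not an existing Cassandra or JDK method) might look like this:
{code}
// Hypothetical helper, for illustration only: copies 'length' bytes from a
// DataInput to an OutputStream in fixed-size chunks, so a deserialize method
// could accept DataInput rather than DataInputStream.
import java.io.DataInput;
import java.io.IOException;
import java.io.OutputStream;

final class CopyUtil
{
    static void copyBytes(DataInput in, OutputStream out, long length) throws IOException
    {
        byte[] buffer = new byte[64 * 1024];
        long remaining = length;
        while (remaining > 0)
        {
            int chunk = (int) Math.min(buffer.length, remaining);
            in.readFully(buffer, 0, chunk);   // DataInput guarantees all 'chunk' bytes are read
            out.write(buffer, 0, chunk);
            remaining -= chunk;
        }
    }
}
{code}
With something along these lines, the signature could become deserialize(DataInput in, ...) and the two copy calls would become copyBytes(in, new MemoryOutputStream(offsets), offsets.size()) and copyBytes(in, new MemoryOutputStream(entries), entries.size()).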
> Some serializers depend on Stream-specific methods
> --------------------------------------------------
>
> Key: CASSANDRA-13676
> URL: https://issues.apache.org/jira/browse/CASSANDRA-13676
> Project: Cassandra
> Issue Type: Bug
> Reporter: Hao Zhong
>