This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 69840d4929ba1c3e774cee3fc29db984d139a062 Author: Michael Blow <[email protected]> AuthorDate: Fri Oct 10 18:03:50 2025 -0400 [NO ISSUE][*DB][COMMON] Reduce logging on dataset async flush batches Ext-ref: MB-68946 Change-Id: I1c2aaa1dbaadf00173e4e955c15c835e5b461ed7 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20479 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Michael Blow <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- .../common/context/DatasetLifecycleManager.java | 19 +- .../org/apache/asterix/common/utils/Datasets.java | 105 +++++++++ .../asterix/common/utils/IntSortedBitSet.java | 234 ++++++++++++++++++++ .../org/apache/asterix/common/utils/IntUtil.java | 119 +++++++++++ .../org/apache/asterix/common/utils/MathUtil.java | 53 +++++ .../apache/asterix/common/utils/Partitions.java | 110 ++++++++++ .../asterix/common/utils/ShortSortedBitSet.java | 235 +++++++++++++++++++++ .../org/apache/asterix/common/utils/ShortUtil.java | 122 +++++++++++ .../apache/asterix/test/utils/MathUtilTest.java | 79 +++++++ .../asterix/test/utils/ShortSortedBitSetTest.java | 108 ++++++++++ 10 files changed, 1183 insertions(+), 1 deletion(-) diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/context/DatasetLifecycleManager.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/context/DatasetLifecycleManager.java index 96f72415e9..e433264b25 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/context/DatasetLifecycleManager.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/context/DatasetLifecycleManager.java @@ -23,6 +23,7 @@ import static org.apache.hyracks.storage.am.lsm.common.impls.LSMComponentId.MIN_ import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -47,6 +48,8 @@ import org.apache.asterix.common.transactions.ILogManager; import org.apache.asterix.common.transactions.IRecoveryManager; import org.apache.asterix.common.transactions.LogRecord; import org.apache.asterix.common.transactions.LogType; +import org.apache.asterix.common.utils.Datasets; +import org.apache.asterix.common.utils.Partitions; import org.apache.asterix.common.utils.StoragePathUtil; import org.apache.hyracks.api.application.INCServiceContext; import org.apache.hyracks.api.exceptions.ErrorCode; @@ -500,12 +503,27 @@ public class DatasetLifecycleManager implements IDatasetLifecycleManager, ILifeC @Override public synchronized void asyncFlushMatchingIndexes(Predicate<ILSMIndex> indexPredicate) throws HyracksDataException { + // Key: PartitionsBitSet (immutable after insertion) -> datasets sharing that partition set + Map<Partitions, Datasets> grouped = new LinkedHashMap<>(); + for (DatasetResource dsr : datasets.values()) { + Partitions flushedPartitions; + flushedPartitions = new Partitions(); for (PrimaryIndexOperationTracker opTracker : dsr.getOpTrackers()) { synchronized (opTracker) { asyncFlush(dsr, opTracker, indexPredicate); + flushedPartitions.add(opTracker.partition); } } + // only log datasets for which a flush was actually scheduled + if (!flushedPartitions.isEmpty()) { + grouped.computeIfAbsent(flushedPartitions, k -> new Datasets()).add(dsr.getDatasetID()); + // IMPORTANT: do not modify flushedPartitions after this point as it is now part of the map key! + } + } + + for (Map.Entry<Partitions, Datasets> e : grouped.entrySet()) { + LOGGER.info("Async flushed dataset(s) " + e.getValue() + " on partition(s) " + e.getKey()); } } @@ -515,7 +533,6 @@ public class DatasetLifecycleManager implements IDatasetLifecycleManager, ILifeC for (ILSMIndex lsmIndex : dsr.getDatasetInfo().getDatasetPartitionOpenIndexes(partition)) { LSMIOOperationCallback ioCallback = (LSMIOOperationCallback) lsmIndex.getIOOperationCallback(); if (needsFlush(opTracker, lsmIndex, ioCallback) && indexPredicate.test(lsmIndex)) { - LOGGER.info("Async flushing {}", opTracker); opTracker.setFlushOnExit(true); opTracker.flushIfNeeded(); break; diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Datasets.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Datasets.java new file mode 100644 index 0000000000..51c1e9d1f7 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Datasets.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.io.Serial; +import java.io.Serializable; +import java.util.function.IntPredicate; + +import it.unimi.dsi.fastutil.ints.IntAVLTreeSet; +import it.unimi.dsi.fastutil.ints.IntCollection; +import it.unimi.dsi.fastutil.ints.IntSortedSet; + +/** + * A specialized set for storing dataset IDs. + */ +public class Datasets implements Serializable { + @Serial + private static final long serialVersionUID = 1L; + private final IntSortedSet delegate; + + public Datasets() { + this.delegate = new IntAVLTreeSet(); + } + + public boolean add(int k) { + return delegate.add(k); + } + + public boolean isEmpty() { + return delegate.isEmpty(); + } + + public void clear() { + delegate.clear(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Datasets other)) { + return false; + } + return delegate.equals(other.delegate); + } + + public boolean contains(int i) { + return delegate.contains(i); + } + + public boolean addAll(IntCollection intCollection) { + return delegate.addAll(intCollection); + } + + public boolean containsAll(IntCollection intCollection) { + return delegate.containsAll(intCollection); + } + + public boolean removeIf(IntPredicate filter) { + return delegate.removeIf(filter); + } + + public boolean removeAll(IntCollection intCollection) { + return delegate.removeAll(intCollection); + } + + public boolean retainAll(IntCollection intCollection) { + return delegate.retainAll(intCollection); + } + + public int[] toArray() { + return delegate.toIntArray(); + } + + public int size() { + return delegate.size(); + } + + @Override + public int hashCode() { + return delegate.hashCode(); + } + + @Override + public String toString() { + return IntUtil.toCompactString(delegate.iterator()); + } +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntSortedBitSet.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntSortedBitSet.java new file mode 100644 index 0000000000..8d151040f6 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntSortedBitSet.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.io.Serial; +import java.io.Serializable; +import java.util.Arrays; +import java.util.NoSuchElementException; + +import it.unimi.dsi.fastutil.ints.AbstractIntSortedSet; +import it.unimi.dsi.fastutil.ints.IntBidirectionalIterator; +import it.unimi.dsi.fastutil.ints.IntCollection; +import it.unimi.dsi.fastutil.ints.IntComparator; +import it.unimi.dsi.fastutil.ints.IntComparators; +import it.unimi.dsi.fastutil.ints.IntSortedSet; + +public class IntSortedBitSet extends AbstractIntSortedSet implements Serializable { + @Serial + private static final long serialVersionUID = 1L; + private static final int BITS_PER_ELEMENT = Long.SIZE; + private long[] storage; + + public IntSortedBitSet(int initialMaxValue) { + storage = new long[initialMaxValue / BITS_PER_ELEMENT + 1]; + } + + public IntSortedBitSet() { + this(1); + } + + public IntSortedBitSet(IntCollection initial) { + this(); + addAll(initial); + } + + public IntSortedBitSet(int[] initial) { + this(Math.max(initial.length - 1, 1)); + for (int value : initial) { + add(value); + } + } + + @Override + public boolean contains(int k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + return storage.length > elementIndex && (storage[elementIndex] & 1L << bitIndex) != 0; + } + + private void checkRange(int k) { + if (k < 0) { + throw new IllegalArgumentException("negative values not supported"); + } + } + + @Override + public boolean add(int k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + ensureCapacity(elementIndex); + boolean absent = (storage[elementIndex] & 1L << bitIndex) == 0; + storage[elementIndex] |= 1L << bitIndex; + return absent; + } + + @Override + public boolean remove(int k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + if (storage.length <= elementIndex) { + return false; + } + boolean present = (storage[elementIndex] & 1L << bitIndex) != 0; + storage[elementIndex] &= ~(1L << bitIndex); + return present; + } + + @Override + public void clear() { + Arrays.fill(storage, 0L); + } + + @Override + public boolean equals(Object o) { + if (o.getClass() == getClass()) { + return Arrays.equals(storage, ((IntSortedBitSet) o).storage); + } + return super.equals(o); + } + + @Override + public int hashCode() { + return Arrays.hashCode(storage); + } + + private void ensureCapacity(int index) { + if (storage.length <= index) { + long[] newStorage = new long[index + 1]; + System.arraycopy(storage, 0, newStorage, 0, storage.length); + storage = newStorage; + } + } + + @Override + public IntSortedSet subSet(int fromElement, int toElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public IntSortedSet headSet(int toElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public IntSortedSet tailSet(int fromElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public IntComparator comparator() { + return IntComparators.NATURAL_COMPARATOR; + } + + @Override + public int firstInt() { + for (int index = 0; index < storage.length; index++) { + if (storage[index] != 0) { + return (int) (index * BITS_PER_ELEMENT + + (int) MathUtil.log2Unsigned(Long.lowestOneBit(storage[index]))); + } + } + return -1; + } + + @Override + public int lastInt() { + for (int index = storage.length - 1; index >= 0; index--) { + if (storage[index] != 0) { + return index * BITS_PER_ELEMENT + (int) MathUtil.log2Unsigned(Long.highestOneBit(storage[index])); + } + } + return -1; + } + + @Override + public int size() { + int size = 0; + for (long element : storage) { + size += Long.bitCount(element); + } + return size; + } + + @Override + public IntBidirectionalIterator iterator() { + final int firstInt = firstInt(); + return iterator(firstInt, firstInt); + } + + @Override + public IntBidirectionalIterator iterator(int fromElement) { + return iterator(firstInt(), fromElement); + } + + @Override + public String toString() { + return IntUtil.toCompactString(iterator()); + } + + private IntBidirectionalIterator iterator(final int first, final int fromElement) { + return new IntBidirectionalIterator() { + final int last = lastInt(); + int position = fromElement; + int lastReturned = -1; + + @Override + public int previousInt() { + for (; position >= first; position--) { + if ((storage[position / BITS_PER_ELEMENT] & 1L << (position % BITS_PER_ELEMENT)) != 0) { + lastReturned = position; + return position--; + } + } + throw new NoSuchElementException(); + } + + @Override + public boolean hasPrevious() { + return first >= 0 && position >= first; + } + + @Override + public int nextInt() { + for (; position <= last; position++) { + if ((storage[position / BITS_PER_ELEMENT] & 1L << (position % BITS_PER_ELEMENT)) != 0) { + lastReturned = position; + return position++; + } + } + throw new NoSuchElementException(); + } + + @Override + public boolean hasNext() { + return last >= 0 && position <= last; + } + + @Override + public void remove() { + IntSortedBitSet.this.remove(lastReturned); + } + }; + } + +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntUtil.java new file mode 100644 index 0000000000..580f98c6e1 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/IntUtil.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.util.NoSuchElementException; + +import it.unimi.dsi.fastutil.ints.IntIterator; + +public class IntUtil { + private IntUtil() { + throw new AssertionError("do not instantiate"); + } + + /** + * Returns a compact string representation of the supplied {@link IntIterator}, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + public static String toCompactString(IntIterator iter) { + return toCompactString(iter, 0); + } + + /** + * Returns a compact string representation of the supplied {@link IntIterator}, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + static String toCompactString(IntIterator iter, int delta) { + if (!iter.hasNext()) { + return "[]"; + } + StringBuilder builder = new StringBuilder(); + builder.append('['); + appendCompact(iter, builder, delta); + builder.append(']'); + return builder.toString(); + } + + /** + * Returns a compact string representation of the supplied int array, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + public static String toCompactString(int[] iter) { + return toCompactString(wrap(iter)); + } + + /** + * Appends the contents of the supplied {@link IntIterator} to the {@link StringBuilder} instance, + * comma-delimited, collapsing ranges together with a hyphen (-). Only provides reasonable + * results if the contents of the iterator are sorted. + */ + public static void appendCompact(IntIterator iter, StringBuilder builder) { + appendCompact(iter, builder, 0); + } + + /** + * Appends the contents of the supplied {@link IntIterator} to the {@link StringBuilder} instance, + * comma-delimited, collapsing ranges together with a hyphen (-). Only provides reasonable + * results if the contents of the iterator are sorted. + */ + static void appendCompact(IntIterator iter, StringBuilder builder, int outputDelta) { + int rangeStart = iter.nextInt(); + builder.append(rangeStart + outputDelta); + int current = rangeStart; + int prev = current; + while (iter.hasNext()) { + current = iter.nextInt(); + if (current != prev + 1) { + // end any range we were in: + if (rangeStart != prev) { + builder.append('-').append(prev + outputDelta); + } + builder.append(",").append(current + outputDelta); + rangeStart = current; + } + prev = current; + } + if (rangeStart != prev) { + builder.append('-').append(prev + outputDelta); + } + } + + public static IntIterator wrap(int... ints) { + return new IntIterator() { + int index = 0; + + @Override + public int nextInt() { + try { + return ints[index++]; + } catch (ArrayIndexOutOfBoundsException e) { + throw new NoSuchElementException(); + } + } + + @Override + public boolean hasNext() { + return index < ints.length; + } + }; + } +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/MathUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/MathUtil.java new file mode 100644 index 0000000000..c435427f7b --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/MathUtil.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import it.unimi.dsi.fastutil.longs.Long2ByteMap; +import it.unimi.dsi.fastutil.longs.Long2ByteOpenHashMap; + +public class MathUtil { + + private static final Long2ByteMap LOG2_MAP; + + static { + LOG2_MAP = new Long2ByteOpenHashMap(); + for (byte i = 0; i < Long.SIZE; i++) { + LOG2_MAP.put(1L << i, i); + } + } + + private MathUtil() { + } + + public static long maxUnsigned(long a, long b) { + return Long.compareUnsigned(a, b) > 0 ? a : b; + } + + public static long minUnsigned(long a, long b) { + return Long.compareUnsigned(a, b) < 0 ? a : b; + } + + public static long log2Unsigned(long value) { + final byte result = LOG2_MAP.getOrDefault(value, Byte.MIN_VALUE); + if (result < 0) { + throw new IllegalArgumentException("cannot resolve log2 value for " + Long.toUnsignedString(value, 16)); + } + return result; + } +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Partitions.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Partitions.java new file mode 100644 index 0000000000..45aaa49150 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/Partitions.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.io.Serial; +import java.io.Serializable; + +import it.unimi.dsi.fastutil.ints.IntIterator; +import it.unimi.dsi.fastutil.ints.IntSortedSet; + +/** + * A specialized bit set for storing partition IDs. + * Partition IDs are in the range [-1, 32766] (inclusive). + * The value -1 is used to represent the "unpartitioned" partition. + * The value 32767 is reserved for the internal shift and cannot be used as a partition ID. + * This class internally shifts the partition IDs by +1 to fit into an unsigned short range [0, 32767]. + */ +public class Partitions implements Serializable { + @Serial + private static final long serialVersionUID = 1L; + private static final int MAX_PARTITION_ID = Integer.MAX_VALUE - 1; + private static final int MIN_PARTITION_ID = -1; + private static final short MINUS_ONE = (short) -1; + private final IntSortedSet delegate; + + public Partitions() { + delegate = new IntSortedBitSet(); + } + + public Partitions(int initialMaxValue) { + delegate = new IntSortedBitSet(initialMaxValue + 1); + } + + public Partitions(IntSortedSet delegate) { + this.delegate = delegate; + } + + /** + * Adds a partition ID to the set. Partition ID must be in the range [-1, 32766]. + * @param k the partition ID to add + * @return true if the partition ID was added, false if it was already present + * @throws IllegalArgumentException if the partition ID is out of range + */ + public boolean add(int k) { + if (k > MAX_PARTITION_ID || k < MIN_PARTITION_ID) { + throw new IllegalArgumentException("Partition number " + k + " out of range"); + } + return delegate.add((short) (k + 1)); + } + + public boolean isEmpty() { + return delegate.isEmpty(); + } + + public void clear() { + delegate.clear(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Partitions other)) { + return false; + } + return delegate.equals(other.delegate); + } + + @Override + public int hashCode() { + return delegate.hashCode(); + } + + @Override + public String toString() { + if (isEmpty()) { + return "[]"; + } + IntIterator iter = delegate.iterator(); + StringBuilder builder = new StringBuilder(); + builder.append('['); + if (delegate.firstInt() == 0) { + builder.append(MINUS_ONE); + iter.nextInt(); // this is the zero we just printed + if (iter.hasNext()) { + builder.append(','); + } + } + IntUtil.appendCompact(iter, builder, MINUS_ONE); + builder.append(']'); + return builder.toString(); + } +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortSortedBitSet.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortSortedBitSet.java new file mode 100644 index 0000000000..4096f794c9 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortSortedBitSet.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.io.Serial; +import java.io.Serializable; +import java.util.Arrays; +import java.util.NoSuchElementException; + +import it.unimi.dsi.fastutil.shorts.AbstractShortSortedSet; +import it.unimi.dsi.fastutil.shorts.ShortBidirectionalIterator; +import it.unimi.dsi.fastutil.shorts.ShortCollection; +import it.unimi.dsi.fastutil.shorts.ShortComparator; +import it.unimi.dsi.fastutil.shorts.ShortComparators; +import it.unimi.dsi.fastutil.shorts.ShortSortedSet; + +public class ShortSortedBitSet extends AbstractShortSortedSet implements Serializable { + @Serial + private static final long serialVersionUID = 1L; + private static final short BITS_PER_ELEMENT = Long.SIZE; + private long[] storage; + + public ShortSortedBitSet(int initialMaxValue) { + storage = new long[initialMaxValue / BITS_PER_ELEMENT + 1]; + } + + public ShortSortedBitSet() { + this(1); + } + + public ShortSortedBitSet(ShortCollection initial) { + this(); + addAll(initial); + } + + public ShortSortedBitSet(short[] initial) { + this(Math.max(initial.length - 1, 1)); + for (short value : initial) { + add(value); + } + } + + @Override + public boolean contains(short k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + return storage.length > elementIndex && (storage[elementIndex] & 1L << bitIndex) != 0; + } + + private void checkRange(int k) { + if (k < 0) { + throw new IllegalArgumentException("negative values not supported"); + } + } + + @Override + public boolean add(short k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + ensureCapacity(elementIndex); + boolean absent = (storage[elementIndex] & 1L << bitIndex) == 0; + storage[elementIndex] |= 1L << bitIndex; + return absent; + } + + @Override + public boolean remove(short k) { + checkRange(k); + int elementIndex = k / BITS_PER_ELEMENT; + int bitIndex = k % BITS_PER_ELEMENT; + if (storage.length <= elementIndex) { + return false; + } + boolean present = (storage[elementIndex] & 1L << bitIndex) != 0; + storage[elementIndex] &= ~(1L << bitIndex); + return present; + } + + @Override + public void clear() { + Arrays.fill(storage, 0L); + } + + @Override + public boolean equals(Object o) { + if (o.getClass() == getClass()) { + return Arrays.equals(storage, ((ShortSortedBitSet) o).storage); + } + return super.equals(o); + } + + @Override + public int hashCode() { + return Arrays.hashCode(storage); + } + + private void ensureCapacity(int index) { + if (storage.length <= index) { + long[] newStorage = new long[index + 1]; + System.arraycopy(storage, 0, newStorage, 0, storage.length); + storage = newStorage; + } + } + + @Override + public ShortSortedSet subSet(short fromElement, short toElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public ShortSortedSet headSet(short toElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public ShortSortedSet tailSet(short fromElement) { + throw new UnsupportedOperationException("nyi"); + } + + @Override + public ShortComparator comparator() { + return ShortComparators.NATURAL_COMPARATOR; + } + + @Override + public short firstShort() { + for (int index = 0; index < storage.length; index++) { + if (storage[index] != 0) { + return (short) (index * BITS_PER_ELEMENT + + (short) MathUtil.log2Unsigned(Long.lowestOneBit(storage[index]))); + } + } + return -1; + } + + @Override + public short lastShort() { + for (int index = storage.length - 1; index >= 0; index--) { + if (storage[index] != 0) { + return (short) (index * BITS_PER_ELEMENT + + (short) MathUtil.log2Unsigned(Long.highestOneBit(storage[index]))); + } + } + return -1; + } + + @Override + public int size() { + int size = 0; + for (long element : storage) { + size += Long.bitCount(element); + } + return size; + } + + @Override + public ShortBidirectionalIterator iterator() { + final short firstShort = firstShort(); + return iterator(firstShort, firstShort); + } + + @Override + public ShortBidirectionalIterator iterator(short fromElement) { + return iterator(firstShort(), fromElement); + } + + @Override + public String toString() { + return ShortUtil.toCompactString(iterator()); + } + + private ShortBidirectionalIterator iterator(final short first, final short fromElement) { + return new ShortBidirectionalIterator() { + final short last = lastShort(); + short position = fromElement; + short lastReturned = -1; + + @Override + public short previousShort() { + for (; position >= first; position--) { + if ((storage[position / BITS_PER_ELEMENT] & 1L << (position % BITS_PER_ELEMENT)) != 0) { + lastReturned = position; + return position--; + } + } + throw new NoSuchElementException(); + } + + @Override + public boolean hasPrevious() { + return first >= 0 && position >= first; + } + + @Override + public short nextShort() { + for (; position <= last; position++) { + if ((storage[position / BITS_PER_ELEMENT] & 1L << (position % BITS_PER_ELEMENT)) != 0) { + lastReturned = position; + return position++; + } + } + throw new NoSuchElementException(); + } + + @Override + public boolean hasNext() { + return last >= 0 && position <= last; + } + + @Override + public void remove() { + ShortSortedBitSet.this.remove(lastReturned); + } + }; + } + +} diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortUtil.java new file mode 100644 index 0000000000..e5df8d3406 --- /dev/null +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/ShortUtil.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.common.utils; + +import java.util.NoSuchElementException; + +import it.unimi.dsi.fastutil.shorts.ShortIterator; + +public class ShortUtil { + private ShortUtil() { + throw new AssertionError("do not instantiate"); + } + + /** + * Returns a compact string representation of the supplied {@link ShortIterator}, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + public static String toCompactString(ShortIterator iter) { + return toCompactString(iter, (short) 0); + } + + /** + * Returns a compact string representation of the supplied {@link ShortIterator}, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + static String toCompactString(ShortIterator iter, short delta) { + if (!iter.hasNext()) { + return "[]"; + } + StringBuilder builder = new StringBuilder(); + builder.append('['); + appendCompact(iter, builder, delta); + builder.append(']'); + return builder.toString(); + } + + /** + * Returns a compact string representation of the supplied short array, enclosed + * by square-brackets ([]), comma-delimited, collapsing ranges together with a hyphen (-). + * Only provides reasonable results if the contents of the iterator are sorted. + */ + public static String toCompactString(short[] iter) { + return toCompactString(wrap(iter)); + } + + /** + * Appends the contents of the supplied {@link ShortIterator} to the {@link StringBuilder} instance, + * comma-delimited, collapsing ranges together with a hyphen (-). Only provides reasonable + * results if the contents of the iterator are sorted. + */ + public static void appendCompact(ShortIterator iter, StringBuilder builder) { + appendCompact(iter, builder, (short) 0); + } + + /** + * Appends the contents of the supplied {@link ShortIterator} to the {@link StringBuilder} instance, + * comma-delimited, collapsing ranges together with a hyphen (-). Only provides reasonable + * results if the contents of the iterator are sorted. + */ + static void appendCompact(ShortIterator iter, StringBuilder builder, short outputDelta) { + if (!iter.hasNext()) { + return; + } + short rangeStart = iter.nextShort(); + builder.append(rangeStart + outputDelta); + short current = rangeStart; + short prev = current; + while (iter.hasNext()) { + current = iter.nextShort(); + if (current != prev + 1) { + // end any range we were in: + if (rangeStart != prev) { + builder.append('-').append(prev + outputDelta); + } + builder.append(",").append(current + outputDelta); + rangeStart = current; + } + prev = current; + } + if (rangeStart != prev) { + builder.append('-').append(prev + outputDelta); + } + } + + public static ShortIterator wrap(short... shorts) { + return new ShortIterator() { + int index = 0; + + @Override + public short nextShort() { + try { + return shorts[index++]; + } catch (ArrayIndexOutOfBoundsException e) { + throw new NoSuchElementException(); + } + } + + @Override + public boolean hasNext() { + return index < shorts.length; + } + }; + } +} diff --git a/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/MathUtilTest.java b/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/MathUtilTest.java new file mode 100644 index 0000000000..70c77b763e --- /dev/null +++ b/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/MathUtilTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.test.utils; + +import org.apache.asterix.common.utils.MathUtil; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +public class MathUtilTest { + private static final Logger LOGGER = LogManager.getLogger(); + private static final int BENCHMARK_ITERATIONS = 5000000; + private final double LOG_2 = Math.log(2); + + @Test + public void test() { + for (int i = 0; i < Long.SIZE; i++) { + long value = 1L << i; + Assert.assertEquals(Long.toUnsignedString(value, 16), + value == Long.MIN_VALUE ? Long.SIZE - 1 : (long) (Math.log(value) / LOG_2), + MathUtil.log2Unsigned(value)); + } + } + + @Test + public void benchmarkLookupTable() { + lookupTableRun(); + lookupTableRun(); + } + + @Test + public void benchmarkMathLog() { + mathLogRun(); + mathLogRun(); + } + + private void lookupTableRun() { + long start = System.nanoTime(); + for (int j = 0; j < BENCHMARK_ITERATIONS; j++) { + for (int i = 0; i < Long.SIZE; i++) { + final long value = 1L << i; + MathUtil.log2Unsigned(value); + } + } + LOGGER.info("total time for {} iterations of lookup table: {}ns", BENCHMARK_ITERATIONS * Long.SIZE, + System.nanoTime() - start); + } + + private void mathLogRun() { + long ignored = 0; + long start = System.nanoTime(); + for (int j = 0; j < BENCHMARK_ITERATIONS; j++) { + for (int i = 0; i < Long.SIZE; i++) { + final long value = 1L << i; + ignored += (long) (Math.log(value) / LOG_2); + } + } + LOGGER.info("total time for {} iterations of Math.log(n) / Math.log(2): {}ns", BENCHMARK_ITERATIONS * Long.SIZE, + System.nanoTime() - start); + } +} diff --git a/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/ShortSortedBitSetTest.java b/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/ShortSortedBitSetTest.java new file mode 100644 index 0000000000..561e647785 --- /dev/null +++ b/asterixdb/asterix-common/src/test/java/org/apache/asterix/test/utils/ShortSortedBitSetTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.test.utils; + +import java.util.Random; + +import org.apache.asterix.common.utils.ShortSortedBitSet; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +import it.unimi.dsi.fastutil.shorts.ShortIterator; +import it.unimi.dsi.fastutil.shorts.ShortSortedSet; + +public class ShortSortedBitSetTest { + private static final Logger LOGGER = LogManager.getLogger(); + + @Test + public void test() { + Random r = new Random(); + ShortSortedSet set = new ShortSortedBitSet(); + short uniques = 0; + for (short i = 0; i < 500; i++) { + uniques += set.add((short) r.nextInt(1000)) ? 1 : 0; + } + LOGGER.info("set {}", set); + LOGGER.info("set contains {} unique elements", uniques); + Assert.assertEquals("set size() != number of uniques", uniques, set.size()); + for (short values : set) { + uniques--; + } + Assert.assertEquals("iteration count != number of uniques", 0, uniques); + int size = set.size(); + short removed = 0; + for (ShortIterator iter = set.iterator(); iter.hasNext();) { + iter.nextShort(); + iter.remove(); + removed++; + } + Assert.assertEquals("removed count != before size", size, removed); + Assert.assertEquals("new size != 0", 0, set.size()); + for (ShortIterator iter = set.iterator(); iter.hasNext();) { + Assert.fail("got iteration on empty set: " + iter.nextShort()); + } + for (short value : set) { + Assert.fail("got iteration on empty set: " + value); + } + } + + @Test + public void testSingletonSets() { + for (short i = 0; i <= 16384; i++) { + testSingletonSet(i); + } + } + + private void testSingletonSet(short key) { + LOGGER.log(key % 1024 == 0 ? Level.INFO : Level.DEBUG, "testing {}", key); + ShortSortedSet set = new ShortSortedBitSet(); + set.add(key); + final int size = set.size(); + LOGGER.debug("singleton set {} contains {} unique elements", set, size); + Assert.assertEquals("singleton size != 1", 1, size); + for (short values : set) { + LOGGER.debug("got value: {}", values); + } + int removed = 0; + for (ShortIterator iter = set.iterator(); iter.hasNext();) { + iter.nextShort(); + iter.remove(); + removed++; + } + Assert.assertEquals("removed count != before size", size, removed); + Assert.assertEquals("new size != 0", 0, set.size()); + } + + @Test + public void emptySetTest() { + ShortSortedSet set = new ShortSortedBitSet(); + LOGGER.info("set {}", set); + LOGGER.info("set contains {} unique elements", set.size()); + for (ShortIterator iter = set.iterator(); iter.hasNext();) { + Assert.fail("got iteration on empty set: " + iter.nextShort()); + } + for (short value : set) { + Assert.fail("got iteration on empty set: " + value); + } + } +}
