This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
commit 96e79dd68da18b9a205577ee40122c03b1d25218 Author: Pierre Lacave <[email protected]> AuthorDate: Thu Dec 14 20:12:36 2023 +0100 Minor cleanup in HLL and remove interface for auxHashMap and rather use a concrete type directly --- hll/aux_hash_map.go | 51 +++++++++++++++++++-------------------------------- hll/coupon.go | 7 +++---- hll/hll_4update.go | 36 ++++++++++++++++++++---------------- hll/hll_6array.go | 3 +-- hll/hll_8array.go | 3 +-- hll/hll_array.go | 12 ++++++------ hll/hll_sketch.go | 44 +++++++++++++++++++------------------------- 7 files changed, 69 insertions(+), 87 deletions(-) diff --git a/hll/aux_hash_map.go b/hll/aux_hash_map.go index 9d09534..f80014c 100644 --- a/hll/aux_hash_map.go +++ b/hll/aux_hash_map.go @@ -22,37 +22,24 @@ import ( "fmt" ) -type auxHashMap interface { - mustFindValueFor(slotNo int) int - mustReplace(slotNo int, value int) - mustAdd(slotNo int, value int) - iterator() pairIterator - getAuxCount() int - getLgAuxArrInts() int - copy() auxHashMap - getUpdatableSizeBytes() int - getCompactSizeBytes() int - getAuxIntArr() []int -} - -// auxHashMapImpl is a hash table for the Aux array. -type auxHashMapImpl struct { +// auxHashMap is a hash table for the Aux array. +type auxHashMap struct { lgConfigK int //required for #slot bits lgAuxArrInts int auxCount int auxIntArr []int } -func (a *auxHashMapImpl) copy() auxHashMap { - newA := a +func (a *auxHashMap) copy() *auxHashMap { + newA := *a newA.auxIntArr = make([]int, len(a.auxIntArr)) copy(newA.auxIntArr, a.auxIntArr) - return newA + return &newA } // newAuxHashMap returns a new auxHashMap. -func newAuxHashMap(lgAuxArrInts int, lgConfigK int) auxHashMap { - return &auxHashMapImpl{ +func newAuxHashMap(lgAuxArrInts int, lgConfigK int) *auxHashMap { + return &auxHashMap{ lgConfigK: lgConfigK, lgAuxArrInts: lgAuxArrInts, auxCount: 0, @@ -61,7 +48,7 @@ func newAuxHashMap(lgAuxArrInts int, lgConfigK int) auxHashMap { } // deserializeAuxHashMap returns a new auxHashMap from the given byte array. -func deserializeAuxHashMap(byteArray []byte, offset int, lgConfigL int, auxCount int, srcCompact bool) auxHashMap { +func deserializeAuxHashMap(byteArray []byte, offset int, lgConfigL int, auxCount int, srcCompact bool) *auxHashMap { var ( lgAuxArrInts int ) @@ -97,19 +84,19 @@ func deserializeAuxHashMap(byteArray []byte, offset int, lgConfigL int, auxCount return auxMap } -func (a *auxHashMapImpl) getAuxIntArr() []int { +func (a *auxHashMap) getAuxIntArr() []int { return a.auxIntArr } -func (a *auxHashMapImpl) getCompactSizeBytes() int { +func (a *auxHashMap) getCompactSizeBytes() int { return a.auxCount << 2 } -func (a *auxHashMapImpl) getUpdatableSizeBytes() int { +func (a *auxHashMap) getUpdatableSizeBytes() int { return 4 << a.lgAuxArrInts } -func (a *auxHashMapImpl) mustFindValueFor(slotNo int) int { +func (a *auxHashMap) mustFindValueFor(slotNo int) int { index := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo) if index < 0 { panic(fmt.Sprintf("SlotNo not found: %d", slotNo)) @@ -117,7 +104,7 @@ func (a *auxHashMapImpl) mustFindValueFor(slotNo int) int { return getPairValue(a.auxIntArr[index]) } -func (a *auxHashMapImpl) mustReplace(slotNo int, value int) { +func (a *auxHashMap) mustReplace(slotNo int, value int) { index := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo) if index < 0 { pairStr := pairString(pair(slotNo, value)) @@ -129,7 +116,7 @@ func (a *auxHashMapImpl) mustReplace(slotNo int, value int) { // mustAdd adds the slotNo and value to the aux array. // slotNo the index from the HLL array // value the HLL value at the slotNo. -func (a *auxHashMapImpl) mustAdd(slotNo int, value int) { +func (a *auxHashMap) mustAdd(slotNo int, value int) { index := findAuxHashMap(a.auxIntArr, a.lgAuxArrInts, a.lgConfigK, slotNo) pair := pair(slotNo, value) if index >= 0 { @@ -141,22 +128,22 @@ func (a *auxHashMapImpl) mustAdd(slotNo int, value int) { a.checkGrow() } -func (a *auxHashMapImpl) getLgAuxArrInts() int { +func (a *auxHashMap) getLgAuxArrInts() int { return a.lgAuxArrInts } // iterator returns an iterator over the Aux array. -func (a *auxHashMapImpl) iterator() pairIterator { +func (a *auxHashMap) iterator() pairIterator { return newIntArrayPairIterator(a.auxIntArr, a.lgConfigK) } // getAuxCount returns the number of entries in the Aux array. -func (a *auxHashMapImpl) getAuxCount() int { +func (a *auxHashMap) getAuxCount() int { return a.auxCount } // checkGrow checks to see if the aux array should be grown and does so if needed. -func (a *auxHashMapImpl) checkGrow() { +func (a *auxHashMap) checkGrow() { if (resizeDenom * a.auxCount) <= (resizeNumber * len(a.auxIntArr)) { return } @@ -164,7 +151,7 @@ func (a *auxHashMapImpl) checkGrow() { } // growAuxSpace doubles the size of the aux array and reinsert the existing entries. -func (a *auxHashMapImpl) growAuxSpace() { +func (a *auxHashMap) growAuxSpace() { oldArray := a.auxIntArr configKMask := int((1 << a.lgConfigK) - 1) a.lgAuxArrInts++ diff --git a/hll/coupon.go b/hll/coupon.go index 0604aca..2d32b00 100644 --- a/hll/coupon.go +++ b/hll/coupon.go @@ -105,11 +105,10 @@ func mergeCouponTo(from hllCoupon, dest HllSketch) { arrLen := len(intArrFrom) for i := 0; i < arrLen; i++ { pair := intArrFrom[i] - if pair == empty { - continue + if pair != empty { + sk := dest.(*hllSketchImpl).sketch.couponUpdate(pair) + dest.(*hllSketchImpl).sketch = sk } - sk := dest.(*hllSketchImpl).sketch.couponUpdate(pair) - dest.(*hllSketchImpl).sketch = sk } } diff --git a/hll/hll_4update.go b/hll/hll_4update.go index b5d0c54..11df47d 100644 --- a/hll/hll_4update.go +++ b/hll/hll_4update.go @@ -23,18 +23,18 @@ import ( // internalHll4Update is the internal update method for Hll4Array. func internalHll4Update(h *hll4ArrayImpl, slotNo int, newValue int) { - curMin := h.curMin - rawStoredOldNibble := h.getNibble(slotNo) // could be 0 - lb0nOldValue := rawStoredOldNibble + curMin // provable lower bound, could be 0 + var ( + actualOldValue int + shiftedNewValue int //value - curMin + curMin = h.curMin + rawStoredOldNibble = h.getNibble(slotNo) // could be 0 + lb0nOldValue = rawStoredOldNibble + h.curMin // provable lower bound, could be 0 + ) if newValue <= lb0nOldValue { return } - var ( - actualOldValue int - shiftedNewValue int //value - curMin - ) // Based on whether we have an AUX_TOKEN and whether the shiftedNewValue is greater than // AUX_TOKEN, we have four cases for how to actually modify the data structure: // 1. (shiftedNewValue >= AUX_TOKEN) && (rawStoredOldNibble = AUX_TOKEN) //881: @@ -109,14 +109,16 @@ func internalHll4Update(h *hll4ArrayImpl, slotNo int, newValue int) { // Entering this routine assumes that all slots have valid nibbles > 0 and <= 15. // An auxHashMap must exist if any values in the current hllByteArray are already 15. func shiftToBiggerCurMin(h *hll4ArrayImpl) { - oldCurMin := h.curMin - newCurMin := oldCurMin + 1 - lgConfigK := h.lgConfigK - configK := 1 << lgConfigK - configKmask := configK - 1 + var ( + oldCurMin = h.curMin + newCurMin = oldCurMin + 1 + lgConfigK = h.lgConfigK + configK = 1 << lgConfigK + configKmask = configK - 1 - numAtNewCurMin := 0 - numAuxTokens := 0 + numAtNewCurMin = 0 + numAuxTokens = 0 + ) // Walk through the slots of 4-bit array decrementing stored values by one unless it // equals AUX_TOKEN, where it is left alone but counted to be checked later. @@ -143,8 +145,10 @@ func shiftToBiggerCurMin(h *hll4ArrayImpl) { } // If old auxHashMap exists, walk through it updating some slots and build a new auxHashMap // if needed. - var newAuxMap auxHashMap - oldAuxMap := h.auxHashMap + var ( + newAuxMap *auxHashMap + oldAuxMap = h.auxHashMap + ) if oldAuxMap != nil { var ( diff --git a/hll/hll_6array.go b/hll/hll_6array.go index 657d00d..f560091 100644 --- a/hll/hll_6array.go +++ b/hll/hll_6array.go @@ -194,6 +194,5 @@ func (h *hll6Iterator) getValue() int { } func (h *hll6Iterator) getPair() int { - v := h.getValue() - return pair(h.index, v) + return pair(h.index, h.getValue()) } diff --git a/hll/hll_8array.go b/hll/hll_8array.go index c6f69f4..13b4488 100644 --- a/hll/hll_8array.go +++ b/hll/hll_8array.go @@ -165,6 +165,5 @@ func (h *hll8Iterator) getValue() int { } func (h *hll8Iterator) getPair() int { - v := h.getValue() - return pair(h.index, v) + return pair(h.index, h.getValue()) } diff --git a/hll/hll_array.go b/hll/hll_array.go index b72ba5e..b545929 100644 --- a/hll/hll_array.go +++ b/hll/hll_array.go @@ -25,7 +25,7 @@ import ( type hllArray interface { hllSketchBase - getAuxHashMap() auxHashMap + getAuxHashMap() *auxHashMap getAuxStart() int getCurMin() int getHipAccum() float64 @@ -35,7 +35,7 @@ type hllArray interface { getKxQ1() float64 getNumAtCurMin() int - putAuxHashMap(auxHashMap auxHashMap, compact bool) + putAuxHashMap(auxHashMap *auxHashMap, compact bool) putCurMin(curMin int) putHipAccum(hipAccum float64) putKxQ0(kxq0 float64) @@ -60,7 +60,7 @@ type hllArrayImpl struct { hllByteArr []byte - auxHashMap auxHashMap + auxHashMap *auxHashMap auxStart int //used for direct HLL4 } @@ -176,7 +176,7 @@ func (a *hllArrayImpl) isOutOfOrder() bool { return a.oooFrag } -func (a *hllArrayImpl) putAuxHashMap(auxHashMap auxHashMap, _ bool) { +func (a *hllArrayImpl) putAuxHashMap(auxHashMap *auxHashMap, _ bool) { a.auxHashMap = auxHashMap } @@ -203,12 +203,12 @@ func (a *hllArrayImpl) putRebuildCurMinNumKxQFlag(rebuildCurMinNumKxQ bool) { } // getNewAuxHashMap returns a new auxHashMap. -func (a *hllArrayImpl) getNewAuxHashMap() auxHashMap { +func (a *hllArrayImpl) getNewAuxHashMap() *auxHashMap { return newAuxHashMap(lgAuxArrInts[a.lgConfigK], a.lgConfigK) } // getAuxHashMap returns the auxHashMap. -func (a *hllArrayImpl) getAuxHashMap() auxHashMap { +func (a *hllArrayImpl) getAuxHashMap() *auxHashMap { return a.auxHashMap } diff --git a/hll/hll_sketch.go b/hll/hll_sketch.go index 08b0149..71fda5d 100644 --- a/hll/hll_sketch.go +++ b/hll/hll_sketch.go @@ -101,19 +101,6 @@ type hllSketchImpl struct { // extends BaseHllSketch scratch [8]byte } -func (h *hllSketchImpl) Reset() error { - lgK, err := checkLgK(h.sketch.GetLgConfigK()) - if err != nil { - return err - } - couponList, err := newCouponList(lgK, h.sketch.GetTgtHllType(), curMode_LIST) - if err != nil { - return err - } - h.sketch = &couponList - return nil -} - func NewHllSketch(lgConfigK int, tgtHllType TgtHllType) (HllSketch, error) { lgK := lgConfigK lgK, err := checkLgK(lgK) @@ -211,8 +198,7 @@ func (h *hllSketchImpl) GetUpdatableSerializationBytes() int { func (h *hllSketchImpl) UpdateUInt64(datum uint64) { binary.LittleEndian.PutUint64(h.scratch[:], datum) - hi, lo := h.hash(h.scratch[:]) - h.couponUpdate(coupon(hi, lo)) + h.couponUpdate(coupon(h.hash(h.scratch[:]))) } func (h *hllSketchImpl) UpdateInt64(datum int64) { @@ -223,14 +209,12 @@ func (h *hllSketchImpl) UpdateSlice(datum []byte) { if len(datum) == 0 { return } - hi, lo := h.hash(datum) - h.couponUpdate(coupon(hi, lo)) + h.couponUpdate(coupon(h.hash(datum))) } func (h *hllSketchImpl) UpdateString(datum string) { // get a slice to the string data (avoiding a copy to heap) - unsafeSlice := unsafe.Slice(unsafe.StringData(datum), len(datum)) - h.UpdateSlice(unsafeSlice) + h.UpdateSlice(unsafe.Slice(unsafe.StringData(datum), len(datum))) } func (h *hllSketchImpl) IsEmpty() bool { @@ -257,6 +241,19 @@ func (h *hllSketchImpl) GetCurMode() curMode { return h.sketch.GetCurMode() } +func (h *hllSketchImpl) Reset() error { + lgK, err := checkLgK(h.sketch.GetLgConfigK()) + if err != nil { + return err + } + couponList, err := newCouponList(lgK, h.sketch.GetTgtHllType(), curMode_LIST) + if err != nil { + return err + } + h.sketch = &couponList + return nil +} + func (h *hllSketchImpl) iterator() pairIterator { return h.sketch.iterator() } @@ -285,13 +282,11 @@ func (h *hllSketchImpl) mergeTo(dest HllSketch) { } func (h *hllSketchImpl) CopyAs(tgtHllType TgtHllType) HllSketch { - a := h.sketch.copyAs(tgtHllType) - return newHllSketchImpl(a) + return newHllSketchImpl(h.sketch.copyAs(tgtHllType)) } func (h *hllSketchImpl) Copy() HllSketch { - a := h.sketch.copy() - return newHllSketchImpl(a) + return newHllSketchImpl(h.sketch.copy()) } // IsEstimationMode returns true for all sketches in this package. @@ -306,6 +301,5 @@ func (h *hllSketchImpl) GetSerializationVersion() int { } func (h *hllSketchImpl) hash(bs []byte) (uint64, uint64) { - hi, lo := murmur3.Sum128WithSeed(bs, thetacommon.DEFAULT_UPDATE_SEED) - return hi, lo + return murmur3.Sum128WithSeed(bs, thetacommon.DEFAULT_UPDATE_SEED) } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
