QasimKhan5d commented on code in PR #29: URL: https://github.com/apache/datasketches-go/pull/29#discussion_r2002793088
########## cpc/cpc_sketch.go: ########## @@ -513,3 +535,149 @@ func (c *CpcSketch) ToCompactSlice() ([]byte, error) { return (byte[]) wmem.getArray(); } */ + +// GetFamily returns the CPC family identifier. +func (c *CpcSketch) GetFamily() internal.Family { + return internal.FamilyEnum.CPC +} + +// GetLgK returns the log-base-2 of K. +func (c *CpcSketch) GetLgK() int { + return c.lgK +} + +// IsEmpty returns true if no coupons have been collected. +func (c *CpcSketch) IsEmpty() bool { + return c.numCoupons == 0 +} + +// Validate recomputes the coupon count from the bit matrix and returns true if it matches the sketch's numCoupons. +func (c *CpcSketch) Validate() bool { + bitMatrix := c.bitMatrixOfSketch() + matrixCoupons := countBitsSetInMatrix(bitMatrix) + return matrixCoupons == c.numCoupons +} + +// Copy creates and returns a deep copy of the CpcSketch. +func (c *CpcSketch) Copy() *CpcSketch { + // Create a new sketch with the same lgK and seed. + copySketch, err := NewCpcSketch(c.lgK, c.seed) + if err != nil { + // This should never happen if the current sketch is valid. + panic(err) + } + // Copy basic fields. + copySketch.numCoupons = c.numCoupons + copySketch.mergeFlag = c.mergeFlag + copySketch.fiCol = c.fiCol + copySketch.windowOffset = c.windowOffset + + // Clone the slidingWindow slice if present. + if c.slidingWindow != nil { + copySketch.slidingWindow = make([]byte, len(c.slidingWindow)) + copy(copySketch.slidingWindow, c.slidingWindow) + } else { + copySketch.slidingWindow = nil + } + + // Copy the pair table if present. + if c.pairTable != nil { + copySketch.pairTable = c.pairTable.Copy() // Assumes pairTable has a Copy() method. + } else { + copySketch.pairTable = nil + } + + // Copy floating-point accumulators. + copySketch.kxp = c.kxp + copySketch.hipEstAccum = c.hipEstAccum + + /* + Added the copy of the scratch buffer to ensure that every field, even temporary ones, in the struct is duplicated, + so that the copy is entirely independent of the original. Since the scratch buffer is part of the struct, we copy it too. + */ + copy(copySketch.scratch[:], c.scratch[:]) + + return copySketch +} + +// RefreshKXP recalculates the KXP register of the sketch from the given bitMatrix. +// It improves numerical accuracy by summing the contributions of each byte separately. +func (c *CpcSketch) RefreshKXP(bitMatrix []uint64) error { + // k is the number of rows in the bitMatrix. + k := 1 << c.lgK + + // Initialize an array of 8 float64 values (one per byte position). + byteSums := make([]float64, 8) + for i := 0; i < 8; i++ { + byteSums[i] = 0.0 + } + + // For each row in the matrix, extract each byte and accumulate its contribution. + for i := 0; i < k; i++ { + row := bitMatrix[i] + for j := 0; j < 8; j++ { + byteIdx := int(row & 0xFF) + byteSums[j] += kxpByteLookup[byteIdx] + row >>= 8 // Logical right shift. + } + } + + total := 0.0 + // Loop in reverse order: from j = 6 down to 0. + for j := 6; j >= 0; j-- { + factor, err := internal.InvPow2(8 * j) + if err != nil { + return err + } + total += factor * byteSums[j] + } + c.kxp = total + return nil +} + +// RowColUpdate processes the given rowCol value and updates the sketch accordingly. +// It returns an error if any sub-method fails. +func (c *CpcSketch) RowColUpdate(rowCol int) error { Review Comment: removed redundant RowColUpdate since it was already there -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@datasketches.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@datasketches.apache.org For additional commands, e-mail: dev-h...@datasketches.apache.org