[ 
https://issues.apache.org/jira/browse/MAHOUT-799?focusedWorklogId=1001490&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-1001490
 ]

ASF GitHub Bot logged work on MAHOUT-799:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 23/Jan/26 07:08
            Start Date: 23/Jan/26 07:08
    Worklog Time Spent: 10m 
      Work Description: rich7420 commented on code in PR #868:
URL: https://github.com/apache/mahout/pull/868#discussion_r2719870400


##########
qdp/qdp-kernels/src/iqp.cu:
##########
@@ -0,0 +1,232 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IQP (Instantaneous Quantum Polynomial) Encoding CUDA Kernels
+//
+// Creates entangled quantum states via diagonal phase gates:
+// |psi> = H^n * U_phase(data) * H^n |0>^n
+//
+// The amplitude for basis state |z> is:
+// amplitude[z] = (1/2^n) * sum_x exp(i*theta(x)) * (-1)^popcount(x AND z)
+//
+// Two variants:
+// - enable_zz=0: theta(x) = sum_i x_i * data_i  (n parameters)
+// - enable_zz=1: theta(x) = sum_i x_i * data_i + sum_{i<j} x_i * x_j * data_ij
+//                (n + n*(n-1)/2 parameters)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+// Compute phase theta(x) for a given basis state x
+__device__ double compute_phase(
+    const double* __restrict__ data,
+    size_t x,
+    unsigned int num_qubits,
+    int enable_zz
+) {
+    double phase = 0.0;
+
+    // Single-qubit Z terms: sum_i x_i * data[i]
+    for (unsigned int i = 0; i < num_qubits; ++i) {
+        if ((x >> i) & 1U) {
+            phase += data[i];
+        }
+    }
+
+    // Two-qubit ZZ terms (if enabled): sum_{i<j} x_i * x_j * data[n + pair_index]
+    if (enable_zz) {
+        unsigned int pair_idx = num_qubits;
+        for (unsigned int i = 0; i < num_qubits; ++i) {
+            for (unsigned int j = i + 1; j < num_qubits; ++j) {
+                if (((x >> i) & 1U) && ((x >> j) & 1U)) {
+                    phase += data[pair_idx];
+                }
+                pair_idx++;
+            }
+        }
+    }
+
+    return phase;
+}
+
+__global__ void iqp_encode_kernel(
+    const double* __restrict__ data,
+    cuDoubleComplex* __restrict__ state,
+    size_t state_len,
+    unsigned int num_qubits,
+    int enable_zz
+) {
+    size_t z = blockIdx.x * blockDim.x + threadIdx.x;
+    if (z >= state_len) return;
+
+    double real_sum = 0.0;
+    double imag_sum = 0.0;
+
+    // Sum over all input basis states x
+    for (size_t x = 0; x < state_len; ++x) {
+        double phase = compute_phase(data, x, num_qubits, enable_zz);
+
+        // Compute (-1)^{popcount(x AND z)} using __popcll intrinsic
+        int parity = __popcll(x & z) & 1;
+        double sign = (parity == 0) ? 1.0 : -1.0;
+
+        // Accumulate: sign * exp(i*phase) = sign * (cos(phase) + i*sin(phase))
+        double cos_phase, sin_phase;
+        sincos(phase, &sin_phase, &cos_phase);
+        real_sum += sign * cos_phase;
+        imag_sum += sign * sin_phase;
+    }
+
+    // Normalize by 1/2^n (state_len = 2^n)
+    double norm = 1.0 / (double)state_len;
+    state[z] = make_cuDoubleComplex(real_sum * norm, imag_sum * norm);
+}
+
+__global__ void iqp_encode_batch_kernel(
+    const double* __restrict__ data_batch,
+    cuDoubleComplex* __restrict__ state_batch,
+    size_t num_samples,
+    size_t state_len,
+    unsigned int num_qubits,
+    unsigned int data_len,
+    int enable_zz
+) {
+    const size_t total_elements = num_samples * state_len;
+    const size_t stride = gridDim.x * blockDim.x;
+    const size_t state_mask = state_len - 1;
+
+    for (size_t global_idx = blockIdx.x * blockDim.x + threadIdx.x;
+         global_idx < total_elements;
+         global_idx += stride) {
+        const size_t sample_idx = global_idx >> num_qubits;
+        const size_t z = global_idx & state_mask;
+        const double* data = data_batch + sample_idx * data_len;
+
+        double real_sum = 0.0;
+        double imag_sum = 0.0;
+
+        // Sum over all input basis states x
+        for (size_t x = 0; x < state_len; ++x) {
+            double phase = compute_phase(data, x, num_qubits, enable_zz);
+
+            // Compute (-1)^{popcount(x AND z)}
+            int parity = __popcll(x & z) & 1;
+            double sign = (parity == 0) ? 1.0 : -1.0;
+
+            double cos_phase, sin_phase;
+            sincos(phase, &sin_phase, &cos_phase);
+            real_sum += sign * cos_phase;
+            imag_sum += sign * sin_phase;
+        }
+
+        double norm = 1.0 / (double)state_len;
+        state_batch[global_idx] = make_cuDoubleComplex(real_sum * norm, imag_sum * norm);
+    }
+}
+
+extern "C" {
+
+/// Launch IQP encoding kernel
+///
+/// # Arguments
+/// * data_d - Device pointer to encoding parameters
+/// * state_d - Device pointer to output state vector
+/// * state_len - Target state vector size (2^num_qubits)
+/// * num_qubits - Number of qubits
+/// * enable_zz - 0 for Z-only, 1 for full ZZ interactions
+/// * stream - CUDA stream for async execution (nullptr = default stream)
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_iqp_encode(
+    const double* data_d,
+    void* state_d,
+    size_t state_len,
+    unsigned int num_qubits,
+    int enable_zz,
+    cudaStream_t stream
+) {
+    if (state_len == 0 || num_qubits == 0) {
+        return cudaErrorInvalidValue;
+    }
+
+    cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_d);
+
+    const int blockSize = 256;

Review Comment:
   The IQP kernel uses hardcoded values `256` and `2048` instead of the shared 
constants defined in `kernel_config.h`. Please include `kernel_config.h` and 
use the defined constants.



##########
qdp/qdp-kernels/src/iqp.cu:
##########
@@ -0,0 +1,232 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IQP (Instantaneous Quantum Polynomial) Encoding CUDA Kernels
+//
+// Creates entangled quantum states via diagonal phase gates:
+// |psi> = H^n * U_phase(data) * H^n |0>^n
+//
+// The amplitude for basis state |z> is:
+// amplitude[z] = (1/2^n) * sum_x exp(i*theta(x)) * (-1)^popcount(x AND z)
+//
+// Two variants:
+// - enable_zz=0: theta(x) = sum_i x_i * data_i  (n parameters)
+// - enable_zz=1: theta(x) = sum_i x_i * data_i + sum_{i<j} x_i * x_j * data_ij
+//                (n + n*(n-1)/2 parameters)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+// Compute phase theta(x) for a given basis state x
+__device__ double compute_phase(
+    const double* __restrict__ data,
+    size_t x,
+    unsigned int num_qubits,
+    int enable_zz
+) {
+    double phase = 0.0;
+
+    // Single-qubit Z terms: sum_i x_i * data[i]
+    for (unsigned int i = 0; i < num_qubits; ++i) {
+        if ((x >> i) & 1U) {
+            phase += data[i];
+        }
+    }
+
+    // Two-qubit ZZ terms (if enabled): sum_{i<j} x_i * x_j * data[n + pair_index]
+    if (enable_zz) {
+        unsigned int pair_idx = num_qubits;
+        for (unsigned int i = 0; i < num_qubits; ++i) {
+            for (unsigned int j = i + 1; j < num_qubits; ++j) {
+                if (((x >> i) & 1U) && ((x >> j) & 1U)) {
+                    phase += data[pair_idx];
+                }
+                pair_idx++;
+            }
+        }
+    }
+
+    return phase;
+}
+
+__global__ void iqp_encode_kernel(

Review Comment:
   Lines 66-139: just a suggestion; this could be done in a follow-up. 
   Maybe we could try an FWT (Fast Walsh-Hadamard Transform) optimization here 
   to reduce the time complexity from O(4^n) to O(n * 2^n).





Issue Time Tracking
-------------------

    Worklog Id:     (was: 1001490)
    Time Spent: 1h 10m  (was: 1h)

> Cannot run SequenceFilesFromCsvFilter, ever
> -------------------------------------------
>
>                 Key: MAHOUT-799
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-799
>             Project: Mahout
>          Issue Type: Bug
>          Components: classic
>    Affects Versions: 0.5
>            Reporter: Jack Tanner
>            Assignee: Sean R. Owen
>            Priority: Major
>             Fix For: 0.6
>
>         Attachments: MAHOUT-799.patch, MAHOUT-799.patch
>
>          Time Spent: 1h 10m
>  Remaining Estimate: 0h
>
> As described here:
> http://mail-archives.apache.org/mod_mbox/mahout-user/201106.mbox/%[email protected]%3E
> SequenceFilesFromCsvFilter cannot be invoked with default parameter values, 
> because it dies like so:
> bin/mahout seqdirectory -i input -o output -filter 
> org.apache.mahout.text.SequenceFilesFromCsvFilter
> ...
> Caused by: java.lang.NumberFormatException: null
>      at java.lang.Integer.parseInt(Integer.java:417)
>      at java.lang.Integer.parseInt(Integer.java:499)
>      at 
> org.apache.mahout.text.SequenceFilesFromCsvFilter.<init>(SequenceFilesFromCsvFilter.java:56)
> If one adds the parameters -kcol 0 -vcol 0 (or their long-form versions), it 
> dies like so:
> Unexpected -kcol while processing Job-Specific Options
> Commenting out SequenceFilesFromCsvFilter:56 and 
> SequenceFilesFromCsvFilter:57, like so, allows the run to proceed
> //    this.keyColumn = Integer.parseInt(options.get(KEY_COLUMN_OPTION[0]));
> //    this.valueColumn = 
> Integer.parseInt(options.get(VALUE_COLUMN_OPTION[0]));



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to