[
https://issues.apache.org/jira/browse/MAHOUT-799?focusedWorklogId=1001490&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-1001490
]
ASF GitHub Bot logged work on MAHOUT-799:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 23/Jan/26 07:08
Start Date: 23/Jan/26 07:08
Worklog Time Spent: 10m
Work Description: rich7420 commented on code in PR #868:
URL: https://github.com/apache/mahout/pull/868#discussion_r2719870400
##########
qdp/qdp-kernels/src/iqp.cu:
##########
@@ -0,0 +1,232 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IQP (Instantaneous Quantum Polynomial) Encoding CUDA Kernels
+//
+// Creates entangled quantum states via diagonal phase gates:
+// |psi> = H^n * U_phase(data) * H^n |0>^n
+//
+// The amplitude for basis state |z> is:
+// amplitude[z] = (1/2^n) * sum_x exp(i*theta(x)) * (-1)^popcount(x AND z)
+//
+// Two variants:
+// - enable_zz=0: theta(x) = sum_i x_i * data_i (n parameters)
+// - enable_zz=1: theta(x) = sum_i x_i * data_i + sum_{i<j} x_i * x_j * data_ij
+// (n + n*(n-1)/2 parameters)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+// Compute phase theta(x) for a given basis state x
+__device__ double compute_phase(
+ const double* __restrict__ data,
+ size_t x,
+ unsigned int num_qubits,
+ int enable_zz
+) {
+ double phase = 0.0;
+
+ // Single-qubit Z terms: sum_i x_i * data[i]
+ for (unsigned int i = 0; i < num_qubits; ++i) {
+ if ((x >> i) & 1U) {
+ phase += data[i];
+ }
+ }
+
+ // Two-qubit ZZ terms (if enabled): sum_{i<j} x_i * x_j * data[n +
pair_index]
+ if (enable_zz) {
+ unsigned int pair_idx = num_qubits;
+ for (unsigned int i = 0; i < num_qubits; ++i) {
+ for (unsigned int j = i + 1; j < num_qubits; ++j) {
+ if (((x >> i) & 1U) && ((x >> j) & 1U)) {
+ phase += data[pair_idx];
+ }
+ pair_idx++;
+ }
+ }
+ }
+
+ return phase;
+}
+
+__global__ void iqp_encode_kernel(
+ const double* __restrict__ data,
+ cuDoubleComplex* __restrict__ state,
+ size_t state_len,
+ unsigned int num_qubits,
+ int enable_zz
+) {
+ size_t z = blockIdx.x * blockDim.x + threadIdx.x;
+ if (z >= state_len) return;
+
+ double real_sum = 0.0;
+ double imag_sum = 0.0;
+
+ // Sum over all input basis states x
+ for (size_t x = 0; x < state_len; ++x) {
+ double phase = compute_phase(data, x, num_qubits, enable_zz);
+
+ // Compute (-1)^{popcount(x AND z)} using __popcll intrinsic
+ int parity = __popcll(x & z) & 1;
+ double sign = (parity == 0) ? 1.0 : -1.0;
+
+ // Accumulate: sign * exp(i*phase) = sign * (cos(phase) + i*sin(phase))
+ double cos_phase, sin_phase;
+ sincos(phase, &sin_phase, &cos_phase);
+ real_sum += sign * cos_phase;
+ imag_sum += sign * sin_phase;
+ }
+
+ // Normalize by 1/2^n (state_len = 2^n)
+ double norm = 1.0 / (double)state_len;
+ state[z] = make_cuDoubleComplex(real_sum * norm, imag_sum * norm);
+}
+
+__global__ void iqp_encode_batch_kernel(
+ const double* __restrict__ data_batch,
+ cuDoubleComplex* __restrict__ state_batch,
+ size_t num_samples,
+ size_t state_len,
+ unsigned int num_qubits,
+ unsigned int data_len,
+ int enable_zz
+) {
+ const size_t total_elements = num_samples * state_len;
+ const size_t stride = gridDim.x * blockDim.x;
+ const size_t state_mask = state_len - 1;
+
+ for (size_t global_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ global_idx < total_elements;
+ global_idx += stride) {
+ const size_t sample_idx = global_idx >> num_qubits;
+ const size_t z = global_idx & state_mask;
+ const double* data = data_batch + sample_idx * data_len;
+
+ double real_sum = 0.0;
+ double imag_sum = 0.0;
+
+ // Sum over all input basis states x
+ for (size_t x = 0; x < state_len; ++x) {
+ double phase = compute_phase(data, x, num_qubits, enable_zz);
+
+ // Compute (-1)^{popcount(x AND z)}
+ int parity = __popcll(x & z) & 1;
+ double sign = (parity == 0) ? 1.0 : -1.0;
+
+ double cos_phase, sin_phase;
+ sincos(phase, &sin_phase, &cos_phase);
+ real_sum += sign * cos_phase;
+ imag_sum += sign * sin_phase;
+ }
+
+ double norm = 1.0 / (double)state_len;
+ state_batch[global_idx] = make_cuDoubleComplex(real_sum * norm,
imag_sum * norm);
+ }
+}
+
+extern "C" {
+
+/// Launch IQP encoding kernel
+///
+/// # Arguments
+/// * data_d - Device pointer to encoding parameters
+/// * state_d - Device pointer to output state vector
+/// * state_len - Target state vector size (2^num_qubits)
+/// * num_qubits - Number of qubits
+/// * enable_zz - 0 for Z-only, 1 for full ZZ interactions
+/// * stream - CUDA stream for async execution (nullptr = default stream)
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_iqp_encode(
+ const double* data_d,
+ void* state_d,
+ size_t state_len,
+ unsigned int num_qubits,
+ int enable_zz,
+ cudaStream_t stream
+) {
+ if (state_len == 0 || num_qubits == 0) {
+ return cudaErrorInvalidValue;
+ }
+
+ cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_d);
+
+ const int blockSize = 256;
Review Comment:
The IQP kernel uses hardcoded values `256` and `2048` instead of the shared
constants defined in `kernel_config.h`. Please include `kernel_config.h` and
use the defined constants.
##########
qdp/qdp-kernels/src/iqp.cu:
##########
@@ -0,0 +1,232 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IQP (Instantaneous Quantum Polynomial) Encoding CUDA Kernels
+//
+// Creates entangled quantum states via diagonal phase gates:
+// |psi> = H^n * U_phase(data) * H^n |0>^n
+//
+// The amplitude for basis state |z> is:
+// amplitude[z] = (1/2^n) * sum_x exp(i*theta(x)) * (-1)^popcount(x AND z)
+//
+// Two variants:
+// - enable_zz=0: theta(x) = sum_i x_i * data_i (n parameters)
+// - enable_zz=1: theta(x) = sum_i x_i * data_i + sum_{i<j} x_i * x_j * data_ij
+// (n + n*(n-1)/2 parameters)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+// Compute phase theta(x) for a given basis state x
+__device__ double compute_phase(
+ const double* __restrict__ data,
+ size_t x,
+ unsigned int num_qubits,
+ int enable_zz
+) {
+ double phase = 0.0;
+
+ // Single-qubit Z terms: sum_i x_i * data[i]
+ for (unsigned int i = 0; i < num_qubits; ++i) {
+ if ((x >> i) & 1U) {
+ phase += data[i];
+ }
+ }
+
+ // Two-qubit ZZ terms (if enabled): sum_{i<j} x_i * x_j * data[n +
pair_index]
+ if (enable_zz) {
+ unsigned int pair_idx = num_qubits;
+ for (unsigned int i = 0; i < num_qubits; ++i) {
+ for (unsigned int j = i + 1; j < num_qubits; ++j) {
+ if (((x >> i) & 1U) && ((x >> j) & 1U)) {
+ phase += data[pair_idx];
+ }
+ pair_idx++;
+ }
+ }
+ }
+
+ return phase;
+}
+
+__global__ void iqp_encode_kernel(
Review Comment:
Lines 66-139: just a suggestion; this could be done in a follow-up.
Maybe we could try an FWT (Fast Walsh-Hadamard Transform) optimization here
to reduce the time complexity from O(4^n) to O(n * 2^n).
Issue Time Tracking
-------------------
Worklog Id: (was: 1001490)
Time Spent: 1h 10m (was: 1h)
> Cannot run SequenceFilesFromCsvFilter, ever
> -------------------------------------------
>
> Key: MAHOUT-799
> URL: https://issues.apache.org/jira/browse/MAHOUT-799
> Project: Mahout
> Issue Type: Bug
> Components: classic
> Affects Versions: 0.5
> Reporter: Jack Tanner
> Assignee: Sean R. Owen
> Priority: Major
> Fix For: 0.6
>
> Attachments: MAHOUT-799.patch, MAHOUT-799.patch
>
> Time Spent: 1h 10m
> Remaining Estimate: 0h
>
> As described here:
> http://mail-archives.apache.org/mod_mbox/mahout-user/201106.mbox/%[email protected]%3E
> SequenceFilesFromCsvFilter cannot be invoked with default parameter values,
> because it dies like so:
> bin/mahout seqdirectory -i input -o output -filter
> org.apache.mahout.text.SequenceFilesFromCsvFilter
> ...
> Caused by: java.lang.NumberFormatException: null
> at java.lang.Integer.parseInt(Integer.java:417)
> at java.lang.Integer.parseInt(Integer.java:499)
> at
> org.apache.mahout.text.SequenceFilesFromCsvFilter.<init>(SequenceFilesFromCsvFilter.java:56)
> If one adds the parameters -kcol 0 -vcol 0 (or their long-form versions), it
> dies like so:
> Unexpected -kcol while processing Job-Specific Options
> Commenting out SequenceFilesFromCsvFilter:56 and
> SequenceFilesFromCsvFilter:57, like so, allows the run to proceed
> // this.keyColumn = Integer.parseInt(options.get(KEY_COLUMN_OPTION[0]));
> // this.valueColumn =
> Integer.parseInt(options.get(VALUE_COLUMN_OPTION[0]));
--
This message was sent by Atlassian Jira
(v8.20.10#820010)