This is an automated email from the ASF dual-hosted git repository.

JackieTien97 pushed a commit to branch dev/1.3
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/dev/1.3 by this push:
     new c40b8780df7 [To del/1.3] Fix Percentile, Quantile UDFs and update 
Cluster UDF (#17435)
c40b8780df7 is described below

commit c40b8780df70354162702e05f029a13b1d0db4f0
Author: Yunxiang Su <[email protected]>
AuthorDate: Thu Apr 30 09:09:00 2026 +0800

    [To del/1.3] Fix Percentile, Quantile UDFs and update Cluster UDF (#17435)
---
 .../apache/iotdb/libudf/it/dlearn/DLearnIT.java    |  82 ++++++++
 library-udf/src/assembly/tools/register-UDF.bat    |   1 +
 library-udf/src/assembly/tools/register-UDF.sh     |   1 +
 .../apache/iotdb/library/dlearn/UDTFCluster.java   | 216 +++++++++++++++++++++
 .../library/dlearn/util/cluster/ClusterUtils.java  | 156 +++++++++++++++
 .../iotdb/library/dlearn/util/cluster/KMeans.java  | 130 +++++++++++++
 .../iotdb/library/dlearn/util/cluster/KShape.java  | 190 ++++++++++++++++++
 .../library/dlearn/util/cluster/MedoidShape.java   | 213 ++++++++++++++++++++
 .../iotdb/library/dprofile/UDAFQuantile.java       |  78 ++++----
 .../dprofile/util/ExactOrderStatistics.java        |  53 ++---
 .../iotdb/library/dprofile/util/GKArray.java       |  15 +-
 .../iotdb/library/match/PatternExecutor.java       |   9 +-
 12 files changed, 1066 insertions(+), 78 deletions(-)

diff --git 
a/integration-test/src/test/java/org/apache/iotdb/libudf/it/dlearn/DLearnIT.java
 
b/integration-test/src/test/java/org/apache/iotdb/libudf/it/dlearn/DLearnIT.java
index 480dff4e46d..b4a1c8daa92 100644
--- 
a/integration-test/src/test/java/org/apache/iotdb/libudf/it/dlearn/DLearnIT.java
+++ 
b/integration-test/src/test/java/org/apache/iotdb/libudf/it/dlearn/DLearnIT.java
@@ -94,6 +94,11 @@ public class DLearnIT {
               + "datatype=double, "
               + "encoding=plain, "
               + "compression=uncompressed");
+      statement.addBatch(
+          "create timeseries root.vehicle.d3.s1 with "
+              + "datatype=int32, "
+              + "encoding=plain, "
+              + "compression=uncompressed");
       statement.executeBatch();
     } catch (SQLException throwable) {
       fail(throwable.getMessage());
@@ -168,6 +173,16 @@ public class DLearnIT {
           String.format(
               "insert into root.vehicle.d2(timestamp,s1,s2,s3,s4) 
values(%d,%d,%d,%d,%d)",
               900, 4, 4, 4, 4));
+      // Toy series for cluster UDF (l=3, k=2): windows [1,2,3], [10,20,30], [1,5,1]. With
+      // norm=false, k-means groups windows 0 and 2 (close in value); with norm=true,
+      // k-shape / medoidshape group windows 0 and 1 (same shape after z-normalization,
+      // differing only in scale).
+      int[] toy = {1, 2, 3, 10, 20, 30, 1, 5, 1};
+      for (int i = 0; i < toy.length; i++) {
+        statement.addBatch(
+            String.format(
+                "insert into root.vehicle.d3(timestamp,s1) values(%d,%d)", (i 
+ 1) * 100, toy[i]));
+      }
       statement.executeBatch();
     } catch (SQLException throwable) {
       fail(throwable.getMessage());
@@ -179,6 +194,7 @@ public class DLearnIT {
         Statement statement = connection.createStatement()) {
       statement.execute("create function iqr as 
'org.apache.iotdb.library.anomaly.UDTFIQR'");
       statement.execute("create function ar as 
'org.apache.iotdb.library.dlearn.UDTFAR'");
+      statement.execute("create function cluster as 
'org.apache.iotdb.library.dlearn.UDTFCluster'");
     } catch (SQLException throwable) {
       fail(throwable.getMessage());
     }
@@ -308,4 +324,70 @@ public class DLearnIT {
       fail(throwable.getMessage());
     }
   }
+
+  @Test
+  public void testCluster1() {
+    String sqlStr =
+        "select cluster(d3.s1, 'l'='3', 'k'='2', 'method'='kmeans', 
'norm'='false', "
+            + "'maxiter'='50', 'output'='label') from root.vehicle";
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement();
+        ResultSet resultSet = statement.executeQuery(sqlStr)) {
+      resultSet.next();
+      int l0 = resultSet.getInt(2);
+      resultSet.next();
+      int l1 = resultSet.getInt(2);
+      resultSet.next();
+      int l2 = resultSet.getInt(2);
+      Assert.assertFalse(resultSet.next());
+      Assert.assertEquals(l0, l2);
+      Assert.assertNotEquals(l0, l1);
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
+
+  @Test
+  public void testCluster2() {
+    String sqlStr =
+        "select cluster(d3.s1, 'l'='3', 'k'='2', 'method'='kshape', 
'norm'='true', "
+            + "'maxiter'='50', 'output'='label') from root.vehicle";
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement();
+        ResultSet resultSet = statement.executeQuery(sqlStr)) {
+      resultSet.next();
+      int l0 = resultSet.getInt(2);
+      resultSet.next();
+      int l1 = resultSet.getInt(2);
+      resultSet.next();
+      int l2 = resultSet.getInt(2);
+      Assert.assertFalse(resultSet.next());
+      Assert.assertEquals(l0, l1);
+      Assert.assertNotEquals(l0, l2);
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
+
+  @Test
+  public void testCluster3() {
+    String sqlStr =
+        "select cluster(d3.s1, 'l'='3', 'k'='2', 'method'='medoidshape', 
'norm'='true', "
+            + "'sample_rate'='1', 'maxiter'='50', 'output'='label') from 
root.vehicle";
+    try (Connection connection = EnvFactory.getEnv().getConnection();
+        Statement statement = connection.createStatement();
+        ResultSet resultSet = statement.executeQuery(sqlStr)) {
+      resultSet.next();
+      int l0 = resultSet.getInt(2);
+      resultSet.next();
+      int l1 = resultSet.getInt(2);
+      resultSet.next();
+      int l2 = resultSet.getInt(2);
+      Assert.assertFalse(resultSet.next());
+      Assert.assertEquals(l0, l1);
+      Assert.assertNotEquals(l0, l2);
+    } catch (SQLException throwable) {
+      fail(throwable.getMessage());
+    }
+  }
 }
diff --git a/library-udf/src/assembly/tools/register-UDF.bat 
b/library-udf/src/assembly/tools/register-UDF.bat
index c6683bc96ac..f30db5d2a3a 100644
--- a/library-udf/src/assembly/tools/register-UDF.bat
+++ b/library-udf/src/assembly/tools/register-UDF.bat
@@ -102,6 +102,7 @@ call ../sbin/start-cli.bat -h %host% -p %rpcPort% -u %user% 
-pw %pass% -e "creat
 
 @REM Machine Learning
 call ../sbin/start-cli.bat -h %host% -p %rpcPort% -u %user% -pw %pass% -e 
"create function ar as 'org.apache.iotdb.library.dlearn.UDTFAR'"
+call ../sbin/start-cli.bat -h %host% -p %rpcPort% -u %user% -pw %pass% -e 
"create function cluster as 'org.apache.iotdb.library.dlearn.UDTFCluster'"
 
 @REM Match
 call ../sbin/start-cli.bat -h %host% -p %rpcPort% -u %user% -pw %pass% -e 
"create function pattern_match as 
'org.apache.iotdb.library.match.UDAFPatternMatch'"
diff --git a/library-udf/src/assembly/tools/register-UDF.sh 
b/library-udf/src/assembly/tools/register-UDF.sh
index 16ab59f143b..faaa4df68b5 100755
--- a/library-udf/src/assembly/tools/register-UDF.sh
+++ b/library-udf/src/assembly/tools/register-UDF.sh
@@ -102,6 +102,7 @@ pass=root
 
 # Machine Learning
 ../sbin/start-cli.sh -h $host -p $rpcPort -u $user -pw $pass -e "create 
function ar as 'org.apache.iotdb.library.dlearn.UDTFAR'"
+../sbin/start-cli.sh -h $host -p $rpcPort -u $user -pw $pass -e "create 
function cluster as 'org.apache.iotdb.library.dlearn.UDTFCluster'"
 
 # Match
 ../sbin/start-cli.sh -h $host -p $rpcPort -u $user -pw $pass -e "create 
function pattern_match as 'org.apache.iotdb.library.match.UDAFPatternMatch'"
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dlearn/UDTFCluster.java 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/UDTFCluster.java
new file mode 100644
index 00000000000..f64f9747992
--- /dev/null
+++ b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/UDTFCluster.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dlearn;
+
+import org.apache.iotdb.library.dlearn.util.cluster.KMeans;
+import org.apache.iotdb.library.dlearn.util.cluster.KShape;
+import org.apache.iotdb.library.dlearn.util.cluster.MedoidShape;
+import org.apache.iotdb.library.util.Util;
+import org.apache.iotdb.udf.api.UDTF;
+import org.apache.iotdb.udf.api.access.Row;
+import org.apache.iotdb.udf.api.collector.PointCollector;
+import org.apache.iotdb.udf.api.customizer.config.UDTFConfigurations;
+import org.apache.iotdb.udf.api.customizer.parameter.UDFParameterValidator;
+import org.apache.iotdb.udf.api.customizer.parameter.UDFParameters;
+import org.apache.iotdb.udf.api.customizer.strategy.RowByRowAccessStrategy;
+import org.apache.iotdb.udf.api.exception.UDFException;
+import org.apache.iotdb.udf.api.type.Type;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Clusters a time series by partitioning it into non-overlapping subsequences 
of length l.
+ * Parameters: l, k, method (default kmeans), norm, maxiter, output; 
medoidshape also uses
+ * sample_rate (greedy sampling ratio; use 1 when the window count is small). 
Requires at least k
+ * windows.
+ */
+public class UDTFCluster implements UDTF {
+
+  private static final String METHOD_KMEANS = "kmeans";
+  private static final String METHOD_KSHAPE = "kshape";
+  private static final String METHOD_MEDOIDSHAPE = "medoidshape";
+
+  private static final String OUTPUT_LABEL = "label";
+  private static final String OUTPUT_CENTROID = "centroid";
+
+  private static final int DEFAULT_MAX_ITER = 200;
+  private static final double DEFAULT_SAMPLE_RATE = 0.3;
+  private static final String DEFAULT_METHOD = METHOD_KMEANS;
+
+  private int l;
+  private int k;
+  private String method;
+  private boolean norm;
+  private int maxIter;
+  private String output;
+  private double sampleRate;
+
+  private final List<Long> timestamps = new ArrayList<>();
+  private final List<Double> values = new ArrayList<>();
+
+  @Override
+  public void validate(UDFParameterValidator validator) throws Exception {
+    validator
+        .validateInputSeriesNumber(1)
+        .validateInputSeriesDataType(0, Type.INT32, Type.INT64, Type.FLOAT, 
Type.DOUBLE)
+        .validate(
+            x -> (int) x > 0,
+            "Parameter l must be a positive integer.",
+            validator.getParameters().getInt("l"))
+        .validate(
+            x -> (int) x >= 2,
+            "Parameter k must be at least 2.",
+            validator.getParameters().getInt("k"))
+        .validate(
+            x -> {
+              String m = ((String) x).toLowerCase();
+              return METHOD_KMEANS.equals(m)
+                  || METHOD_KSHAPE.equals(m)
+                  || METHOD_MEDOIDSHAPE.equals(m);
+            },
+            "Parameter method must be one of: kmeans, kshape, medoidshape.",
+            validator.getParameters().getStringOrDefault("method", 
DEFAULT_METHOD))
+        .validate(
+            x -> (int) x >= 1,
+            "Parameter maxiter must be a positive integer.",
+            validator.getParameters().getIntOrDefault("maxiter", 
DEFAULT_MAX_ITER))
+        .validate(
+            x -> {
+              String o = ((String) x).toLowerCase();
+              return OUTPUT_LABEL.equals(o) || OUTPUT_CENTROID.equals(o);
+            },
+            "Parameter output must be label or centroid.",
+            validator.getParameters().getStringOrDefault("output", 
OUTPUT_LABEL))
+        .validate(
+            x -> {
+              double d = ((Number) x).doubleValue();
+              return d > 0 && d <= 1.0;
+            },
+            "Parameter sample_rate must be in (0, 1].",
+            validator.getParameters().getDoubleOrDefault("sample_rate", 
DEFAULT_SAMPLE_RATE));
+  }
+
+  @Override
+  public void beforeStart(UDFParameters parameters, UDTFConfigurations 
configurations)
+      throws Exception {
+    this.output = parameters.getStringOrDefault("output", 
OUTPUT_LABEL).toLowerCase();
+    if (OUTPUT_CENTROID.equals(output)) {
+      configurations.setAccessStrategy(new 
RowByRowAccessStrategy()).setOutputDataType(Type.DOUBLE);
+    } else {
+      configurations.setAccessStrategy(new 
RowByRowAccessStrategy()).setOutputDataType(Type.INT32);
+    }
+    this.l = parameters.getInt("l");
+    this.k = parameters.getInt("k");
+    this.method = parameters.getStringOrDefault("method", 
DEFAULT_METHOD).toLowerCase();
+    this.norm = parameters.getBooleanOrDefault("norm", true);
+    this.maxIter = parameters.getIntOrDefault("maxiter", DEFAULT_MAX_ITER);
+    this.sampleRate = parameters.getDoubleOrDefault("sample_rate", 
DEFAULT_SAMPLE_RATE);
+    timestamps.clear();
+    values.clear();
+  }
+
+  @Override
+  public void transform(Row row, PointCollector collector) throws Exception {
+    if (!row.isNull(0)) {
+      timestamps.add(row.getTime());
+      values.add(Util.getValueAsDouble(row));
+    }
+  }
+
+  @Override
+  public void terminate(PointCollector collector) throws Exception {
+    int n = values.size();
+    if (n < l) {
+      throw new UDFException(
+          "Time series length must be at least l; got " + n + " points, l=" + 
l + ".");
+    }
+    int numWindows = n / l;
+    if (numWindows < k) {
+      throw new UDFException(
+          "Not enough non-overlapping windows: got "
+              + numWindows
+              + " windows, need at least k="
+              + k
+              + ".");
+    }
+
+    double[][] windows = new double[numWindows][l];
+    long[] windowStartTime = new long[numWindows];
+    for (int w = 0; w < numWindows; w++) {
+      windowStartTime[w] = timestamps.get(w * l);
+      for (int j = 0; j < l; j++) {
+        windows[w][j] = values.get(w * l + j);
+      }
+    }
+
+    if (OUTPUT_LABEL.equals(output)) {
+      int[] labels;
+      if (METHOD_KMEANS.equals(method)) {
+        KMeans km = new KMeans();
+        km.fit(windows, k, norm, maxIter);
+        labels = km.getLabels();
+      } else if (METHOD_KSHAPE.equals(method)) {
+        KShape ks = new KShape();
+        ks.fit(windows, k, norm, maxIter);
+        labels = ks.getLabels();
+      } else if (METHOD_MEDOIDSHAPE.equals(method)) {
+        MedoidShape ms = new MedoidShape();
+        ms.setSampleRate(sampleRate);
+        ms.fit(windows, k, norm, maxIter);
+        labels = ms.getLabels();
+      } else {
+        throw new UDFException("Unsupported method: " + method);
+      }
+      for (int w = 0; w < numWindows; w++) {
+        collector.putInt(windowStartTime[w], labels[w]);
+      }
+    } else {
+      double[][] centroids;
+      if (METHOD_KMEANS.equals(method)) {
+        KMeans km = new KMeans();
+        km.fit(windows, k, norm, maxIter);
+        centroids = km.getCentroids();
+      } else if (METHOD_KSHAPE.equals(method)) {
+        KShape ks = new KShape();
+        ks.fit(windows, k, norm, maxIter);
+        centroids = ks.getCentroids();
+      } else if (METHOD_MEDOIDSHAPE.equals(method)) {
+        MedoidShape ms = new MedoidShape();
+        ms.setSampleRate(sampleRate);
+        ms.fit(windows, k, norm, maxIter);
+        centroids = ms.getCentroids();
+      } else {
+        throw new UDFException("Unsupported method: " + method);
+      }
+      emitConcatenatedCentroids(collector, centroids);
+    }
+  }
+
+  private static void emitConcatenatedCentroids(PointCollector collector, 
double[][] centroids)
+      throws Exception {
+    long t = 0L;
+    for (double[] row : centroids) {
+      for (double v : row) {
+        collector.putDouble(t++, v);
+      }
+    }
+  }
+}
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/ClusterUtils.java
 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/ClusterUtils.java
new file mode 100644
index 00000000000..a646adb2bed
--- /dev/null
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/ClusterUtils.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dlearn.util.cluster;
+
+import org.apache.commons.math3.complex.Complex;
+import org.apache.commons.math3.transform.DftNormalization;
+import org.apache.commons.math3.transform.FastFourierTransformer;
+import org.apache.commons.math3.transform.TransformType;
+
+import java.util.Arrays;
+
+/**
+ * Subsequence z-normalize, Euclidean distance, and FFT-based NCC / SBD 
(shared by KShape and
+ * MedoidShape).
+ */
+public final class ClusterUtils {
+
+  public static final double EPS = 1e-9;
+
+  private static final FastFourierTransformer FFT =
+      new FastFourierTransformer(DftNormalization.STANDARD);
+
+  private ClusterUtils() {}
+
+  public static double[] maybeZNormalize(double[] a, boolean normalize) {
+    if (normalize) {
+      return zNormalize(a);
+    }
+    return Arrays.copyOf(a, a.length);
+  }
+
+  public static double[] zNormalize(double[] a) {
+    int n = a.length;
+    double sum = 0.0;
+    for (double v : a) {
+      sum += v;
+    }
+    double mean = sum / n;
+    double var = 0.0;
+    for (double v : a) {
+      double d = v - mean;
+      var += d * d;
+    }
+    var /= n;
+    double std = Math.sqrt(Math.max(var, 0.0));
+    double[] z = new double[n];
+    if (std < EPS) {
+      return z;
+    }
+    for (int i = 0; i < n; i++) {
+      z[i] = (a[i] - mean) / std;
+    }
+    return z;
+  }
+
+  public static double squaredEuclidean(double[] a, double[] b) {
+    double s = 0.0;
+    for (int i = 0; i < a.length; i++) {
+      double d = a[i] - b[i];
+      s += d * d;
+    }
+    return s;
+  }
+
+  public static int findLargestCluster(int[] counts) {
+    int best = 0;
+    for (int i = 1; i < counts.length; i++) {
+      if (counts[i] > counts[best]) {
+        best = i;
+      }
+    }
+    return best;
+  }
+
+  /**
+   * Maximum over the normalized cross-correlation sequence (FFT); used for 
SBD and MedoidShape
+   * objective.
+   */
+  public static double maxNcc(double[] x, double[] y) {
+    double[] cc = nccFft(x, y);
+    double max = Double.NEGATIVE_INFINITY;
+    for (double v : cc) {
+      if (v > max) {
+        max = v;
+      }
+    }
+    return max;
+  }
+
+  /** SBD: 1 − max NCC (consistent with the NCC-based definition in k-Shape / 
FastKShape). */
+  public static double shapeDistance(double[] x, double[] y) {
+    return 1.0 - maxNcc(x, y);
+  }
+
+  public static double symmetricSbd(double[] a, double[] b) {
+    return 0.5 * (shapeDistance(a, b) + shapeDistance(b, a));
+  }
+
+  private static double[] nccFft(double[] x, double[] y) {
+    int xLen = x.length;
+    double den = l2Norm(x) * l2Norm(y);
+    if (den < 1e-9) {
+      den = Double.POSITIVE_INFINITY;
+    }
+    int fftSize = 1 << (32 - Integer.numberOfLeadingZeros(2 * xLen - 1));
+
+    Complex[] cx = new Complex[fftSize];
+    Complex[] cy = new Complex[fftSize];
+    for (int i = 0; i < fftSize; i++) {
+      cx[i] = new Complex(i < xLen ? x[i] : 0.0, 0.0);
+      cy[i] = new Complex(i < xLen ? y[i] : 0.0, 0.0);
+    }
+    Complex[] fx = FFT.transform(cx, TransformType.FORWARD);
+    Complex[] fy = FFT.transform(cy, TransformType.FORWARD);
+    Complex[] prod = new Complex[fftSize];
+    for (int i = 0; i < fftSize; i++) {
+      prod[i] = fx[i].multiply(fy[i].conjugate());
+    }
+    Complex[] ccFull = FFT.transform(prod, TransformType.INVERSE);
+
+    double[] ccPacked = new double[2 * xLen - 1];
+    int p = 0;
+    for (int i = fftSize - (xLen - 1); i < fftSize; i++) {
+      ccPacked[p++] = ccFull[i].getReal() / den;
+    }
+    for (int i = 0; i < xLen; i++) {
+      ccPacked[p++] = ccFull[i].getReal() / den;
+    }
+    return ccPacked;
+  }
+
+  private static double l2Norm(double[] v) {
+    double s = 0.0;
+    for (double x : v) {
+      s += x * x;
+    }
+    return Math.sqrt(s);
+  }
+}
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KMeans.java
 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KMeans.java
new file mode 100644
index 00000000000..37c44eade96
--- /dev/null
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KMeans.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dlearn.util.cluster;
+
+import java.util.Arrays;
+
+/**
+ * Univariate subsequence k-means (Lloyd); optionally z-normalize, then 
cluster in Euclidean space.
+ */
+public class KMeans {
+
+  private double[][] centroids;
+  private int[] labels;
+
+  public void fit(double[][] samples, int k, boolean normalize, int 
maxIterations) {
+    validate(samples, k, maxIterations);
+    int n = samples.length;
+    int dim = samples[0].length;
+
+    double[][] z = new double[n][dim];
+    for (int i = 0; i < n; i++) {
+      z[i] = ClusterUtils.maybeZNormalize(samples[i], normalize);
+    }
+
+    centroids = new double[k][dim];
+    for (int c = 0; c < k; c++) {
+      System.arraycopy(z[c], 0, centroids[c], 0, dim);
+    }
+
+    labels = new int[n];
+    Arrays.fill(labels, -1);
+
+    for (int iter = 0; iter < maxIterations; iter++) {
+      double[][] prevCentroids = new double[k][dim];
+      for (int c = 0; c < k; c++) {
+        System.arraycopy(centroids[c], 0, prevCentroids[c], 0, dim);
+      }
+
+      boolean changed = false;
+      for (int i = 0; i < n; i++) {
+        int best = 0;
+        double bestDist = Double.POSITIVE_INFINITY;
+        for (int c = 0; c < k; c++) {
+          double d = ClusterUtils.squaredEuclidean(z[i], centroids[c]);
+          if (d < bestDist) {
+            bestDist = d;
+            best = c;
+          }
+        }
+        if (labels[i] != best) {
+          labels[i] = best;
+          changed = true;
+        }
+      }
+
+      double[][] newCentroids = new double[k][dim];
+      int[] counts = new int[k];
+      for (int i = 0; i < n; i++) {
+        int c = labels[i];
+        counts[c]++;
+        for (int d = 0; d < dim; d++) {
+          newCentroids[c][d] += z[i][d];
+        }
+      }
+      for (int c = 0; c < k; c++) {
+        if (counts[c] == 0) {
+          int donor = ClusterUtils.findLargestCluster(counts);
+          System.arraycopy(prevCentroids[donor], 0, centroids[c], 0, dim);
+          for (int d = 0; d < dim; d++) {
+            centroids[c][d] += (d == 0 ? 1e-4 : -1e-4);
+          }
+        } else {
+          for (int d = 0; d < dim; d++) {
+            centroids[c][d] = newCentroids[c][d] / counts[c];
+          }
+        }
+      }
+
+      if (!changed) {
+        break;
+      }
+    }
+  }
+
+  public double[][] getCentroids() {
+    return centroids;
+  }
+
+  public int[] getLabels() {
+    return labels;
+  }
+
+  private static void validate(double[][] samples, int k, int maxIterations) {
+    if (samples == null || samples.length == 0) {
+      throw new IllegalArgumentException("samples must be non-empty.");
+    }
+    if (k < 2 || k > samples.length) {
+      throw new IllegalArgumentException("k must satisfy 2 <= k <= 
samples.length.");
+    }
+    if (maxIterations < 1) {
+      throw new IllegalArgumentException("maxIterations must be at least 1.");
+    }
+    int dim = samples[0].length;
+    if (dim == 0) {
+      throw new IllegalArgumentException("sample dimension must be positive.");
+    }
+    for (double[] row : samples) {
+      if (row == null || row.length != dim) {
+        throw new IllegalArgumentException("All samples must have the same 
length.");
+      }
+    }
+  }
+}
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KShape.java
 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KShape.java
new file mode 100644
index 00000000000..315e1c51d2f
--- /dev/null
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/KShape.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dlearn.util.cluster;
+
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealVector;
+import org.apache.commons.math3.linear.SingularValueDecomposition;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * k-Shape: assignment uses {@link ClusterUtils#shapeDistance} (SBD = 1 − max NCC); each centroid
+ * is the first right singular vector (from SVD) of its cluster's member matrix, sign-corrected,
+ * then z-normalized (or L2-normalized when normalization is disabled).
+ */
+public class KShape {
+
+  private double[][] centroids;
+  private int[] labels;
+
+  public void fit(double[][] samples, int k, boolean normalize, int 
maxIterations) {
+    validate(samples, k, maxIterations);
+    int n = samples.length;
+    int dim = samples[0].length;
+
+    double[][] z = new double[n][dim];
+    for (int i = 0; i < n; i++) {
+      z[i] = ClusterUtils.maybeZNormalize(samples[i], normalize);
+    }
+
+    centroids = new double[k][dim];
+    for (int c = 0; c < k; c++) {
+      System.arraycopy(z[c], 0, centroids[c], 0, dim);
+    }
+
+    labels = new int[n];
+    Arrays.fill(labels, -1);
+
+    for (int iter = 0; iter < maxIterations; iter++) {
+      double[][] prevCentroids = new double[k][dim];
+      for (int c = 0; c < k; c++) {
+        System.arraycopy(centroids[c], 0, prevCentroids[c], 0, dim);
+      }
+
+      boolean changed = false;
+      for (int i = 0; i < n; i++) {
+        int best = 0;
+        double bestDist = Double.POSITIVE_INFINITY;
+        for (int c = 0; c < k; c++) {
+          double d = ClusterUtils.shapeDistance(z[i], centroids[c]);
+          if (d < bestDist) {
+            bestDist = d;
+            best = c;
+          }
+        }
+        if (labels[i] != best) {
+          labels[i] = best;
+          changed = true;
+        }
+      }
+
+      int[] counts = new int[k];
+      @SuppressWarnings("unchecked")
+      List<double[]>[] byCluster = new List[k];
+      for (int c = 0; c < k; c++) {
+        byCluster[c] = new ArrayList<>();
+      }
+      for (int i = 0; i < n; i++) {
+        int c = labels[i];
+        counts[c]++;
+        byCluster[c].add(z[i]);
+      }
+
+      for (int c = 0; c < k; c++) {
+        if (counts[c] == 0) {
+          int donor = ClusterUtils.findLargestCluster(counts);
+          System.arraycopy(prevCentroids[donor], 0, centroids[c], 0, dim);
+        } else {
+          List<double[]> members = byCluster[c];
+          double[][] mat = new double[members.size()][dim];
+          for (int i = 0; i < members.size(); i++) {
+            mat[i] = members.get(i);
+          }
+          centroids[c] = centroidFromSvd(mat, normalize);
+        }
+      }
+
+      if (!changed) {
+        break;
+      }
+    }
+  }
+
+  public double[][] getCentroids() {
+    return centroids;
+  }
+
+  public int[] getLabels() {
+    return labels;
+  }
+
+  private static double[] centroidFromSvd(double[][] members, boolean 
zNormalizeCentroid) {
+    int m = members.length;
+    int dim = members[0].length;
+    if (m == 1) {
+      double[] u = Arrays.copyOf(members[0], dim);
+      return zNormalizeCentroid ? ClusterUtils.zNormalize(u) : l2Unit(u);
+    }
+    RealMatrix y = MatrixUtils.createRealMatrix(members);
+    SingularValueDecomposition svd = new SingularValueDecomposition(y);
+    RealMatrix v = svd.getV();
+    RealVector col0 = v.getColumnVector(0);
+    double[] r = col0.toArray();
+    double sumDot = 0.0;
+    for (double[] row : members) {
+      sumDot += dot(row, r);
+    }
+    if (sumDot < 0) {
+      for (int i = 0; i < r.length; i++) {
+        r[i] = -r[i];
+      }
+    }
+    return zNormalizeCentroid ? ClusterUtils.zNormalize(r) : l2Unit(r);
+  }
+
+  private static double dot(double[] a, double[] b) {
+    double s = 0.0;
+    for (int i = 0; i < a.length; i++) {
+      s += a[i] * b[i];
+    }
+    return s;
+  }
+
+  private static double[] l2Unit(double[] v) {
+    double s = 0.0;
+    for (double x : v) {
+      s += x * x;
+    }
+    s = Math.sqrt(s);
+    if (s < ClusterUtils.EPS) {
+      return new double[v.length];
+    }
+    double[] o = new double[v.length];
+    for (int i = 0; i < v.length; i++) {
+      o[i] = v[i] / s;
+    }
+    return o;
+  }
+
+  private static void validate(double[][] samples, int k, int maxIterations) {
+    if (samples == null || samples.length == 0) {
+      throw new IllegalArgumentException("samples must be non-empty.");
+    }
+    if (k < 2 || k > samples.length) {
+      throw new IllegalArgumentException("k must satisfy 2 <= k <= 
samples.length.");
+    }
+    if (maxIterations < 1) {
+      throw new IllegalArgumentException("maxIterations must be at least 1.");
+    }
+    int dim = samples[0].length;
+    if (dim == 0) {
+      throw new IllegalArgumentException("sample dimension must be positive.");
+    }
+    for (double[] row : samples) {
+      if (row == null || row.length != dim) {
+        throw new IllegalArgumentException("All samples must have the same 
length.");
+      }
+    }
+  }
+}
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/MedoidShape.java
 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/MedoidShape.java
new file mode 100644
index 00000000000..31f9d8c8df1
--- /dev/null
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dlearn/util/cluster/MedoidShape.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.library.dlearn.util.cluster;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Coarse-to-fine shape clustering over fixed-length windows.
+ *
+ * <p>{@link KMeans} first builds {@code min(2k, n)} coarse clusters (n = number of windows). A
+ * greedy pass (fastKShape) then picks k windows as representatives. Both the final labels and
+ * the greedy objective are scored with {@link ClusterUtils#maxNcc}.
+ */
+public class MedoidShape {
+
+  /** Fraction of the windows drawn as candidates in each greedy round; always in (0, 1]. */
+  private double sampleRate = 0.3;
+
+  /** RNG that shuffles the candidate pool in each greedy round. */
+  private Random random = new Random();
+
+  /** Representatives from the last {@link #fit} call; {@code null} until then. */
+  private double[][] centroids;
+
+  /** Index of the best-scoring representative per sample from the last {@link #fit} call. */
+  private int[] labels;
+
+  /** Overrides the RNG used for greedy sampling (default is {@link Random#Random()}). */
+  public void setRandom(Random random) {
+    this.random = Objects.requireNonNull(random);
+  }
+
+  /**
+   * Sets the fraction of windows sampled per greedy round.
+   *
+   * @param sampleRate a value in (0, 1]
+   * @throws IllegalArgumentException if {@code sampleRate} is NaN or outside (0, 1]
+   */
+  public void setSampleRate(double sampleRate) {
+    // Written as a negated range test so that NaN, for which every comparison is false, is
+    // rejected as well.
+    if (!(sampleRate > 0 && sampleRate <= 1.0)) {
+      throw new IllegalArgumentException("sampleRate must be in (0, 1].");
+    }
+    this.sampleRate = sampleRate;
+  }
+
+  /** Returns the fraction of windows sampled per greedy round. */
+  public double getSampleRate() {
+    return sampleRate;
+  }
+
+  /**
+   * Clusters {@code samples} around {@code k} representatives and stores centroids and labels.
+   *
+   * @param samples one row per window; rows must be non-null and of equal, positive length
+   * @param k number of representatives, {@code 2 <= k <= samples.length}
+   * @param normalize forwarded to {@link ClusterUtils#maybeZNormalize} for every row
+   * @param maxIterations iteration cap handed to the coarse {@link KMeans} run; at least 1
+   * @throws IllegalArgumentException if an argument violates the constraints above
+   */
+  public void fit(double[][] samples, int k, boolean normalize, int maxIterations) {
+    validate(samples, k, maxIterations);
+    int n = samples.length;
+    int dim = samples[0].length;
+
+    int coarseK = Math.min(2 * k, n);
+
+    // Every row is assigned below, so only the outer array needs allocating here.
+    double[][] x = new double[n][];
+    for (int i = 0; i < n; i++) {
+      x[i] = ClusterUtils.maybeZNormalize(samples[i], normalize);
+    }
+
+    KMeans coarse = new KMeans();
+    coarse.fit(x, coarseK, false, maxIterations);
+    double[][] euclideanCentroids = coarse.getCentroids();
+    int[] kmLabels = coarse.getLabels();
+    // Member count of each coarse cluster; used as its weight in the greedy objective.
+    long[] clusterSize = new long[coarseK];
+    for (int lb : kmLabels) {
+      clusterSize[lb]++;
+    }
+
+    centroids = fastKShape(x, k, sampleRate, dim, euclideanCentroids, clusterSize, random);
+
+    labels = new int[n];
+    for (int i = 0; i < n; i++) {
+      double maxNcc = Double.NEGATIVE_INFINITY;
+      // Stays -1 only when no score compares greater than negative infinity (e.g. all NaN).
+      int label = -1;
+      for (int j = 0; j < k; j++) {
+        double cur = ClusterUtils.maxNcc(x[i], centroids[j]);
+        if (cur > maxNcc) {
+          maxNcc = cur;
+          label = j;
+        }
+      }
+      labels[i] = label;
+    }
+  }
+
+  /** Returns the representatives of the last {@link #fit} call (the internal array, not a copy). */
+  public double[][] getCentroids() {
+    return centroids;
+  }
+
+  /** Returns the labels of the last {@link #fit} call (the internal array, not a copy). */
+  public int[] getLabels() {
+    return labels;
+  }
+
+  /**
+   * Greedily picks {@code k} rows of {@code x} as representatives.
+   *
+   * <p>Each round shuffles the rows not yet chosen, evaluates the first {@code max(1, (int) (r *
+   * n))} of them (capped at the pool size) and keeps the one that yields the largest {@link
+   * #evaluateAim} value when added to the rows picked so far.
+   *
+   * @param x candidate rows
+   * @param k number of representatives to pick
+   * @param r fraction of all rows to sample per round
+   * @param dim length of every row
+   * @param euclideanCentroids centroids of the coarse clustering
+   * @param clusterSize member count per coarse centroid
+   * @param rnd RNG used for shuffling
+   * @return {@code k} rows; copies of the selected rows of {@code x}
+   * @throws IllegalStateException if a round has no candidates or selects none
+   */
+  private static double[][] fastKShape(
+      double[][] x,
+      int k,
+      double r,
+      int dim,
+      double[][] euclideanCentroids,
+      long[] clusterSize,
+      Random rnd) {
+    int n = x.length;
+    if (n <= k) {
+      // Not more rows than representatives: use every row, padding with the last one.
+      double[][] out = new double[k][];
+      for (int i = 0; i < n; i++) {
+        out[i] = Arrays.copyOf(x[i], dim);
+      }
+      for (int i = n; i < k; i++) {
+        out[i] = Arrays.copyOf(x[n - 1], dim);
+      }
+      return out;
+    }
+
+    List<double[]> picked = new ArrayList<>();
+    Set<Integer> coresetIdx = new HashSet<>();
+
+    for (int round = 0; round < k; round++) {
+      List<Integer> pool = new ArrayList<>();
+      for (int i = 0; i < n; i++) {
+        if (!coresetIdx.contains(i)) {
+          pool.add(i);
+        }
+      }
+      if (pool.isEmpty()) {
+        throw new IllegalStateException("fastKShape: empty candidate pool.");
+      }
+      int sampleCount = Math.max(1, (int) (r * n));
+      sampleCount = Math.min(sampleCount, pool.size());
+      Collections.shuffle(pool, rnd);
+      List<Integer> sampleIdx = pool.subList(0, sampleCount);
+
+      double maxDelta = Double.NEGATIVE_INFINITY;
+      double[] bestSeg = null;
+      int bestIdx = -1;
+
+      for (int idx : sampleIdx) {
+        double[] seq = x[idx];
+        // Score the candidate as if it had been picked, then undo the tentative addition.
+        picked.add(seq);
+        double delta = evaluateAim(picked, euclideanCentroids, clusterSize);
+        picked.remove(picked.size() - 1);
+        if (delta > maxDelta) {
+          maxDelta = delta;
+          bestSeg = Arrays.copyOf(seq, dim);
+          bestIdx = idx;
+        }
+      }
+
+      if (bestSeg == null) {
+        throw new IllegalStateException("fastKShape: no candidate selected.");
+      }
+      picked.add(bestSeg);
+      coresetIdx.add(bestIdx);
+    }
+
+    // Exactly one row was added per round, so picked holds k rows.
+    return picked.toArray(new double[0][]);
+  }
+
+  /**
+   * Greedy objective: for every coarse centroid, the largest {@link ClusterUtils#maxNcc} against
+   * any row of {@code curCentroids}, weighted by that centroid's cluster size and summed.
+   */
+  private static double evaluateAim(
+      List<double[]> curCentroids, double[][] euclideanCentroids, long[] clusterSize) {
+    double res = 0.0;
+    for (int i = 0; i < euclideanCentroids.length; i++) {
+      double maxNcc = Double.NEGATIVE_INFINITY;
+      for (double[] cur : curCentroids) {
+        double n = ClusterUtils.maxNcc(cur, euclideanCentroids[i]);
+        if (n > maxNcc) {
+          maxNcc = n;
+        }
+      }
+      res += maxNcc * clusterSize[i];
+    }
+    return res;
+  }
+
+  /**
+   * Checks the arguments of {@link #fit} and fails on the first violation.
+   *
+   * @throws IllegalArgumentException if {@code samples} is null or empty, {@code k} is below 2 or
+   *     above the number of samples, {@code maxIterations} is below 1, or the rows are null,
+   *     empty or of unequal length
+   */
+  private static void validate(double[][] samples, int k, int maxIterations) {
+    if (samples == null || samples.length == 0) {
+      throw new IllegalArgumentException("samples must be non-empty.");
+    }
+    if (k < 2) {
+      throw new IllegalArgumentException("k must be at least 2.");
+    }
+    if (k > samples.length) {
+      throw new IllegalArgumentException("k must not exceed the number of samples.");
+    }
+    if (maxIterations < 1) {
+      throw new IllegalArgumentException("maxIterations must be at least 1.");
+    }
+    // The first row supplies the reference length, so it has to be null-checked before it is
+    // dereferenced; otherwise a null first row surfaces as a NullPointerException.
+    if (samples[0] == null) {
+      throw new IllegalArgumentException("All samples must have the same length.");
+    }
+    int dim = samples[0].length;
+    if (dim == 0) {
+      throw new IllegalArgumentException("sample dimension must be positive.");
+    }
+    for (double[] row : samples) {
+      if (row == null || row.length != dim) {
+        throw new IllegalArgumentException("All samples must have the same length.");
+      }
+    }
+  }
+}
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDAFQuantile.java 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDAFQuantile.java
index 047beafe1fc..0518e7a3d76 100644
--- 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDAFQuantile.java
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDAFQuantile.java
@@ -65,59 +65,65 @@ public class UDAFQuantile implements UDTF {
 
   @Override
   public void transform(Row row, PointCollector collector) throws Exception {
-    double res = Util.getValueAsDouble(row);
-    sketch.update(dataToLong(res));
+    // Turn the row's value into a long and add it to the sketch. INT32/INT64 are read directly
+    // from index 0; every other data type is read as a double and encoded by dataToLong.
+    final long encoded;
+    switch (dataType) {
+      case INT32:
+        encoded = row.getInt(0);
+        break;
+      case INT64:
+        encoded = row.getLong(0);
+        break;
+      default:
+        encoded = dataToLong(Util.getValueAsDouble(row));
+        break;
+    }
+    sketch.update(encoded);
   }
 
   @Override
   public void terminate(PointCollector collector) throws Exception {
-    long result = sketch.findMinValueWithRank((long) (rank * sketch.getN()));
-    double res = longToResult(result);
+    // Query the sketch at the nearest-rank index for `rank` and emit the value in the series'
+    // own data type.
+    long n = sketch.getN();
+    // Nearest-rank: k-th smallest uses getApproxRank (strictly-less-than count) in [0, n-1];
+    // rank=1 must map to k=n-1, not k=n which is unreachable and can overshoot the max sample.
+    long k = 0;
+    if (n > 0) {
+      k = (long) Math.ceil(rank * n) - 1;
+      if (k < 0) {
+        k = 0;
+      } else if (k >= n) {
+        k = n - 1;
+      }
+    }
+    // NOTE(review): with n == 0 the sketch is still queried with k = 0; confirm that
+    // findMinValueWithRank tolerates an empty sketch.
+    long result = sketch.findMinValueWithRank(k);
     switch (dataType) {
       case INT32:
-        collector.putInt(0, (int) res);
+        // Integer values are fed to the sketch unencoded, so no decoding is needed here.
+        collector.putInt(0, (int) result);
         break;
       case INT64:
-        collector.putLong(0, (long) res);
+        collector.putLong(0, result);
         break;
       case FLOAT:
-        collector.putFloat(0, (float) res);
+        collector.putFloat(0, (float) longToResult(result));
         break;
       case DOUBLE:
-        collector.putDouble(0, res);
+        collector.putDouble(0, longToResult(result));
         break;
-      case TIMESTAMP:
-      case DATE:
-      case TEXT:
-      case STRING:
-      case BLOB:
-      case BOOLEAN:
       default:
+        // Any other data type emits nothing.
         break;
     }
   }
 
-  private long dataToLong(Object data) {
-    long result;
+  /**
+   * Maps a FLOAT/DOUBLE sample to a {@code long} whose signed ordering follows the numeric
+   * ordering of the sample, so the sketch can rank the encoded values directly.
+   *
+   * <p>Non-negative bit patterns are kept as they are; patterns with the sign bit set have their
+   * lower 63 bits inverted. The branch is taken on the sign bit of the raw bits rather than on
+   * {@code value >= 0}: {@code -0.0} compares as non-negative and NaN compares as false, so a
+   * value test leaves {@code -0.0} un-flipped (it then sorts below every negative number and
+   * decodes to NaN in the DOUBLE case) and flips NaN into a tiny positive number.
+   *
+   * @param res the sample, already widened to double
+   * @return the order-preserving encoding; for any other data type, {@code res} cast to long
+   */
+  private long dataToLong(double res) {
     switch (dataType) {
-      case INT32:
-        return (int) data;
       case FLOAT:
-        result = Float.floatToIntBits((float) data);
-        return (float) data >= 0f ? result : result ^ Long.MAX_VALUE;
-      case INT64:
-        return (long) data;
+        // The int bits are sign-extended to long, so a set float sign bit makes flBits negative.
+        long flBits = Float.floatToIntBits((float) res);
+        return flBits >= 0 ? flBits : flBits ^ Long.MAX_VALUE;
       case DOUBLE:
-        result = Double.doubleToLongBits((double) data);
-        return (double) data >= 0d ? result : result ^ Long.MAX_VALUE;
-      case BLOB:
-      case BOOLEAN:
-      case STRING:
-      case TEXT:
-      case DATE:
-      case TIMESTAMP:
+        long d = Double.doubleToLongBits(res);
+        return d >= 0 ? d : d ^ Long.MAX_VALUE;
       default:
-        return (long) data;
+        return (long) res;
     }
   }
 
@@ -129,16 +135,8 @@ public class UDAFQuantile implements UDTF {
       case DOUBLE:
         result = (result >>> 63) == 0 ? result : result ^ Long.MAX_VALUE;
         return Double.longBitsToDouble(result);
-      case INT64:
-      case INT32:
-      case DATE:
-      case TEXT:
-      case STRING:
-      case BOOLEAN:
-      case BLOB:
-      case TIMESTAMP:
       default:
-        return (result);
+        return (double) result;
     }
   }
 }
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/ExactOrderStatistics.java
 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/ExactOrderStatistics.java
index e1f0baa7c06..47ca5e2b12f 100644
--- 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/ExactOrderStatistics.java
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/ExactOrderStatistics.java
@@ -31,7 +31,14 @@ import 
org.eclipse.collections.impl.list.mutable.primitive.LongArrayList;
 import java.io.IOException;
 import java.util.NoSuchElementException;
 
-/** Util for computing median, MAD, percentile. */
+/**
+ * Util for computing median, MAD, percentile.
+ *
+ * <p>Percentile / quantile ({@link #getPercentile}) uses <b>discrete nearest-rank</b>: for sorted
+ * size {@code n} and {@code phi} in (0, 1], take 1-based rank {@code k = ceil(n * phi)} and 0-based
+ * index {@code k - 1}, clamped to {@code [0, n - 1]}. No interpolation; {@code phi = 0.5} is not
+ * required to match {@link #getMedian}.
+ */
 public class ExactOrderStatistics {
 
   private final Type dataType;
@@ -55,12 +62,6 @@ public class ExactOrderStatistics {
       case DOUBLE:
         doubleArrayList = new DoubleArrayList();
         break;
-      case STRING:
-      case TEXT:
-      case BOOLEAN:
-      case BLOB:
-      case DATE:
-      case TIMESTAMP:
       default:
         // This will not happen.
         throw new UDFInputSeriesDataTypeNotValidException(
@@ -88,12 +89,6 @@ public class ExactOrderStatistics {
           doubleArrayList.add(vd);
         }
         break;
-      case DATE:
-      case TIMESTAMP:
-      case BLOB:
-      case BOOLEAN:
-      case TEXT:
-      case STRING:
       default:
         // This will not happen.
         throw new UDFInputSeriesDataTypeNotValidException(
@@ -111,12 +106,6 @@ public class ExactOrderStatistics {
         return getMedian(floatArrayList);
       case DOUBLE:
         return getMedian(doubleArrayList);
-      case TEXT:
-      case STRING:
-      case BOOLEAN:
-      case BLOB:
-      case TIMESTAMP:
-      case DATE:
       default:
         // This will not happen.
         throw new UDFInputSeriesDataTypeNotValidException(
@@ -199,12 +188,6 @@ public class ExactOrderStatistics {
         return getMad(floatArrayList);
       case DOUBLE:
         return getMad(doubleArrayList);
-      case TIMESTAMP:
-      case DATE:
-      case BLOB:
-      case BOOLEAN:
-      case STRING:
-      case TEXT:
       default:
         // This will not happen.
         throw new UDFInputSeriesDataTypeNotValidException(
@@ -251,12 +234,18 @@ public class ExactOrderStatistics {
     }
   }
 
+  /**
+   * Maps a quantile fraction to a 0-based position in sorted data of length {@code n} using
+   * discrete nearest-rank: {@code ceil(n * phi) - 1}, capped at {@code n - 1} and never below 0.
+   */
+  private static int discreteNearestRankIndex(int n, double phi) {
+    int position = (int) Math.ceil(n * phi) - 1;
+    if (position > n - 1) {
+      position = n - 1;
+    }
+    return position < 0 ? 0 : position;
+  }
+
   public static float getPercentile(FloatArrayList nums, double phi) {
+    // Returns the element of nums at the discrete nearest-rank index for phi. An empty list is
+    // rejected with NoSuchElementException; otherwise sortThis() is called on nums before the
+    // lookup, so the caller's list is sorted as a side effect.
     if (nums.isEmpty()) {
       throw new NoSuchElementException();
     } else {
       nums.sortThis();
-      return nums.get((int) Math.ceil(nums.size() * phi));
+      return nums.get(discreteNearestRankIndex(nums.size(), phi));
     }
   }
 
@@ -265,7 +254,7 @@ public class ExactOrderStatistics {
       throw new NoSuchElementException();
     } else {
       nums.sortThis();
-      return nums.get((int) Math.ceil(nums.size() * phi));
+      return nums.get(discreteNearestRankIndex(nums.size(), phi));
     }
   }
 
@@ -279,12 +268,6 @@ public class ExactOrderStatistics {
         return Float.toString(getPercentile(floatArrayList, phi));
       case DOUBLE:
         return Double.toString(getPercentile(doubleArrayList, phi));
-      case STRING:
-      case TEXT:
-      case BOOLEAN:
-      case BLOB:
-      case DATE:
-      case TIMESTAMP:
       default:
         // This will not happen.
         throw new UDFInputSeriesDataTypeNotValidException(
@@ -297,7 +280,7 @@ public class ExactOrderStatistics {
       throw new NoSuchElementException();
     } else {
       nums.sortThis();
-      return nums.get((int) Math.ceil(nums.size() * phi));
+      return nums.get(discreteNearestRankIndex(nums.size(), phi));
     }
   }
 
@@ -306,7 +289,7 @@ public class ExactOrderStatistics {
       throw new NoSuchElementException();
     } else {
       nums.sortThis();
-      return nums.get((int) Math.ceil(nums.size() * phi));
+      return nums.get(discreteNearestRankIndex(nums.size(), phi));
     }
   }
 }
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/GKArray.java 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/GKArray.java
index 1870bdfb7a4..7dbcc934e78 100644
--- 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/GKArray.java
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/GKArray.java
@@ -124,6 +124,19 @@ public class GKArray {
 
         i++;
 
+      } else if (i >= additionalEntries.size()) {
+        // Only sketch entries left (must check before comparing additionalEntries.get(i)).
+        if (j + 1 < entries.size()
+            && entries.get(j).g + entries.get(j + 1).g + entries.get(j + 
1).delta
+                <= removalThreshold) {
+          // Removable from sketch.
+          entries.get(j + 1).g += entries.get(j).g;
+        } else {
+          mergedEntries.add(entries.get(j));
+        }
+
+        j++;
+
       } else if (additionalEntries.get(i).v < entries.get(j).v) {
         if (additionalEntries.get(i).g + entries.get(j).g + 
entries.get(j).delta
             <= removalThreshold) {
@@ -136,7 +149,7 @@ public class GKArray {
 
         i++;
 
-      } else { // the same as i == additionalEntries.size()
+      } else {
         if (j + 1 < entries.size()
             && entries.get(j).g + entries.get(j + 1).g + entries.get(j + 
1).delta
                 <= removalThreshold) {
diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/match/PatternExecutor.java 
b/library-udf/src/main/java/org/apache/iotdb/library/match/PatternExecutor.java
index 01d1c533493..4e5e7320670 100644
--- 
a/library-udf/src/main/java/org/apache/iotdb/library/match/PatternExecutor.java
+++ 
b/library-udf/src/main/java/org/apache/iotdb/library/match/PatternExecutor.java
@@ -154,7 +154,7 @@ public class PatternExecutor {
 
   /** Execute the query in a particular smooth iteration */
   private void executeQueryInSI(PatternContext queryCtx) {
-    int dsi = 0;
+    int dsi = 0; // dsi: data section index
     if (queryCtx.getDatasetSize() == null) {
       queryCtx.setDatasetSize(
           queryCtx.getDataPoints().get(queryCtx.getDataPoints().size() - 
1).getX()
@@ -237,11 +237,13 @@ public class PatternExecutor {
    */
   private List<Section> findCurveSections(
       List<Double> tangents, List<Point> points, double minHeightPerc) {
+
     List<Section> sections = new ArrayList<>();
     Double lastTg = null;
     Point lastPt = null;
     double totalHeight = calcHeight(points);
     double lastSectHeight = 0;
+
     for (int i = 0; i < tangents.size(); i++) {
       Double tangent = tangents.get(i);
       Point pt = points.get(i);
@@ -257,6 +259,7 @@ public class PatternExecutor {
               && (!(minHeightPerc > 0) || lastSectHeight / totalHeight > 
minHeightPerc)) {
             Section newSection = new Section(sign);
             sections.add(newSection);
+
             newSection.getPoints().add(lastPt);
             newSection.getTangents().add(lastTg);
           }
@@ -340,7 +343,6 @@ public class PatternExecutor {
             || currSect.getNext().get(0).getSize() == 
currSect.getNext().get(0).getTimes())) {
       matchValue = this.calculateMatch(dataSectsForQ, newQSections, queryCtx, 
false);
       if (matchValue != null) {
-
         // Keep only one (best) match if the same area is selected in 
different smooth iterations
         int duplicateMatchIdx =
             PatternMatchConfig.REMOVE_EQUAL_MATCHES
@@ -488,6 +490,7 @@ public class PatternExecutor {
       querySect.setPoints(querySections.get(si).getPoints());
       querySect.setWidth(calcWidth(querySect.getPoints()));
       querySect.setHeight(calcHeight(querySect.getPoints()));
+
       if (querySect.getHeight() == 0) {
         continue;
       }
@@ -507,6 +510,7 @@ public class PatternExecutor {
       }
       dataSect.setWidth(calcWidth(dataSect.getPoints()));
       dataSect.setHeight(calcHeight(dataSect.getPoints()));
+
       if (dataSect.getHeight() == 0) {
         continue;
       }
@@ -540,6 +544,7 @@ public class PatternExecutor {
                 * scaleFactorY);
       }
       querySect.setCentroidY(querySect.getCentroidY() / 
querySect.getPoints().size());
+
       centroidsDifference =
           querySect.getPoints().get(0).getY()
                   * (PatternMatchConfig.RESCALING_Y

Reply via email to