This is an automated email from the ASF dual-hosted git repository.
asolimando pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 8581dba433 [CALCITE-6388] PsTableFunction throws NumberFormatException
when the 'user' column has spaces
8581dba433 is described below
commit 8581dba43337e27b2d70f540b91f1bdf66d18dab
Author: Alessandro Solimando <[email protected]>
AuthorDate: Sat Apr 27 21:57:14 2024 +0200
[CALCITE-6388] PsTableFunction throws NumberFormatException when the 'user'
column has spaces
---
.../apache/calcite/adapter/os/PsTableFunction.java | 243 ++++++++++++++-------
.../calcite/adapter/os/PsTableFunctionTest.java | 73 +++++++
2 files changed, 231 insertions(+), 85 deletions(-)
diff --git a/plus/src/main/java/org/apache/calcite/adapter/os/PsTableFunction.java b/plus/src/main/java/org/apache/calcite/adapter/os/PsTableFunction.java
index 8c829877b2..36980044de 100644
--- a/plus/src/main/java/org/apache/calcite/adapter/os/PsTableFunction.java
+++ b/plus/src/main/java/org/apache/calcite/adapter/os/PsTableFunction.java
@@ -26,13 +26,16 @@ import org.apache.calcite.schema.ScannableTable;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Util;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
import org.checkerframework.checker.nullness.qual.Nullable;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
/**
* Table function that executes the OS "ps" command
@@ -43,16 +46,146 @@ public class PsTableFunction {
Pattern.compile("([0-9]+):([0-9]+):([0-9]+)");
private static final Pattern HOUR_MINUTE_SECOND_PATTERN =
Pattern.compile("([0-9]+):([0-9]+)\\.([0-9]+)");
+ private static final Pattern NUMERIC_PATTERN = Pattern.compile("(\\d+)");
+
+ // it acts as a partial mapping, missing entries are the identity (e.g., "user" -> "user")
+ private static final ImmutableMap<String, String> UNIX_TO_MAC_PS_FIELDS =
+ ImmutableMap.<String, String>builder()
+ .put("pgrp", "pgid")
+ .put("start_time", "lstart")
+ .put("euid", "uid")
+ .build();
+
+ private static final List<String> PS_FIELD_NAMES =
+ ImmutableList.of("user",
+ "pid",
+ "ppid",
+ "pgrp",
+ "tpgid",
+ "stat",
+ "pcpu",
+ "pmem",
+ "vsz",
+ "rss",
+ "tty",
+ "start_time",
+ "time",
+ "euid",
+ "ruid",
+ "sess",
+ "comm");
private PsTableFunction() {
+ throw new AssertionError("Utility class should not be instantiated");
+ }
+
+ /**
+ * Class for parsing, line by line, the output of the ps command for a
+ * predefined list of parameters.
+ */
+ @VisibleForTesting
+ protected static class LineParser implements Function1<String, Object[]> {
+
+ @Override public Object[] apply(String line) {
+ final String[] tokens = line.trim().split(" +");
+ final Object[] values = new Object[PS_FIELD_NAMES.size()];
+
+ if (tokens.length < PS_FIELD_NAMES.size()) {
+ throw new IllegalArgumentException(
+ "Expected at least " + PS_FIELD_NAMES.size() + ", got " + tokens.length);
+ }
+
+ int fieldIdx = 0;
+ int processedTokens = 0;
+ // more tokens than fields, either "user" or "comm" (or both) contain whitespaces, we assume
+ // usernames don't have numeric parts separated by whitespaces (e.g., "root 123"), therefore
+ // we stop whenever we find a numeric token assuming it's the "pid" and "user" is over
+ if (tokens.length > PS_FIELD_NAMES.size()) {
+ StringBuilder sb = new StringBuilder();
+ for (String field : tokens) {
+ if (NUMERIC_PATTERN.matcher(field).matches()) {
+ break;
+ }
+ processedTokens++;
+ sb.append(field).append(" ");
+ }
+ values[fieldIdx] =
+ field(PS_FIELD_NAMES.get(fieldIdx), sb.deleteCharAt(sb.length() - 1).toString());
+ fieldIdx++;
+ }
+
+ for (; fieldIdx < values.length - 1; fieldIdx++) {
+ try {
+ values[fieldIdx] = field(PS_FIELD_NAMES.get(fieldIdx), tokens[processedTokens++]);
+ } catch (RuntimeException e) {
+ throw new RuntimeException("while parsing value ["
+ + tokens[fieldIdx] + "] of field [" + PS_FIELD_NAMES.get(fieldIdx)
+ + "] in line [" + line + "]");
+ }
+ }
+
+ // spaces also in the "comm" part
+ if (processedTokens < tokens.length - 1) {
+ StringBuilder sb = new StringBuilder();
+ while (processedTokens < tokens.length) {
+ sb.append(tokens[processedTokens++]).append(" ");
+ }
+ values[fieldIdx] =
+ field(PS_FIELD_NAMES.get(fieldIdx), sb.deleteCharAt(sb.length() - 1).toString());
+ } else {
+ values[fieldIdx] = field(PS_FIELD_NAMES.get(fieldIdx), tokens[processedTokens]);
+ }
+ return values;
+ }
+
+ private Object field(String field, String value) {
+ switch (field) {
+ case "pid":
+ case "ppid":
+ case "pgrp": // linux only; macOS equivalent is "pgid"
+ case "pgid": // see "pgrp"
+ case "tpgid":
+ return Integer.valueOf(value);
+ case "pcpu":
+ case "pmem":
+ return (int) (Float.parseFloat(value) * 10f);
+ case "time":
+ final Matcher m1 =
+ MINUTE_SECOND_MILLIS_PATTERN.matcher(value);
+ if (m1.matches()) {
+ final long h = Long.parseLong(m1.group(1));
+ final long m = Long.parseLong(m1.group(2));
+ final long s = Long.parseLong(m1.group(3));
+ return h * 3600000L + m * 60000L + s * 1000L;
+ }
+ final Matcher m2 =
+ HOUR_MINUTE_SECOND_PATTERN.matcher(value);
+ if (m2.matches()) {
+ final long m = Long.parseLong(m2.group(1));
+ final long s = Long.parseLong(m2.group(2));
+ StringBuilder g3 = new StringBuilder(m2.group(3));
+ while (g3.length() < 3) {
+ g3.append("0");
+ }
+ final long millis = Long.parseLong(g3.toString());
+ return m * 60000L + s * 1000L + millis;
+ }
+ return 0L;
+ case "start_time": // linux only; macOS version is "lstart"
+ case "lstart": // see "start_time"
+ case "euid": // linux only; macOS equivalent is "uid"
+ case "uid": // see "euid"
+ default:
+ return value;
+ }
+ }
}
public static ScannableTable eval(boolean b) {
return new AbstractBaseScannableTable() {
@Override public Enumerable<@Nullable Object[]> scan(DataContext root) {
final RelDataType rowType = getRowType(root.getTypeFactory());
- final List<String> fieldNames =
- ImmutableList.copyOf(rowType.getFieldNames());
+ final List<String> fieldNames = ImmutableList.copyOf(rowType.getFieldNames());
final String[] args;
final String osName = System.getProperty("os.name");
final String osVersion = System.getProperty("os.version");
@@ -60,97 +193,37 @@ public class PsTableFunction {
switch (osName) {
case "Mac OS X": // tested on version 10.12.5
args = new String[] {
- "ps", "ax", "-o", "ppid=,pid=,pgid=,tpgid=,stat=,"
- + "user=,pcpu=,pmem=,vsz=,rss=,tty=,start=,time=,uid=,ruid=,"
- + "sess=,comm="};
+ "ps", "ax", "-o",
+ fieldNames.stream()
+ .map(s -> UNIX_TO_MAC_PS_FIELDS.getOrDefault(s, s) + "=")
+ .collect(Collectors.joining(","))};
break;
default:
args = new String[] {
- "ps", "--no-headers", "axo", "ppid,pid,pgrp,"
- + "tpgid,stat,user,pcpu,pmem,vsz,rss,tty,start_time,time,euid,"
- + "ruid,sess,comm"};
+ "ps", "--no-headers", "axo", String.join(",", fieldNames)};
}
- return Processes.processLines(args)
- .select(
- new Function1<String, Object[]>() {
- @Override public Object[] apply(String line) {
- final String[] fields = line.trim().split(" +");
- final Object[] values = new Object[fieldNames.size()];
- for (int i = 0; i < values.length; i++) {
- try {
- values[i] = field(fieldNames.get(i), fields[i]);
- } catch (RuntimeException e) {
- throw new RuntimeException("while parsing value ["
- + fields[i] + "] of field [" + fieldNames.get(i)
- + "] in line [" + line + "]");
- }
- }
- return values;
- }
-
- private Object field(String field, String value) {
- switch (field) {
- case "pid":
- case "ppid":
- case "pgrp": // linux only; macOS equivalent is "pgid"
- case "pgid": // see "pgrp"
- case "tpgid":
- return Integer.valueOf(value);
- case "pcpu":
- case "pmem":
- return (int) (Float.valueOf(value) * 10f);
- case "time":
- final Matcher m1 =
- MINUTE_SECOND_MILLIS_PATTERN.matcher(value);
- if (m1.matches()) {
- final long h = Long.parseLong(m1.group(1));
- final long m = Long.parseLong(m1.group(2));
- final long s = Long.parseLong(m1.group(3));
- return h * 3600000L + m * 60000L + s * 1000L;
- }
- final Matcher m2 =
- HOUR_MINUTE_SECOND_PATTERN.matcher(value);
- if (m2.matches()) {
- final long m = Long.parseLong(m2.group(1));
- final long s = Long.parseLong(m2.group(2));
- String g3 = m2.group(3);
- while (g3.length() < 3) {
- g3 = g3 + "0";
- }
- final long millis = Long.parseLong(g3);
- return m * 60000L + s * 1000L + millis;
- }
- return 0L;
- case "start_time": // linux only; macOS version is "lstart"
- case "lstart": // see "start_time"
- case "euid": // linux only; macOS equivalent is "uid"
- case "uid": // see "euid"
- default:
- return value;
- }
- }
- });
+ return Processes.processLines(args).select(new LineParser());
}
@Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
return typeFactory.builder()
- .add("pid", SqlTypeName.INTEGER)
- .add("ppid", SqlTypeName.INTEGER)
- .add("pgrp", SqlTypeName.INTEGER)
- .add("tpgid", SqlTypeName.INTEGER)
- .add("stat", SqlTypeName.VARCHAR)
- .add("user", SqlTypeName.VARCHAR)
- .add("pcpu", SqlTypeName.DECIMAL, 3, 1)
- .add("pmem", SqlTypeName.DECIMAL, 3, 1)
- .add("vsz", SqlTypeName.INTEGER)
- .add("rss", SqlTypeName.INTEGER)
- .add("tty", SqlTypeName.VARCHAR)
- .add("start_time", SqlTypeName.VARCHAR)
- .add("time", TimeUnit.HOUR, -1, TimeUnit.SECOND, 0)
- .add("euid", SqlTypeName.VARCHAR)
- .add("ruid", SqlTypeName.VARCHAR)
- .add("sess", SqlTypeName.VARCHAR)
- .add("command", SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(0), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(1), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(2), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(3), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(4), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(5), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(6), SqlTypeName.DECIMAL, 3, 1)
+ .add(PS_FIELD_NAMES.get(7), SqlTypeName.DECIMAL, 3, 1)
+ .add(PS_FIELD_NAMES.get(8), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(9), SqlTypeName.INTEGER)
+ .add(PS_FIELD_NAMES.get(10), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(11), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(12), TimeUnit.HOUR, -1, TimeUnit.SECOND, 0)
+ .add(PS_FIELD_NAMES.get(13), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(14), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(15), SqlTypeName.VARCHAR)
+ .add(PS_FIELD_NAMES.get(16), SqlTypeName.VARCHAR)
.build();
}
};
diff --git a/plus/src/test/java/org/apache/calcite/adapter/os/PsTableFunctionTest.java b/plus/src/test/java/org/apache/calcite/adapter/os/PsTableFunctionTest.java
new file mode 100644
index 0000000000..181cb5655b
--- /dev/null
+++ b/plus/src/test/java/org/apache/calcite/adapter/os/PsTableFunctionTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.adapter.os;
+
+import com.google.common.collect.ImmutableList;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+/**
+ * Unit tests for the ps (process status) table function.
+ */
+class PsTableFunctionTest {
+
+ /** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-6388">[CALCITE-6388]
+ * PsTableFunction throws NumberFormatException when the 'user' column has spaces</a>.
+ */
+ @Test void testPsInfoParsing() {
+ final List<String> input = new ArrayList<>();
+ input.add("startup user 56399 1 56399 0 S 0.0 0.0 410348128 6672 ??"
+ + " 3:25PM 0:00.22 501 501 0 /usr/lib exec/trustd");
+ input.add("root 1 107 107 0 Ss 0.0 0.0 410142784 4016 ??"
+ + " 11Apr24 0:52.32 0 0 0 "
+ +
"/System/Library/PrivateFrameworks/Uninstall.framework/Resources/uninstalld");
+ input.add("user.name 1 1661 1661 0 S 0.7 0.2 412094800 75232 ?? "
+ + "11Apr24 325:33.63 775020228 775020228 0 "
+ + "/System/Library/CoreServices/ControlCenter app/Contents/MacOS/ControlCenter");
+
+ final List<List<Object>> output =
+ ImmutableList.of(
+ Arrays.asList("startup user", 56399, 1, 56399, 0, "S", 0, 0, "410348128", "6672", "??",
+ "3:25PM", 220L, "501", "501", "0", "/usr/lib exec/trustd"),
+ Arrays.asList("root", 1, 107, 107, 0, "Ss", 0, 0, "410142784", "4016", "??",
+ "11Apr24", 52320L, "0", "0", "0",
+ "/System/Library/PrivateFrameworks/Uninstall.framework/Resources/uninstalld"),
+ Arrays.asList("user.name", 1, 1661, 1661, 0, "S", 7, 2, "412094800", "75232", "??",
+ "11Apr24", 19533630L, "775020228", "775020228", "0",
+ "/System/Library/CoreServices/ControlCenter app/Contents/MacOS/ControlCenter"));
+
+ final Map<String, List<Object>> testValues = new HashMap<>();
+ for (int i = 0; i < input.size(); i++) {
+ testValues.put(input.get(i), output.get(i));
+ }
+
+ final PsTableFunction.LineParser psLineParser = new PsTableFunction.LineParser();
+ for (Map.Entry<String, List<Object>> e : testValues.entrySet()) {
+ assertThat(psLineParser.apply(e.getKey()), is(e.getValue().toArray()));
+ }
+ }
+}