KurtYoung commented on a change in pull request #10239: [Flink-11491][Test] Support all TPC-DS queries URL: https://github.com/apache/flink/pull/10239#discussion_r347723729
########## File path: flink-end-to-end-tests/flink-tpcds-test/src/main/java/org/apache/flink/table/tpcds/utils/TpcdsResultComparator.java ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.tpcds.utils; + +import org.apache.flink.api.java.utils.ParameterTool; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Result comparator for TPC-DS test, according to the TPC-DS standard specification v2.11.0. + * Validation of queries 6, 19, 30, 31, 46, 67, 68 and 81 is skipped temporarily, + * because their results do not yet match the answer set exactly and + * it will take some effort to address this. 
+ */ +public class TpcdsResultComparator { + + private static final int VALIDATE_QUERY_NUM = 95; + private static final List<String> VALIDATE_QUERIES = Arrays.asList( + "1", "2", "3", "4", "5", "7", "8", "9", "10", + "11", "12", "13", "14a", "14b", "15", "16", "17", "18", "20", + "21", "22", "23a", "23b", "24a", "24b", "25", "26", "27", "28", "29", + "32", "33", "34", "35", "36", "37", "38", "39a", "39b", "40", + "41", "42", "43", "44", "45", "47", "48", "49", "50", + "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", + "61", "62", "63", "64", "65", "66", "69", "70", + "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", + "82", "83", "84", "85", "86", "87", "88", "89", "90", + "91", "92", "93", "94", "95", "96", "97", "98", "99" + ); + + private static final String REGEX_SPLIT_BAR = "\\|"; + private static final String FILE_SEPARATOR = "/"; + private static final String RESULT_SUFFIX = ".ans"; + private static final double TOLERATED_DOUBLE_DEVIATION = 0.01d; + + public static void main(String[] args) { + ParameterTool params = ParameterTool.fromArgs(args); + String expectedDir = params.getRequired("expectedDir"); + String actualDir = params.getRequired("actualDir"); + int passCnt = 0; + for (String queryId : VALIDATE_QUERIES) { + File expectedFile = new File(expectedDir + FILE_SEPARATOR + queryId + RESULT_SUFFIX); + File actualFile = new File(actualDir + FILE_SEPARATOR + queryId + RESULT_SUFFIX); + + if (compareResult(expectedFile, actualFile)) { + passCnt++; + System.out.println("[INFO] validate success, file: " + expectedFile.getName() + " cnt:" + passCnt); + } else { + System.out.println("[WARN] validate fail, file: " + expectedFile.getName() + "\n"); + } + } + if (passCnt == VALIDATE_QUERY_NUM) { + System.exit(0); + } + System.exit(1); + } + + private static boolean compareResult(File expectedFile, File actualFile) { + try { + BufferedReader expectedReader = new BufferedReader(new FileReader(expectedFile)); + BufferedReader actualReader = new 
BufferedReader(new FileReader(actualFile)); + + int expectedLineNum = 0; + int actualLineNum = 0; + + String expectedLine, actualLine; + while ((expectedLine = expectedReader.readLine()) != null && + (actualLine = actualReader.readLine()) != null) { + expectedLineNum++; + actualLineNum++; + + // the top 8 result lines of query 34, + // result lines 2 and 3 of query 77, + // and result lines 18 and 19 of query 79 + // have a different order, because Flink keeps nulls last for DESC and nulls first for ASC. Review comment: Do we still need this? I saw that you already use different answer files for each query. For example, for query 34 you already picked `34_NULLS_FIRST` to compare. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
