Github user xndai commented on a diff in the pull request: https://github.com/apache/orc/pull/199#discussion_r155596136 --- Diff: tools/src/CSVFileImport.cc --- @@ -0,0 +1,411 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "orc/Exceptions.hh" +#include "orc/OrcFile.hh" + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <memory> +#include <string> +#include <sys/time.h> +#include <time.h> + +#define DELIMITER ',' + +std::string extractColumn(std::string s, uint64_t colIndex) { + uint64_t col = 0; + size_t start = 0; + size_t end = s.find(DELIMITER); + while (col < colIndex && end != std::string::npos) { + start = end + 1; + end = s.find(DELIMITER, start); + ++col; + } + return s.substr(start, end - start); --- End diff -- You will subtract from string::npos when the last column doesn't end with a delimiter or when the number of columns in the csv is less than what is specified in the schema. We need to better handle these cases.
---