[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/1984 ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169860093 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,717 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + public SegmentFileStore(String tablePath, String segmentFileName) throws IOException { +this.tablePath = tablePath; +this.segmentFile = readSegment(tablePath, segmentFileName); + } + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public static void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); + +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +boolean isRelative = false; +if (location.startsWith(tablePath)) { +
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169859849 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,689 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169858244 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java --- @@ -217,6 +217,6 @@ public void convertValue(ColumnPageValueConverter codec) { @Override public void freeMemory() { - +byteArrayData = null; --- End diff -- We implemented decimal compression wherein based on precision and scale we are storing decimal as integer, short and long. So other types can be used based on decimal precision and scale ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169641109 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169638631 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,717 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + public SegmentFileStore(String tablePath, String segmentFileName) throws IOException { +this.tablePath = tablePath; +this.segmentFile = readSegment(tablePath, segmentFileName); + } + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public static void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); + +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +boolean isRelative = false; +if (location.startsWith(tablePath)) { +
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169635879 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169635618 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169632391 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169629728 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169629127 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169628918 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169628603 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java --- @@ -161,9 +212,13 @@ private void readMergeFile(String mergeFilePath) throws IOException { MergedBlockIndex mergedBlockIndex = readMergeBlockIndex(thriftReader); List file_names = indexHeader.getFile_names(); List fileData = mergedBlockIndex.getFileData(); +CarbonFile mergeFile = FileFactory.getCarbonFile(mergeFilePath); assert (file_names.size() == fileData.size()); for (int i = 0; i < file_names.size(); i++) { carbonIndexMap.put(file_names.get(i), fileData.get(i).array()); + carbonIndexMapWithFullPath.put( + mergeFile.getParentFile().getAbsolutePath() + CarbonCommonConstants.FILE_SEPARATOR + + file_names.get(i), fileData.get(i).array()); } thriftReader.close(); --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169628343 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java --- @@ -79,6 +87,44 @@ public void readAllIIndexOfSegment(String segmentPath) throws IOException { } } + /** + * Read all index files and keep the cache in it. + * + * @param segmentFileStore + * @throws IOException + */ + public void readAllIIndexOfSegment(SegmentFileStore segmentFileStore, SegmentStatus status, + boolean ignoreStatus) throws IOException { +List carbonIndexFiles = new ArrayList<>(); +if (segmentFileStore.getLocationMap() == null) { + return; +} +for (Map.Entrylocations : segmentFileStore +.getLocationMap().entrySet()) { + String location = locations.getKey(); + if (locations.getValue().isRelative()) { +location = --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169628065 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java --- @@ -660,22 +650,23 @@ public boolean isScanRequired(FilterResolverIntf filterExp) { return blocklets; } - @Override public List prune(FilterResolverIntf filterExp, List partitions) { + @Override + public List prune(FilterResolverIntf filterExp, List partitions) { if (unsafeMemoryDMStore.getRowCount() == 0) { return new ArrayList<>(); } -// First get the partitions which are stored inside datamap. -List storedPartitions = getPartitions(); // if it has partitioned datamap but there is no partitioned information stored, it means // partitions are dropped so return empty list. -if (isPartitionedSegment && (storedPartitions == null || storedPartitions.size() == 0)) { - return new ArrayList<>(); -} -if (storedPartitions != null && storedPartitions.size() > 0) { +if (partitions != null) { + // First get the partitions which are stored inside datamap. + String[] fileDetails = getFileDetails(); // Check the exact match of partition information inside the stored partitions. boolean found = false; - if (partitions != null && partitions.size() > 0) { -found = partitions.containsAll(storedPartitions); + Path folderPath = new Path(fileDetails[0]); + for (PartitionSpec spec : partitions) { +if (folderPath.equals(spec.getLocation())) { + found = true; --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169627883 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java --- @@ -17,91 +17,59 @@ package org.apache.carbondata.core.indexstore; +import java.util.Objects; + import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.metadata.CarbonTableIdentifier; /** - * Class holds the absoluteTableIdentifier and segmentId to uniquely identify a segment + * Class holds the absoluteTableIdentifier and segment to uniquely identify a segment --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169627561 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java --- @@ -217,6 +217,6 @@ public void convertValue(ColumnPageValueConverter codec) { @Override public void freeMemory() { - +byteArrayData = null; --- End diff -- In decimal page it only uses byteArrayData, so not required ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169627320 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { +List list = new ArrayList<>(segmentIds.length); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static List toSegmentList(List segmentIds) { +List list = new ArrayList<>(segmentIds.size()); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static Segment toSegment(String segmentId) { +String[] split = segmentId.split("#"); +if (split.length > 1) { + return new Segment(split[0], split[1]); +} else if (split.length > 0) { + return new Segment(split[0], null); +} +return new Segment(segmentId, null); --- End diff -- Added ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169627375 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { +List list = new ArrayList<>(segmentIds.length); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static List toSegmentList(List segmentIds) { +List list = new ArrayList<>(segmentIds.size()); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static Segment toSegment(String segmentId) { +String[] split = segmentId.split("#"); +if (split.length > 1) { + return new Segment(split[0], split[1]); +} else if (split.length > 0) { + return new Segment(split[0], null); +} +return new Segment(segmentId, null); + } + + @Override public boolean equals(Object o) { +if (this == o) return true; +if (o == null || getClass() != o.getClass()) return false; +Segment segment = (Segment) o; +return Objects.equals(segmentId, segment.segmentId); + } + + @Override public int hashCode() { + --- End diff -- ok ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169627117 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java --- @@ -2449,5 +2496,40 @@ public static String encodeToString(byte[] bytes) throws UnsupportedEncodingExce return updatedMinMaxValues; } + /** + * Generate the blockid as per the block path + * + * @param identifier + * @param filePath + * @param segmentId + * @return + */ + public static String getBlockId(AbsoluteTableIdentifier identifier, String filePath, + String segmentId) { +String blockId; +String blockName = filePath.substring(filePath.lastIndexOf("/") + 1, filePath.length()); +String tablePath = identifier.getTablePath(); +if (filePath.startsWith(tablePath)) { + String factDir = + CarbonStorePath.getCarbonTablePath(tablePath, identifier.getCarbonTableIdentifier()) + .getFactDir(); + if (filePath.startsWith(factDir)) { +blockId = "Part0" + CarbonCommonConstants.FILE_SEPARATOR + "Segment_" + segmentId ++ CarbonCommonConstants.FILE_SEPARATOR + blockName; + } else { +String partitionDir = +filePath.substring(tablePath.length() + 1, filePath.length() - blockName.length() - 1); + +blockId = partitionDir.replace("/", "#") + CarbonCommonConstants.FILE_SEPARATOR + "Segment_" --- End diff -- Added comment ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169626625 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169626278 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; --- End diff -- Added in CarbonOutputCOmmiter abortjob ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169620760 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java --- @@ -185,26 +212,27 @@ public void clear(String segmentId) { @Override public void clear() { for (String segmentId : segmentMap.keySet().toArray(new String[segmentMap.size()])) { - clear(segmentId); + clear(new Segment(segmentId, null)); } } @Override public List getDataMaps(DataMapDistributable distributable) throws IOException { BlockletDataMapDistributable mapDistributable = (BlockletDataMapDistributable) distributable; List identifiers = new ArrayList<>(); -if (mapDistributable.getFilePath().endsWith(CarbonTablePath.INDEX_FILE_EXT)) { - identifiers.add(new TableBlockIndexUniqueIdentifier(identifier, distributable.getSegmentId(), - mapDistributable.getFilePath())); -} else if (mapDistributable.getFilePath().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) { +String indexPath = mapDistributable.getFilePath(); +if (indexPath.endsWith(CarbonTablePath.INDEX_FILE_EXT)) { + String parent = indexPath.substring(0, indexPath.lastIndexOf("/")); --- End diff -- Ok, I have changed to HDFS Path and get parent from it. ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169619961 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/PartitionSpec.java --- @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.indexstore; + +import java.io.Serializable; +import java.net.URI; +import java.util.List; +import java.util.Objects; + +import org.apache.carbondata.core.datastore.impl.FileFactory; + +import org.apache.hadoop.fs.Path; + +/** + * Holds partition information. + */ +public class PartitionSpec implements Serializable { + + private static final long serialVersionUID = 4828007433384867678L; + + private List partitions; --- End diff -- It signifies column=value, I have added comment ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169619680 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { --- End diff -- ok, renamed ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169544845 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169544627 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169544464 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169543473 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169543287 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169542459 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { --- End diff -- Move this complete if check inside below if check {if (carbonFiles != null && carbonFiles.length > 0)} ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169518712 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { +List list = new ArrayList<>(segmentIds.length); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static List toSegmentList(List segmentIds) { +List list = new ArrayList<>(segmentIds.size()); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static Segment toSegment(String segmentId) { +String[] split = segmentId.split("#"); +if (split.length > 1) { + return new Segment(split[0], split[1]); +} else if (split.length > 0) { + return new Segment(split[0], null); +} +return new Segment(segmentId, null); --- End diff -- Please provide a comment for this method with an example covering the if, else if and else case ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169540773 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java --- @@ -79,6 +87,44 @@ public void readAllIIndexOfSegment(String segmentPath) throws IOException { } } + /** + * Read all index files and keep the cache in it. + * + * @param segmentFileStore + * @throws IOException + */ + public void readAllIIndexOfSegment(SegmentFileStore segmentFileStore, SegmentStatus status, + boolean ignoreStatus) throws IOException { +List carbonIndexFiles = new ArrayList<>(); +if (segmentFileStore.getLocationMap() == null) { + return; +} +for (Map.Entrylocations : segmentFileStore +.getLocationMap().entrySet()) { + String location = locations.getKey(); + if (locations.getValue().isRelative()) { +location = --- End diff -- This location variable fetching and value modification can be done after the below if check ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169537887 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java --- @@ -660,22 +650,23 @@ public boolean isScanRequired(FilterResolverIntf filterExp) { return blocklets; } - @Override public List prune(FilterResolverIntf filterExp, List partitions) { + @Override + public List prune(FilterResolverIntf filterExp, List partitions) { if (unsafeMemoryDMStore.getRowCount() == 0) { return new ArrayList<>(); } -// First get the partitions which are stored inside datamap. -List storedPartitions = getPartitions(); // if it has partitioned datamap but there is no partitioned information stored, it means // partitions are dropped so return empty list. -if (isPartitionedSegment && (storedPartitions == null || storedPartitions.size() == 0)) { - return new ArrayList<>(); -} -if (storedPartitions != null && storedPartitions.size() > 0) { +if (partitions != null) { + // First get the partitions which are stored inside datamap. + String[] fileDetails = getFileDetails(); // Check the exact match of partition information inside the stored partitions. boolean found = false; - if (partitions != null && partitions.size() > 0) { -found = partitions.containsAll(storedPartitions); + Path folderPath = new Path(fileDetails[0]); + for (PartitionSpec spec : partitions) { +if (folderPath.equals(spec.getLocation())) { + found = true; --- End diff -- break the loop once found ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169518845 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { +List list = new ArrayList<>(segmentIds.length); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static List toSegmentList(List segmentIds) { +List list = new ArrayList<>(segmentIds.size()); +for (String segmentId : segmentIds) { + list.add(toSegment(segmentId)); +} +return list; + } + + public static Segment toSegment(String segmentId) { +String[] split = segmentId.split("#"); +if (split.length > 1) { + return new Segment(split[0], split[1]); +} else if (split.length > 0) { + return new Segment(split[0], null); +} +return new Segment(segmentId, null); + } + + @Override public boolean equals(Object o) { +if (this == o) return true; +if (o == null || getClass() != o.getClass()) return false; +Segment segment = (Segment) o; +return Objects.equals(segmentId, segment.segmentId); + } + + @Override public int hashCode() { + --- End diff -- Remove his extra line ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169540997 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/SegmentIndexFileStore.java --- @@ -161,9 +212,13 @@ private void readMergeFile(String mergeFilePath) throws IOException { MergedBlockIndex mergedBlockIndex = readMergeBlockIndex(thriftReader); List file_names = indexHeader.getFile_names(); List fileData = mergedBlockIndex.getFileData(); +CarbonFile mergeFile = FileFactory.getCarbonFile(mergeFilePath); assert (file_names.size() == fileData.size()); for (int i = 0; i < file_names.size(); i++) { carbonIndexMap.put(file_names.get(i), fileData.get(i).array()); + carbonIndexMapWithFullPath.put( + mergeFile.getParentFile().getAbsolutePath() + CarbonCommonConstants.FILE_SEPARATOR + + file_names.get(i), fileData.get(i).array()); } thriftReader.close(); --- End diff -- use try and finally block and close the thrift reader inside finally block ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169520945 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java --- @@ -217,6 +217,6 @@ public void convertValue(ColumnPageValueConverter codec) { @Override public void freeMemory() { - +byteArrayData = null; --- End diff -- other types like intData, longData can also be used, so please set the references to null for all the types here ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169522942 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java --- @@ -17,91 +17,59 @@ package org.apache.carbondata.core.indexstore; +import java.util.Objects; + import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.metadata.CarbonTableIdentifier; /** - * Class holds the absoluteTableIdentifier and segmentId to uniquely identify a segment + * Class holds the absoluteTableIdentifier and segment to uniquely identify a segment --- End diff -- Modify the comment to remove absoluteTableIdentifier ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169413537 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java --- @@ -2449,5 +2496,40 @@ public static String encodeToString(byte[] bytes) throws UnsupportedEncodingExce return updatedMinMaxValues; } + /** + * Generate the blockid as per the block path + * + * @param identifier + * @param filePath + * @param segmentId + * @return + */ + public static String getBlockId(AbsoluteTableIdentifier identifier, String filePath, + String segmentId) { +String blockId; +String blockName = filePath.substring(filePath.lastIndexOf("/") + 1, filePath.length()); +String tablePath = identifier.getTablePath(); +if (filePath.startsWith(tablePath)) { + String factDir = + CarbonStorePath.getCarbonTablePath(tablePath, identifier.getCarbonTableIdentifier()) + .getFactDir(); + if (filePath.startsWith(factDir)) { +blockId = "Part0" + CarbonCommonConstants.FILE_SEPARATOR + "Segment_" + segmentId ++ CarbonCommonConstants.FILE_SEPARATOR + blockName; + } else { +String partitionDir = +filePath.substring(tablePath.length() + 1, filePath.length() - blockName.length() - 1); + +blockId = partitionDir.replace("/", "#") + CarbonCommonConstants.FILE_SEPARATOR + "Segment_" --- End diff -- why / is replaced with \#, please write comment. ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169411226 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; +String writePath = CarbonTablePath.getSegmentFilesLocation(tablePath) + "/" + tempFolderLoc; +CarbonFile carbonFile = FileFactory.getCarbonFile(writePath); +if (!carbonFile.exists()) { + carbonFile.mkdirs(writePath, FileFactory.getFileType(writePath)); +} +CarbonFile tempFolder = +FileFactory.getCarbonFile(location + CarbonCommonConstants.FILE_SEPARATOR + tempFolderLoc); +boolean isRelative = false; +if (location.startsWith(tablePath)) { + location = location.substring(tablePath.length(), location.length()); + isRelative = true; +} +if (tempFolder.exists() && partionNames.size() > 0) { + CarbonFile[] carbonFiles = tempFolder.listFiles(new CarbonFileFilter() { +@Override public boolean accept(CarbonFile file) { + return file.getName().startsWith(taskNo) && file.getName() + .endsWith(CarbonTablePath.INDEX_FILE_EXT); +} + }); + if (carbonFiles != null && carbonFiles.length > 0) { +SegmentFile segmentFile = new SegmentFile(); +Map locationMap = new HashMap<>();
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169402456 --- Diff: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java --- @@ -0,0 +1,690 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.metadata; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.Segment; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.fileoperations.AtomicFileOperations; +import org.apache.carbondata.core.fileoperations.AtomicFileOperationsImpl; +import org.apache.carbondata.core.fileoperations.FileWriteOperation; +import org.apache.carbondata.core.indexstore.PartitionSpec; +import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.CarbonUpdateUtil; +import org.apache.carbondata.core.statusmanager.LoadMetadataDetails; +import org.apache.carbondata.core.statusmanager.SegmentStatus; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +import com.google.gson.Gson; +import org.apache.hadoop.fs.Path; + +/** + * Provide read and write support for segment file associated with each segment + */ +public class SegmentFileStore { + + private SegmentFile segmentFile; + + private MapindexFilesMap; + + private String tablePath; + + /** + * Write segment information to the segment folder with indexfilename and + * corresponding partitions. + */ + public void writeSegmentFile(String tablePath, final String taskNo, String location, + String timeStamp, List partionNames) throws IOException { +String tempFolderLoc = timeStamp + ".tmp"; --- End diff -- As segmentfile already has transactionTimestamp/LoadTimestamp, temp folder is not required. ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169399178 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java --- @@ -185,26 +212,27 @@ public void clear(String segmentId) { @Override public void clear() { for (String segmentId : segmentMap.keySet().toArray(new String[segmentMap.size()])) { - clear(segmentId); + clear(new Segment(segmentId, null)); } } @Override public List getDataMaps(DataMapDistributable distributable) throws IOException { BlockletDataMapDistributable mapDistributable = (BlockletDataMapDistributable) distributable; List identifiers = new ArrayList<>(); -if (mapDistributable.getFilePath().endsWith(CarbonTablePath.INDEX_FILE_EXT)) { - identifiers.add(new TableBlockIndexUniqueIdentifier(identifier, distributable.getSegmentId(), - mapDistributable.getFilePath())); -} else if (mapDistributable.getFilePath().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) { +String indexPath = mapDistributable.getFilePath(); +if (indexPath.endsWith(CarbonTablePath.INDEX_FILE_EXT)) { + String parent = indexPath.substring(0, indexPath.lastIndexOf("/")); --- End diff -- Need to use filedelimiter, as it requires to work on windows path ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169392444 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/PartitionSpec.java --- @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.indexstore; + +import java.io.Serializable; +import java.net.URI; +import java.util.List; +import java.util.Objects; + +import org.apache.carbondata.core.datastore.impl.FileFactory; + +import org.apache.hadoop.fs.Path; + +/** + * Holds partition information. + */ +public class PartitionSpec implements Serializable { + + private static final long serialVersionUID = 4828007433384867678L; + + private List partitions; --- End diff -- is paritions signify list of partition columns? ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169392124 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/PartitionSpec.java --- @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.indexstore; + +import java.io.Serializable; +import java.net.URI; +import java.util.List; +import java.util.Objects; + +import org.apache.carbondata.core.datastore.impl.FileFactory; + +import org.apache.hadoop.fs.Path; + +/** + * Holds partition information. + */ +public class PartitionSpec implements Serializable { + + private static final long serialVersionUID = 4828007433384867678L; + + private List partitions; + + private transient Path locationPath; + + private String location; --- End diff -- what is difference between location and locationspec? ---
[GitHub] carbondata pull request #1984: [CARBONDATA-2187] Partition restructure
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1984#discussion_r169380463 --- Diff: core/src/main/java/org/apache/carbondata/core/datamap/Segment.java --- @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * Represents one load of carbondata + */ +public class Segment implements Serializable { + + private static final long serialVersionUID = 7044555408162234064L; + + private String segmentId; + + private String segmentFileName; + + public Segment(String segmentId, String segmentFileName) { +this.segmentId = segmentId; +this.segmentFileName = segmentFileName; + } + + public String getSegmentId() { +return segmentId; + } + + public String getSegmentFileName() { +return segmentFileName; + } + + public static List toSegmentList(String[] segmentIds) { --- End diff -- this segmentId parameter is different from this.segmentId. Parameter can be renamed to segmentFullId with (segementId#SegmentFileName). Or this.segmentId renamed to segmentNo. ---