Build failed in Jenkins: carbondata-master-spark-2.4 #2438

2020-08-18 Thread Apache Jenkins Server
See 


Changes:


--
Started by an SCM change
Running as SYSTEM
[EnvInject] - Loading node environment variables.
Building remotely on H30 (ubuntu) in workspace /home/jenkins/jenkins-slave/workspace/carbondata-master-spark-2.4
FATAL: java.io.IOException: Unexpected termination of the channel
java.io.EOFException
    at java.io.ObjectInputStream$PeekInputStream.readFully(ObjectInputStream.java:2681)
    at java.io.ObjectInputStream$BlockDataInputStream.readShort(ObjectInputStream.java:3156)
    at java.io.ObjectInputStream.readStreamHeader(ObjectInputStream.java:862)
    at java.io.ObjectInputStream.<init>(ObjectInputStream.java:358)
    at hudson.remoting.ObjectInputStreamEx.<init>(ObjectInputStreamEx.java:49)
    at hudson.remoting.Command.readFrom(Command.java:140)
    at hudson.remoting.Command.readFrom(Command.java:126)
    at hudson.remoting.AbstractSynchronousByteArrayCommandTransport.read(AbstractSynchronousByteArrayCommandTransport.java:36)
    at hudson.remoting.SynchronousCommandTransport$ReaderThread.run(SynchronousCommandTransport.java:63)
Caused: java.io.IOException: Unexpected termination of the channel
    at hudson.remoting.SynchronousCommandTransport$ReaderThread.run(SynchronousCommandTransport.java:77)
Also:   hudson.remoting.Channel$CallSiteStackTrace: Remote call to H30
        at hudson.remoting.Channel.attachCallSiteStackTrace(Channel.java:1743)
        at hudson.remoting.Request.call(Request.java:202)
        at hudson.remoting.Channel.call(Channel.java:956)
        at hudson.FilePath.act(FilePath.java:1072)
        at hudson.FilePath.act(FilePath.java:1061)
        at org.jenkinsci.plugins.gitclient.Git.getClient(Git.java:144)
        at hudson.plugins.git.GitSCM.createClient(GitSCM.java:822)
        at hudson.plugins.git.GitSCM.createClient(GitSCM.java:813)
        at hudson.plugins.git.GitSCM.checkout(GitSCM.java:1186)
        at hudson.scm.SCM.checkout(SCM.java:504)
        at hudson.model.AbstractProject.checkout(AbstractProject.java:1208)
        at hudson.model.AbstractBuild$AbstractBuildExecution.defaultCheckout(AbstractBuild.java:574)
        at jenkins.scm.SCMCheckoutStrategy.checkout(SCMCheckoutStrategy.java:86)
        at hudson.model.AbstractBuild$AbstractBuildExecution.run(AbstractBuild.java:499)
        at hudson.model.Run.execute(Run.java:1815)
        at hudson.maven.MavenModuleSetBuild.run(MavenModuleSetBuild.java:543)
        at hudson.model.ResourceController.execute(ResourceController.java:97)
        at hudson.model.Executor.run(Executor.java:429)
Caused: hudson.remoting.RequestAbortedException
    at hudson.remoting.Request.abort(Request.java:340)
    at hudson.remoting.Channel.terminate(Channel.java:1040)
    at hudson.remoting.SynchronousCommandTransport$ReaderThread.run(SynchronousCommandTransport.java:94)
ERROR: H30 is offline; cannot locate JDK 1.8 (latest)
ERROR: H30 is offline; cannot locate Maven 3.3.9
ERROR: H30 is offline; cannot locate JDK 1.8 (latest)
ERROR: H30 is offline; cannot locate Maven 3.3.9
ERROR: H30 is offline; cannot locate JDK 1.8 (latest)
ERROR: H30 is offline; cannot locate Maven 3.3.9


[carbondata] branch master updated: [CARBONDATA-3927]Remove unwanted fields from tupleID to make it short and to improve store size and performance

2020-08-18 Thread kunalkapoor
This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 0298273  [CARBONDATA-3927]Remove unwanted fields from tupleID to make it short and to improve store size and performance
0298273 is described below

commit 02982739ed6a472e172b9df13912bf15f3a00488
Author: akashrn5 
AuthorDate: Sun Jul 12 21:23:23 2020 +0530

[CARBONDATA-3927]Remove unwanted fields from tupleID to make it short and to improve store size and performance

Why is this PR needed?
Currently, we store a very long tupleId, which increases the store
size and reduces query performance.

What changes were proposed in this PR?
Remove the compressor name, part id, and batch number from the tupleID.
This reduces the store size and also improves query performance.

part_0 is not required, as it is the same for every tuple.
_batchno is not required; being a common word, it can be replaced by "-".
The compressor name is not required, as it plays no role here.

This closes #3837
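
To make the trimming concrete, here is a minimal, self-contained sketch of
the idea. The field layout, the sample ID, and the helper are hypothetical
illustrations, not CarbonData's actual TupleIdEnum layout:

    // Sketch only: shortens a "/"-delimited tuple ID by dropping tokens that
    // carry no information. Field positions and the sample value below are
    // hypothetical; the real layout is defined by TupleIdEnum.
    public class TupleIdSketch {
      // old style: partId/segmentId/fileName_batchno<n>.<compressor>/blockletId/pageId/rowId
      static String shorten(String oldTupleId) {
        String[] fields = oldTupleId.split("/");
        // drop the constant part id (fields[0]), strip the compressor suffix,
        // and collapse the verbose "_batchno" marker into a "-"
        String block = fields[2].replaceAll("\\.[a-z0-9]+$", "").replace("_batchno", "-");
        return String.join("/", fields[1], block, fields[3], fields[4], fields[5]);
      }

      public static void main(String[] args) {
        // prints 0/0-0-0-0-0/0/0/28 instead of the longer original
        System.out.println(shorten("part_0/0/0-0_batchno0-0-0.snappy/0/0/28"));
      }
    }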
---
 .../carbondata/core/mutate/CarbonUpdateUtil.java   | 39 +++---
 .../apache/carbondata/core/mutate/TupleIdEnum.java | 18 ---
 .../apache/carbondata/core/util/CarbonUtil.java| 12 +++--
 .../carbondata/core/util/path/CarbonTablePath.java | 38 +++---
 .../carbondata/core/util/CarbonUtilTest.java   | 28 ++
 .../command/management/CommonLoadUtils.scala   |  7 ++-
 .../command/mutation/DeleteExecution.scala | 60 --
 .../testsuite/iud/DeleteCarbonTableTestCase.scala  | 26 +-
 .../spark/carbondata/query/SubQueryTestSuite.scala |  2 +-
 9 files changed, 173 insertions(+), 57 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
index ec7894d..f43a5dc 100644
--- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
@@ -84,7 +84,16 @@ public class CarbonUpdateUtil {
    */
   public static String getSegmentWithBlockFromTID(String Tid, boolean isPartitionTable) {
     if (isPartitionTable) {
-      return getRequiredFieldFromTID(Tid, TupleIdEnum.SEGMENT_ID);
+      return getRequiredFieldFromTID(Tid, TupleIdEnum.PARTITION_SEGMENT_ID);
+    }
+    // this case is to check for add segment case, as now the segment id is present at first index,
+    // in add segment case, it will be in second index as the blockletID is generated by adding the
+    // complete external path
+    // this is in case of the external segment, where the tuple id has external path with #
+    if (Tid.contains("#")) {
+      return getRequiredFieldFromTID(Tid, TupleIdEnum.EXTERNAL_SEGMENT_ID)
+          + CarbonCommonConstants.FILE_SEPARATOR + getRequiredFieldFromTID(Tid,
+          TupleIdEnum.EXTERNAL_BLOCK_ID);
     }
     return getRequiredFieldFromTID(Tid, TupleIdEnum.SEGMENT_ID)
         + CarbonCommonConstants.FILE_SEPARATOR + getRequiredFieldFromTID(Tid, TupleIdEnum.BLOCK_ID);
@@ -93,11 +102,17 @@ public class CarbonUpdateUtil {
   /**
    * Returns block path from tuple id
    */
-  public static String getTableBlockPath(String tid, String tablePath, boolean isStandardTable) {
-    String partField = getRequiredFieldFromTID(tid, TupleIdEnum.PART_ID);
+  public static String getTableBlockPath(String tid, String tablePath, boolean isStandardTable,
+      boolean isPartitionTable) {
+    String partField = "0";
     // If it has segment file then part field can be appended directly to table path
     if (!isStandardTable) {
-      return tablePath + CarbonCommonConstants.FILE_SEPARATOR + partField.replace("#", "/");
+      if (isPartitionTable) {
+        partField = getRequiredFieldFromTID(tid, TupleIdEnum.PARTITION_PART_ID);
+        return tablePath + CarbonCommonConstants.FILE_SEPARATOR + partField.replace("#", "/");
+      } else {
+        return tablePath;
+      }
     }
     String part = CarbonTablePath.addPartPrefix(partField);
     String segment =
@@ -941,13 +956,21 @@ public class CarbonUpdateUtil {
    */
   public static String getSegmentBlockNameKey(String segID, String blockName,
       boolean isPartitionTable) {
-    String blockNameWithOutPart = blockName
+    String blockNameWithOutPartAndBatchNo = blockName
         .substring(blockName.indexOf(CarbonCommonConstants.HYPHEN) + 1,
-            blockName.lastIndexOf(CarbonTablePath.getCarbonDataExtension()));
+            blockName.lastIndexOf(CarbonTablePath.getCarbonDataExtension()))
+        .replace(CarbonTablePath.BATCH_PREFIX, CarbonCommonConstants.UNDERSCORE);
+    // to remove compressor name
+    int index =

[carbondata] branch master updated: [CARBONDATA-3863]after using index service clean the temp data

2020-08-18 Thread kunalkapoor
This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new d111036  [CARBONDATA-3863]after using index service clean the temp data
d111036 is described below

commit d111036b0ec11ec24f373eb16baeb009b21b1288
Author: litao 
AuthorDate: Mon Jul 20 21:52:28 2020 +0800

[CARBONDATA-3863]after using index service clean the temp data

Why is this PR needed?
Each query that uses the index server creates a folder under
/tmp/indexservertmp, but when the query finishes the folder is not
deleted. So, as the number of queries increases, the number of
folders in /tmp/indexservertmp keeps growing until the directory
item limit is hit.

What changes were proposed in this PR?
Delete the created folder once the query finishes.
Clean /tmp/indexservertmp when the index server restarts.
Run a thread that deletes any folder in /tmp/indexservertmp that
has existed for more than 3 hours.

This closes #3855
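
A minimal sketch of the aging cleanup described above, assuming a plain
java.io.File temp directory and a one-hour sweep interval (both are
assumptions; the actual commit works through CarbonData's
CarbonFile/FileFactory abstractions, as the diff below shows):

    import java.io.File;
    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    public class TempFolderCleaner {
      private static final long AGING_MILLIS = TimeUnit.HOURS.toMillis(3);

      public static void start(File tempRoot) {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        scheduler.scheduleWithFixedDelay(() -> {
          File[] dirs = tempRoot.listFiles(File::isDirectory);
          if (dirs == null) {
            return;
          }
          long now = System.currentTimeMillis();
          for (File dir : dirs) {
            // delete folders older than the aging period
            if (now - dir.lastModified() > AGING_MILLIS) {
              deleteRecursively(dir);
            }
          }
        }, 0, 1, TimeUnit.HOURS);
      }

      private static void deleteRecursively(File f) {
        File[] children = f.listFiles();
        if (children != null) {
          for (File c : children) {
            deleteRecursively(c);
          }
        }
        f.delete();
      }
    }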
---
 .../core/constants/CarbonCommonConstants.java  | 12 
 .../filesystem/AbstractDFSCarbonFile.java  | 16 +
 .../core/datastore/filesystem/CarbonFile.java  |  2 +
 .../core/datastore/filesystem/LocalCarbonFile.java | 16 +
 .../core/datastore/impl/FileFactory.java   | 10 +++
 .../apache/carbondata/core/util/CarbonUtil.java| 56 +++
 .../apache/carbondata/indexserver/IndexJobs.scala  |  6 ++
 .../carbondata/indexserver/IndexServer.scala   | 25 ++-
 .../indexserver/DistributedRDDUtilsTest.scala  | 80 --
 .../org/apache/indexserver/IndexServerTest.scala   | 73 
 10 files changed, 290 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 8f68bf2..2925e76 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -2456,4 +2456,16 @@ public final class CarbonCommonConstants {
    * property which defines the insert stage flow
    */
   public static final String IS_INSERT_STAGE = "is_insert_stage";
+
+  /**
+   * index server temp folder aging period
+   */
+  @CarbonProperty
+  public static final String CARBON_INDEXSERVER_TEMPFOLDER_DELETETIME =
+      "carbon.indexserver.tempfolder.deletetime";
+
+  /**
+   * index server temp folder aging period default value 3 hours.
+   */
+  public static final String CARBON_INDEXSERVER_TEMPFOLDER_DELETETIME_DEFAULT = "1080";
 }
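
For reference, a user would tune this aging period the same way other
carbon.* properties are set elsewhere in this digest; a small sketch (the
unit of the value is not stated in the hunk above, so the default is echoed
verbatim):

    import org.apache.carbondata.core.constants.CarbonCommonConstants;
    import org.apache.carbondata.core.util.CarbonProperties;

    public class AgingPeriodSketch {
      public static void main(String[] args) {
        // override the temp-folder aging period; "1080" echoes the default
        // above, whose unit is not stated in this hunk
        CarbonProperties.getInstance().addProperty(
            CarbonCommonConstants.CARBON_INDEXSERVER_TEMPFOLDER_DELETETIME, "1080");
      }
    }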
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
index f5cb539..9278421 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
@@ -25,8 +25,10 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
+import java.util.stream.Collectors;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -605,4 +607,18 @@ public abstract class AbstractDFSCarbonFile implements CarbonFile {
   public long getLength() throws IOException {
     return fileSystem.getFileStatus(path).getLen();
   }
+
+  @Override
+  public List<CarbonFile> listDirs() throws IOException {
+    FileStatus[] listStatus = null;
+    if (null != fileStatus && fileStatus.isDirectory()) {
+      Path path = fileStatus.getPath();
+      listStatus = fileSystem.listStatus(path);
+      CarbonFile[] dirs = getFiles(listStatus);
+      List<CarbonFile> result = new ArrayList<>(Arrays.asList(dirs));
+      return result.stream().filter(x -> x.isDirectory()).collect(Collectors.toList());
+    } else {
+      return new ArrayList<>();
+    }
+  }
 }
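
A short usage sketch for the new listDirs API implemented above (the temp
folder path is illustrative, and getLastModifiedTime is assumed to be the
existing CarbonFile accessor):

    import java.util.List;

    import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
    import org.apache.carbondata.core.datastore.impl.FileFactory;

    public class ListDirsSketch {
      public static void main(String[] args) throws Exception {
        // enumerate sub-directories of the index server temp folder via the
        // new CarbonFile.listDirs() added in this commit
        CarbonFile tempRoot = FileFactory.getCarbonFile("/tmp/indexservertmp");
        List<CarbonFile> tempDirs = tempRoot.listDirs();
        for (CarbonFile dir : tempDirs) {
          System.out.println(dir.getName() + " " + dir.getLastModifiedTime());
        }
      }
    }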
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
index e8e86f0..1b439e1 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
@@ -39,6 +39,8 @@ public interface CarbonFile {
 
   List<CarbonFile> listFiles(boolean recursive, CarbonFileFilter fileFilter) throws IOException;
 
+  List<CarbonFile> listDirs() throws IOException;
+
   /**
    * It returns list of files with 

[carbondata] branch master updated: [CARBONDATA-3943] Handling the addition of geo column to hive at the time of table creation.

2020-08-18 Thread akashrn5
This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 896a9bf  [CARBONDATA-3943] Handling the addition of geo column to hive at the time of table creation.
896a9bf is described below

commit 896a9bf1745b7f069789dabdb1afaaca705a2e4e
Author: ShreelekhyaG 
AuthorDate: Wed Aug 5 22:45:00 2020 +0530

[CARBONDATA-3943] Handling the addition of geo column to hive at the time of table creation.

Why is this PR needed?
PR #3774 adds the geo column to Hive when it is generated at load time.

What changes were proposed in this PR?
Handle the addition of the geo column at create-table time itself. Added
an example class for the scenario, to check creating a geo table with a
carbon session.

Does this PR introduce any user interface change?
No

Is any new testcase added?
Yes

This closes #3879
---
 .../GeoTableExampleWithCarbonSession.scala | 93 ++
 .../scala/org/apache/spark/sql/CarbonSource.scala  | 18 -
 .../spark/sql/catalyst/CarbonParserUtil.scala  |  4 +-
 .../spark/sql/hive/CarbonFileMetastore.scala   |  3 +-
 .../scala/org/apache/carbondata/geo/GeoTest.scala  | 39 +++--
 5 files changed, 145 insertions(+), 12 deletions(-)

diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/GeoTableExampleWithCarbonSession.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/GeoTableExampleWithCarbonSession.scala
new file mode 100644
index 000..ee4a4a0
--- /dev/null
+++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/GeoTableExampleWithCarbonSession.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.examples
+
+import java.io.File
+
+import org.apache.log4j.PropertyConfigurator
+import org.apache.spark.sql.SparkSession
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.examples.util.ExampleUtils
+
+object GeoTableExampleWithCarbonSession {
+
+  def main(args: Array[String]) {
+val rootPath = new File(this.getClass.getResource("/").getPath
++ "../../../..").getCanonicalPath
+System.setProperty("path.target", s"$rootPath/examples/spark/target")
+// print profiler log to a separated file: target/profiler.log
+PropertyConfigurator.configure(
+  s"$rootPath/examples/spark/src/main/resources/log4j.properties")
+
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, "false")
+val spark = ExampleUtils.createCarbonSession("GeoTableExampleWithCarbonSession")
+spark.sparkContext.setLogLevel("error")
+Seq(
+  "stored as carbondata",
+  "using carbondata",
+  "stored by 'carbondata'",
+  "stored by 'org.apache.carbondata.format'"
+).foreach { formatSyntax =>
+  exampleBody(spark, formatSyntax)
+}
+spark.close()
+  }
+
+  def exampleBody(spark: SparkSession, formatSyntax: String = "stored as carbondata"): Unit = {
+
+val rootPath = new File(this.getClass.getResource("/").getPath
++ "../../../..").getCanonicalPath
+val path = s"$rootPath/integration/spark/src/test/resources/geodata.csv"
+
+spark.sql("DROP TABLE IF EXISTS geoTable")
+
+// Create table
+spark.sql(
+  s"""
+CREATE TABLE geoTable(
+ | timevalue BIGINT,
+ | longitude LONG,
+ | latitude LONG)
+ | $formatSyntax
+ |  TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash',
+ | 'SPATIAL_INDEX.mygeohash.type'='geohash',
+ | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude',
+ | 'SPATIAL_INDEX.mygeohash.originLatitude'='39.832277',
+ | 'SPATIAL_INDEX.mygeohash.gridSize'='50',
+ | 'SPATIAL_INDEX.mygeohash.minLongitude'='115.811865',
+ | 'SPATIAL_INDEX.mygeohash.maxLongitude'='116.782233',
+ | 

[carbondata] branch master updated: [CARBONDATA-3919] Improve concurrent query performance

2020-08-18 Thread akashrn5
This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 1cca062  [CARBONDATA-3919] Improve concurrent query performance
1cca062 is described below

commit 1cca06233b957aa6bf7874991248f5bce0670131
Author: ajantha-bhat 
AuthorDate: Wed Jul 22 16:43:15 2020 +0530

[CARBONDATA-3919] Improve concurrent query performance

Why is this PR needed?
1. When 500 queries were executed concurrently, only one thread made
progress at a time, because the checkIfRefreshIsNeeded method was
synchronized. But synchronization is actually required only when a
modified schema forces tables to be dropped, not for the whole function.
2. TokenCache.obtainTokensForNamenodes was causing a performance
bottleneck for concurrent queries, so it was removed.

What changes were proposed in this PR?
1. Synchronize only the remove-table part. The observed total time for
500 concurrent queries improved from 10 seconds to 3 seconds in the
cluster.
2. Avoid calling the TokenCache API.

This closes #3858
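
The diff below uses the classic double-checked pattern: an unsynchronized
fast path, then a re-check under the lock. A minimal sketch of that pattern,
assuming a hypothetical TableCache stand-in for CarbonMetadata's registry:

    public class RefreshCheck {
      private final Object lock = new Object();

      void evictIfCached(TableCache cache, String tableUniqueName) {
        // fast path: no lock taken when the table is not cached
        if (cache.get(tableUniqueName) != null) {
          synchronized (lock) {
            // re-check under the lock: another thread may have evicted it already
            if (cache.get(tableUniqueName) != null) {
              cache.remove(tableUniqueName);
            }
          }
        }
      }

      // hypothetical stand-in for CarbonMetadata's table registry
      interface TableCache {
        Object get(String name);
        void remove(String name);
      }
    }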
---
 .../carbondata/hadoop/api/CarbonInputFormat.java |  4 
 .../apache/spark/sql/hive/CarbonFileMetastore.scala  | 20 
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index 130e0d9..557fbfa 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -84,7 +84,6 @@ import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.log4j.Logger;
 
 /**
@@ -472,9 +471,6 @@ public abstract class CarbonInputFormat<T> extends FileInputFormat<Void, T> {
     QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
     QueryStatistic statistic = new QueryStatistic();
 
-    // get tokens for all the required FileSystem for table path
-    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
-        new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
     List<ExtendedBlocklet> prunedBlocklets =
         getPrunedBlocklets(job, carbonTable, expression, segmentIds, invalidSegments,
             segmentsToBeRefreshed);
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonFileMetastore.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonFileMetastore.scala
index c9f78b5..b16579e 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonFileMetastore.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonFileMetastore.scala
@@ -67,7 +67,7 @@ private object CarbonFileMetastore {
   final val tableModifiedTimeStore = new ConcurrentHashMap[String, Long]()
 
   def checkIfRefreshIsNeeded(absoluteTableIdentifier: AbsoluteTableIdentifier,
-      localTimeStamp: Long): Boolean = synchronized {
+      localTimeStamp: Long): Boolean = {
     val schemaFilePath = CarbonTablePath.getSchemaFilePath(absoluteTableIdentifier.getTablePath)
     val schemaCarbonFile = FileFactory.getCarbonFile(schemaFilePath)
     if (schemaCarbonFile.exists()) {
@@ -81,9 +81,21 @@
         case None => true
       }
       if (isSchemaModified) {
-        CarbonMetadata.getInstance().removeTable(absoluteTableIdentifier
-          .getCarbonTableIdentifier.getTableUniqueName)
-        IndexStoreManager.getInstance().clearIndex(absoluteTableIdentifier)
+        if (CarbonMetadata.getInstance()
+          .getCarbonTable(absoluteTableIdentifier
+            .getCarbonTableIdentifier
+            .getTableUniqueName) != null) {
+          synchronized {
+            if (CarbonMetadata.getInstance()
+              .getCarbonTable(absoluteTableIdentifier
+                .getCarbonTableIdentifier
+                .getTableUniqueName) != null) {
+              CarbonMetadata.getInstance().removeTable(absoluteTableIdentifier
+                .getCarbonTableIdentifier.getTableUniqueName)
+              IndexStoreManager.getInstance().clearIndex(absoluteTableIdentifier)
+            }
+          }
+        }
         true
       } else {
         localTimeStamp != newTime