[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/1641 ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user jackylk commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r157155410 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -505,7 +516,11 @@ object CarbonDataRDDFactory { carbonLoadModel) operationContext.setProperty("isOverwrite", overwriteTable) OperationListenerBus.getInstance().fireEvent(loadTablePreStatusUpdateEvent, operationContext) - val done = updateTableStatus(status, carbonLoadModel, loadStatus, overwriteTable) + val done = updateTableStatus(status, --- End diff -- move status to next line ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user kushalsaha commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156994717 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala --- @@ -276,6 +281,121 @@ class InsertIntoCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { } sql("LOAD DATA INPATH '" + resourcesPath + "/100_olap.csv' overwrite INTO table TCarbonSourceOverwrite options ('DELIMITER'=',', 'QUOTECHAR'='\', 'FILEHEADER'='imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaS ysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointDescription,gamePointId,contractNumber')") assert(rowCount == sql("select imei from TCarbonSourceOverwrite").count()) + + } + + test("insert overwrite in group by scenario with t1 no record and t2 no record") { + queryExecution("overwriteTable1_noRecord.csv","overwriteTable2_noRecord.csv") +sql ("insert overwrite table OverwriteTable_t2 select id,name,sum(salary) as TotalSalary,'98' as age from OverwriteTable_t1 group 
by id,name,salary") +val exists_t1 = checkSegment("OverwriteTable_t1") +val exists_t2 = checkSegment("OverwriteTable_t2") +assert(!exists_t1) +assert(!exists_t2) +assert(sql("select * from OverwriteTable_t1").count() == sql("select * from OverwriteTable_t2").count()) +checkAnswer(sql("select * from OverwriteTable_t2"), + Seq()) +checkAnswer(sql("select * from OverwriteTable_t1"), + sql("select * from OverwriteTable_t2")) + } + + + test("insert overwrite in group by scenario with t1 no record and t2 some record") { + queryExecution("overwriteTable1_noRecord.csv","overwriteTable2_someRecord.csv") +sql ("insert overwrite table OverwriteTable_t2 select id,name,sum(salary) as TotalSalary,'98' as age from OverwriteTable_t1 group by id,name,salary") --- End diff -- Only the insert overwrite query is kept in the test cases, as we handled two different scenarios: 1) the GROUP BY case, and 2) the non-GROUP-BY case. If we refactor, one extra method would need to be written in which the same code would exist again. ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156700781 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -483,20 +491,21 @@ object CarbonDataRDDFactory { s"${ carbonLoadModel.getDatabaseName }.${ carbonLoadModel.getTableName }") throw new Exception(status(0)._2._2.errorMsg) } - // if segment is empty then fail the data load + + var newEntryLoadStatus = if (!carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.isChildDataMap && !CarbonLoaderUtil.isValidSegment(carbonLoadModel, carbonLoadModel.getSegmentId.toInt)) { -// update the load entry in table status file for changing the status to marked for delete -CommonUtil.updateTableStatusForFailure(carbonLoadModel) -LOGGER.info("starting clean up**") -CarbonLoaderUtil.deleteSegment(carbonLoadModel, carbonLoadModel.getSegmentId.toInt) -LOGGER.info("clean up done**") + LOGGER.audit(s"Data load is failed for " + s"${ carbonLoadModel.getDatabaseName }.${ carbonLoadModel.getTableName }" + " as there is no data to load") LOGGER.warn("Cannot write load metadata file as data load failed") -throw new Exception("No Data to load") + --- End diff -- write comment 'as no records loaded in new segment, new segment should be deleted' ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156700497 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -375,7 +375,15 @@ object CarbonDataRDDFactory { } } } else { - loadStatus = SegmentStatus.LOAD_FAILURE + if (dataFrame.isDefined && updateModel.isEmpty) { --- End diff -- Write comment explaining this ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156698805 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/InsertIntoCarbonTableTestCase.scala --- @@ -276,8 +281,178 @@ class InsertIntoCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { } sql("LOAD DATA INPATH '" + resourcesPath + "/100_olap.csv' overwrite INTO table TCarbonSourceOverwrite options ('DELIMITER'=',', 'QUOTECHAR'='\', 'FILEHEADER'='imei,deviceInformationId,MAC,deviceColor,device_backColor,modelId,marketName,AMSize,ROMSize,CUPAudit,CPIClocked,series,productionDate,bomCode,internalModels,deliveryTime,channelsId,channelsName,deliveryAreaId,deliveryCountry,deliveryProvince,deliveryCity,deliveryDistrict,deliveryStreet,oxSingleNumber,ActiveCheckTime,ActiveAreaId,ActiveCountry,ActiveProvince,Activecity,ActiveDistrict,ActiveStreet,ActiveOperatorId,Active_releaseId,Active_EMUIVersion,Active_operaSysVersion,Active_BacVerNumber,Active_BacFlashVer,Active_webUIVersion,Active_webUITypeCarrVer,Active_webTypeDataVerNumber,Active_operatorsVersion,Active_phonePADPartitionedVersions,Latest_YEAR,Latest_MONTH,Latest_DAY,Latest_HOUR,Latest_areaId,Latest_country,Latest_province,Latest_city,Latest_district,Latest_street,Latest_releaseId,Latest_EMUIVersion,Latest_operaS ysVersion,Latest_BacVerNumber,Latest_BacFlashVer,Latest_webUIVersion,Latest_webUITypeCarrVer,Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions,Latest_operatorId,gamePointDescription,gamePointId,contractNumber')") assert(rowCount == sql("select imei from TCarbonSourceOverwrite").count()) + + } + + test("insert overwrite in group by scenario with t1 no record and t2 some record") { --- End diff -- Move common code to a function ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user kushalsaha commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156318860 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -486,6 +486,21 @@ object CarbonDataRDDFactory { // if segment is empty then fail the data load if (!carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.isChildDataMap && !CarbonLoaderUtil.isValidSegment(carbonLoadModel, carbonLoadModel.getSegmentId.toInt)) { + +if (overwriteTable && dataFrame.isDefined) { + carbonLoadModel.getLoadMetadataDetails.asScala.foreach { +loadDetails => + if (loadDetails.getSegmentStatus.equals(SegmentStatus.SUCCESS)) { + loadDetails.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE) + } + } + val carbonTablePath = CarbonStorePath --- End diff -- "loadTablePreStatusUpdateEvent is not fired": it fires only when data loading is done; in the zero-record case, data loading is not done. "How about the old dictionary being overwritten": in the insert-overwrite case, the dictionary is appended to. ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156276866 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -486,6 +486,21 @@ object CarbonDataRDDFactory { // if segment is empty then fail the data load if (!carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.isChildDataMap && !CarbonLoaderUtil.isValidSegment(carbonLoadModel, carbonLoadModel.getSegmentId.toInt)) { + +if (overwriteTable && dataFrame.isDefined) { + carbonLoadModel.getLoadMetadataDetails.asScala.foreach { +loadDetails => + if (loadDetails.getSegmentStatus.equals(SegmentStatus.SUCCESS)) { + loadDetails.setSegmentStatus(SegmentStatus.MARKED_FOR_DELETE) + } + } + val carbonTablePath = CarbonStorePath --- End diff -- 1) loadTablePreStatusUpdateEvent is not fired, 2) how about the old dictionary being overwritten? 3) the updatestatus file also needs to be handled accordingly. Suggest following the original flow for handling the empty-segment case ---
[GitHub] carbondata pull request #1641: [CARBONDATA-1882] select with group by and in...
Github user gvramana commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1641#discussion_r156275284 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -486,6 +486,21 @@ object CarbonDataRDDFactory { // if segment is empty then fail the data load --- End diff -- Correct comment ---