Chetan Bhat created CARBONDATA-3987:
---------------------------------------

             Summary: Issues in SDK Pagination reader (2 issues)
                 Key: CARBONDATA-3987
                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3987
             Project: CarbonData
          Issue Type: Bug
          Components: other
    Affects Versions: 2.1.0
            Reporter: Chetan Bhat


Issue 1 :
Write data to a table, then insert one more row. getTotalRows() is incremented by 1
as expected, but an error is thrown when trying to read the newly added row through
the pagination reader.

Test code:

/**
 * Carbon files are written using CarbonWriter in outputPath.
 *
 * Carbon files are read using a PaginationCarbonReader object.
 * Checks pagination after an insert on large data with 8 splits.
 */
@Test
public void testSDKPaginationInsertData()
    throws IOException, InvalidLoadOptionException, InterruptedException {
  System.out.println("___________________________________________" + name.getMethodName()
      + " TestCase Execution is started________________________________________________");

  // 5 rows for the "large" source table
  List<String[]> data1 = new ArrayList<String[]>();
  String[] row1 = {"1", "AAA", "3", "3444345.66", "true", "1979-12-09", "2011-2-10 1:00:20", "Pune", "IT"};
  String[] row2 = {"2", "BBB", "2", "543124.66", "false", "1987-2-19", "2017-1-1 12:00:20", "Bangalore", "DATA"};
  String[] row3 = {"3", "CCC", "1", "787878.888", "false", "1982-05-12", "2015-12-1 2:20:20", "Pune", "DATA"};
  String[] row4 = {"4", "DDD", "1", "99999.24", "true", "1981-04-09", "2000-1-15 7:00:20", "Delhi", "MAINS"};
  String[] row5 = {"5", "EEE", "3", "545656.99", "true", "1987-12-09", "2017-11-25 04:00:20", "Delhi", "IT"};

  data1.add(row1);
  data1.add(row2);
  data1.add(row3);
  data1.add(row4);
  data1.add(row5);

  String outputPath1 = getOutputPath(outputDir, name.getMethodName() + "large");

  long uid = 123456;
  TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
  writeMultipleCarbonFiles("id int,name string,rank short,salary double,active boolean,"
      + "dob date,doj timestamp,city string,dept string", data1, outputPath1, uid, null, null);

  System.out.println("Data is written");

  String hdfsPath1 = moveFiles(outputPath1, outputPath1);
  String datapath1 = hdfsPath1.concat("/" + name.getMethodName() + "large");
  System.out.println("HDFS Data Path is: " + datapath1);

  runSQL("create table " + name.getMethodName() + "large"
      + " using carbon location '" + datapath1 + "'");
  System.out.println("Table " + name.getMethodName() + " is created successfully");
  runSQL("select count(*) from " + name.getMethodName() + "large");

  // second table: a single row; the "large" table is then inserted into it
  long uid1 = 123;
  String outputPath = getOutputPath(outputDir, name.getMethodName());
  List<String[]> data = new ArrayList<String[]>();
  String[] row = {"222", "Daisy", "3", "334.456", "true", "1956-11-08", "2013-12-10 12:00:20", "Pune", "IT"};
  data.add(row);
  writeData("id int,name string,rank short,salary double,active boolean,"
      + "dob date,doj timestamp,city string,dept string", data, outputPath, uid, null, null);
  String hdfsPath = moveFiles(outputPath, outputPath);
  String datapath = hdfsPath.concat("/" + name.getMethodName());

  runSQL("create table " + name.getMethodName()
      + " using carbon location '" + datapath + "'");
  runSQL("select count(*) from " + name.getMethodName());
  System.out.println("----Insert------");
  runSQL("insert into table " + name.getMethodName()
      + " select * from " + name.getMethodName() + "large");
  System.out.println("Inserted");
  System.out.println("----------After Insert--------------");
  System.out.println("----Query 1----");
  runSQL("select count(*) from " + name.getMethodName());

  // configure cache size = 4 blocklets
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");

  CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(datapath, "_temp")
      .withPaginationSupport()
      .projection(new String[]{"id", "name", "rank", "salary", "active", "dob", "doj", "city", "dept"});
  PaginationCarbonReader<Object> paginationCarbonReader =
      (PaginationCarbonReader<Object>) carbonReaderBuilder.build();

  File[] dataFiles1 = new File(datapath).listFiles(new FilenameFilter() {
    @Override public boolean accept(File dir, String name) {
      return name.endsWith("carbondata");
    }
  });
  String version = CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
  System.out.println("version " + version);

  // getTotalRows() is incremented to 6 after the insert ...
  System.out.println("Total no of rows is: " + paginationCarbonReader.getTotalRows());
  assertTrue(paginationCarbonReader.getTotalRows() == 6);

  // ... but reading the full range, including the newly inserted row, throws an error
  Object[] rows = paginationCarbonReader.read(1, 6);
  // assertTrue(rows.length == 5);
  for (Object aRow : rows) {
    System.out.println(((Object[]) aRow)[0]);
    // assertTrue(((Object[]) aRow)[1].equals(5001));
  }

  // close the reader
  paginationCarbonReader.close();
}
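
For reference, a minimal cross-check (a sketch, not part of the original test, reusing the
datapath variable and the surrounding test context from above): reading the same path with a
plain CarbonReader, i.e. without withPaginationSupport(), should stream all 6 rows if the
inserted row itself is readable, which would point the failure at the pagination range
handling rather than at the data.

  // Sketch: cross-check with a non-pagination CarbonReader over the same path.
  // Assumes the datapath variable from the test above.
  CarbonReader<Object> plainReader = CarbonReader.builder(datapath, "_temp")
      .projection(new String[]{"id", "name"})
      .build();
  int count = 0;
  while (plainReader.hasNext()) {
    plainReader.readNextRow(); // row content not needed here, only the count
    count++;
  }
  plainReader.close();
  System.out.println("Plain reader row count: " + count); // expected: 6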

 

Issue 2 : When filter() is used to filter out certain rows, getTotalRows() still reports the
previous (unfiltered) total row count, and an error is thrown when trying to read all the rows.

/**
 * Carbon files are written using CarbonWriter in outputPath.
 *
 * Carbon files are read using a PaginationCarbonReader object with a filter.
 */
@Test
public void testSDKPaginationFilter()
    throws IOException, InvalidLoadOptionException, InterruptedException {
  System.out.println("___________________________________________" + name.getMethodName()
      + " TestCase Execution is started________________________________________________");

  List<String[]> data = new ArrayList<String[]>();
  String[] row1 = {"100", "MNO", "A", "1001"};
  String[] row2 = {"100", "MNOP", "C", "3001"};
  String[] row3 = {"100", "MNOQ", "X", "2001"};
  String[] row4 = {"100", "MNOR", "Z", "7001"};
  String[] row5 = {"100", "MNOS", "P", "5001"};
  data.add(row1);
  data.add(row2);
  data.add(row3);
  data.add(row4);
  data.add(row5);

  String outputPath = getOutputPath(outputDir, name.getMethodName());
  boolean isTransactionalTable = false;
  long uid = System.currentTimeMillis();
  String blockletSize = String.valueOf(2);
  String blockSize = String.valueOf(4);
  String[] sortColumns = {"c4", "c3"};
  writeData("c1 int,c2 string,c3 string,c4 int", data, outputPath,
      uid, blockSize, blockletSize, sortColumns);
  System.out.println("Data is written");

  String hdfsPath = moveFiles(outputPath, outputPath);
  String dataPath = hdfsPath.concat("/" + name.getMethodName());
  System.out.println("HDFS Data Path is: " + dataPath);

  // configure cache size = 4 blocklets
  CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");

  // filter expression: c3 = 'P' (matches only one of the five rows)
  EqualToExpression equalExpression =
      new EqualToExpression(new ColumnExpression("c3", DataTypes.STRING),
          new LiteralExpression("P", DataTypes.STRING));

  CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(dataPath, "_temp")
      .withPaginationSupport()
      .projection(new String[]{"c2", "c4"})
      .filter(equalExpression);
  PaginationCarbonReader<Object> paginationCarbonReader =
      (PaginationCarbonReader<Object>) carbonReaderBuilder.build();

  File[] dataFiles1 = new File(dataPath).listFiles(new FilenameFilter() {
    @Override public boolean accept(File dir, String name) {
      return name.endsWith("carbondata");
    }
  });
  String version = CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
  System.out.println("version " + version);

  // getTotalRows() still reports the unfiltered count (5)
  System.out.println("Total no of rows is: " + paginationCarbonReader.getTotalRows());
  assertTrue(paginationCarbonReader.getTotalRows() == 5);

  // reading all 5 rows throws an error, since the filter leaves only 1 row
  Object[] rows = paginationCarbonReader.read(1, 5);
  for (Object row : rows) {
    System.out.println(((Object[]) row)[0]);
    // assertTrue(((Object[]) row)[1].equals(5001));
  }
  // close the reader
  paginationCarbonReader.close();
}
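
As a cross-check for the expected behaviour (again a sketch, reusing the dataPath variable and
equalExpression from the test above): counting the rows a plain CarbonReader returns for the
same filter gives the filtered count, which is what getTotalRows() would be expected to report
instead of the unfiltered 5.

  // Sketch: count filtered rows with a non-pagination CarbonReader.
  // Reuses dataPath and equalExpression from the test above.
  CarbonReader<Object> filteredReader = CarbonReader.builder(dataPath, "_temp")
      .projection(new String[]{"c2", "c4"})
      .filter(equalExpression)
      .build();
  int filteredCount = 0;
  while (filteredReader.hasNext()) {
    filteredReader.readNextRow();
    filteredCount++;
  }
  filteredReader.close();
  System.out.println("Filtered row count via plain reader: " + filteredCount); // expected: 1 (c3 = 'P')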

 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
