smolnar82 opened a new pull request #157: KNOX-2022 - KnoxShellTable contains Comparables instead of Strings and split KnoxShellTable into smaller classes URL: https://github.com/apache/knox/pull/157 ## What changes were proposed in this pull request? This PR consists of two commits: 1. Split the `KnoxShellTable` into smaller classes. This way it's easier to read, maintain and understand 2. Changed the underlying data structure from `String` to `Combarable<? extends Object>`. So that we can store data in the table with their type. ## How was this patch tested? In addition to updating/adding/fixing JUnit tests, I've executed integration tests in `knoxshell`. First I've created a test DB using `DerbyDatabase` and saved it locally. I've used that path in my tests as `connectionUrl`. This DB has 2 tables: - locations: 1.200.000 rows - murder_statistics: 22.800.000 rows (statistics for each ZIP between 2000 and 2019) ``` create table locations(zip int, country varchar(64), state varchar(64), city varchar(64), population int, primary key(zip)) CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE ('SAMPLE', 'LOCATIONS', '/Users/smolnar/test/knoxline/sample/test.locations.load.dat', null, null, null, 1) create table murder_statistics(zip int, num_of_murders int, recorded date, primary key(zip, recorded), foreign key(zip) references locations(zip)) CALL SYSCS_UTIL.SYSCS_IMPORT_TABLE ('SAMPLE', 'MURDER_STATISTICS', '/Users/smolnar/test/knoxline/sample/test.murder.statistics.load.dat', null, null, null, 1) ``` In `knoxshell` I used these tables as follows: ``` knox:000> locations = KnoxShellTable.builder().jdbc().driver("org.apache.derby.jdbc.EmbeddedDriver").connectTo("jdbc:derby:/Users/smolnar/test/derbyDb").sql("SELECT * FROM sample.locations where zip < 10") ===> LOCATIONS +--------+------------+----------+----------+--------------+ | ZIP | COUNTRY | STATE | CITY | POPULATION | +--------+------------+----------+----------+--------------+ | 1 | US | NY | City1 | 100000 | | 2 | US | NY | City2 | 100000 | | 3 | US | NY | City3 | 100000 | | 4 | US | NY | City4 | 100000 | | 5 | US | NY | City5 | 100000 | | 6 | US | NY | City6 | 100000 | | 7 | US | NY | City7 | 100000 | | 8 | US | NY | City8 | 100000 | | 9 | US | NY | City9 | 100000 | +--------+------------+----------+----------+--------------+ knox:000> murderStats = KnoxShellTable.builder().jdbc().driver("org.apache.derby.jdbc.EmbeddedDriver").connectTo("jdbc:derby:/Users/smolnar/test/derbyDb").sql("SELECT * FROM sample.murder_statistics where zip < 10") ===> MURDER_STATISTICS +--------+------------------+--------------+ | ZIP | NUM_OF_MURDERS | RECORDED | +--------+------------------+--------------+ | 1 | 104 | 2000-12-31 | | 1 | 136 | 2001-12-31 | | 1 | 299 | 2002-12-31 | | 1 | 240 | 2003-12-31 | | 1 | 104 | 2004-12-31 | | 1 | 130 | 2005-12-31 | | 1 | 335 | 2006-12-31 | | 1 | 14 | 2007-12-31 | | 1 | 176 | 2008-12-31 | | 1 | 388 | 2009-12-31 | ... | 9 | 58 | 2010-12-31 | | 9 | 98 | 2011-12-31 | | 9 | 13 | 2012-12-31 | | 9 | 299 | 2013-12-31 | | 9 | 235 | 2014-12-31 | | 9 | 30 | 2015-12-31 | | 9 | 307 | 2016-12-31 | | 9 | 202 | 2017-12-31 | | 9 | 261 | 2018-12-31 | +--------+------------------+--------------+ knox:000> filteredMurderStats = murderStats.filter().name("NUM_OF_MURDERS").greaterThan(300) ===> +--------+------------------+--------------+ | ZIP | NUM_OF_MURDERS | RECORDED | +--------+------------------+--------------+ | 1 | 335 | 2006-12-31 | | 1 | 388 | 2009-12-31 | | 1 | 392 | 2016-12-31 | | 2 | 371 | 2002-12-31 | | 2 | 348 | 2003-12-31 | | 2 | 394 | 2006-12-31 | | 2 | 362 | 2010-12-31 | | 2 | 304 | 2015-12-31 | | 3 | 336 | 2000-12-31 | | 3 | 301 | 2006-12-31 | | 3 | 316 | 2014-12-31 | | 3 | 311 | 2018-12-31 | | 4 | 400 | 2001-12-31 | | 4 | 371 | 2007-12-31 | | 4 | 344 | 2009-12-31 | | 4 | 335 | 2011-12-31 | | 4 | 339 | 2014-12-31 | | 5 | 332 | 2004-12-31 | | 5 | 321 | 2005-12-31 | | 5 | 337 | 2008-12-31 | | 5 | 307 | 2016-12-31 | | 6 | 345 | 2004-12-31 | | 6 | 363 | 2005-12-31 | | 6 | 371 | 2018-12-31 | | 7 | 383 | 2000-12-31 | | 7 | 327 | 2009-12-31 | | 7 | 342 | 2010-12-31 | | 7 | 387 | 2011-12-31 | | 7 | 369 | 2014-12-31 | | 8 | 308 | 2004-12-31 | | 8 | 365 | 2008-12-31 | | 8 | 334 | 2009-12-31 | | 8 | 348 | 2010-12-31 | | 8 | 398 | 2012-12-31 | | 8 | 385 | 2014-12-31 | | 8 | 318 | 2015-12-31 | | 9 | 349 | 2000-12-31 | | 9 | 324 | 2005-12-31 | | 9 | 336 | 2008-12-31 | | 9 | 388 | 2009-12-31 | | 9 | 307 | 2016-12-31 | +--------+------------------+--------------+ knox:000> filteredMurderStats.toCSV() ===> ZIP,NUM_OF_MURDERS,RECORDED 1,335,2006-12-31 1,388,2009-12-31 1,392,2016-12-31 2,371,2002-12-31 2,348,2003-12-31 2,394,2006-12-31 2,362,2010-12-31 2,304,2015-12-31 3,336,2000-12-31 3,301,2006-12-31 3,316,2014-12-31 3,311,2018-12-31 4,400,2001-12-31 4,371,2007-12-31 4,344,2009-12-31 4,335,2011-12-31 4,339,2014-12-31 5,332,2004-12-31 5,321,2005-12-31 5,337,2008-12-31 5,307,2016-12-31 6,345,2004-12-31 6,363,2005-12-31 6,371,2018-12-31 7,383,2000-12-31 7,327,2009-12-31 7,342,2010-12-31 7,387,2011-12-31 7,369,2014-12-31 8,308,2004-12-31 8,365,2008-12-31 8,334,2009-12-31 8,348,2010-12-31 8,398,2012-12-31 8,385,2014-12-31 8,318,2015-12-31 9,349,2000-12-31 9,324,2005-12-31 9,336,2008-12-31 9,388,2009-12-31 9,307,2016-12-31 knox:000> filteredMurderStats.toJSON() ===> { "headers" : [ "ZIP", "NUM_OF_MURDERS", "RECORDED" ], "title" : null, "rows" : [ [ 1, 335, 1167519600000 ], [ 1, 388, 1262214000000 ], [ 1, 392, 1483138800000 ], [ 2, 371, 1041289200000 ], [ 2, 348, 1072825200000 ], [ 2, 394, 1167519600000 ], [ 2, 362, 1293750000000 ], [ 2, 304, 1451516400000 ], [ 3, 336, 978217200000 ], [ 3, 301, 1167519600000 ], [ 3, 316, 1419980400000 ], [ 3, 311, 1546210800000 ], [ 4, 400, 1009753200000 ], [ 4, 371, 1199055600000 ], [ 4, 344, 1262214000000 ], [ 4, 335, 1325286000000 ], [ 4, 339, 1419980400000 ], [ 5, 332, 1104447600000 ], [ 5, 321, 1135983600000 ], [ 5, 337, 1230678000000 ], [ 5, 307, 1483138800000 ], [ 6, 345, 1104447600000 ], [ 6, 363, 1135983600000 ], [ 6, 371, 1546210800000 ], [ 7, 383, 978217200000 ], [ 7, 327, 1262214000000 ], [ 7, 342, 1293750000000 ], [ 7, 387, 1325286000000 ], [ 7, 369, 1419980400000 ], [ 8, 308, 1104447600000 ], [ 8, 365, 1230678000000 ], [ 8, 334, 1262214000000 ], [ 8, 348, 1293750000000 ], [ 8, 398, 1356908400000 ], [ 8, 385, 1419980400000 ], [ 8, 318, 1451516400000 ], [ 9, 349, 978217200000 ], [ 9, 324, 1135983600000 ], [ 9, 336, 1230678000000 ], [ 9, 388, 1262214000000 ], [ 9, 307, 1483138800000 ] ] } knox:000> sortedFilteredMurderStats = filteredMurderStats.sort("NUM_OF_MURDERS") ===> +--------+------------------+--------------+ | ZIP | NUM_OF_MURDERS | RECORDED | +--------+------------------+--------------+ | 3 | 301 | 2006-12-31 | | 2 | 304 | 2015-12-31 | | 5 | 307 | 2016-12-31 | | 9 | 307 | 2016-12-31 | | 8 | 308 | 2004-12-31 | | 3 | 311 | 2018-12-31 | | 3 | 316 | 2014-12-31 | | 8 | 318 | 2015-12-31 | | 5 | 321 | 2005-12-31 | | 9 | 324 | 2005-12-31 | | 7 | 327 | 2009-12-31 | | 5 | 332 | 2004-12-31 | | 8 | 334 | 2009-12-31 | | 1 | 335 | 2006-12-31 | | 4 | 335 | 2011-12-31 | | 3 | 336 | 2000-12-31 | | 9 | 336 | 2008-12-31 | | 5 | 337 | 2008-12-31 | | 4 | 339 | 2014-12-31 | | 7 | 342 | 2010-12-31 | | 4 | 344 | 2009-12-31 | | 6 | 345 | 2004-12-31 | | 2 | 348 | 2003-12-31 | | 8 | 348 | 2010-12-31 | | 9 | 349 | 2000-12-31 | | 2 | 362 | 2010-12-31 | | 6 | 363 | 2005-12-31 | | 8 | 365 | 2008-12-31 | | 7 | 369 | 2014-12-31 | | 2 | 371 | 2002-12-31 | | 4 | 371 | 2007-12-31 | | 6 | 371 | 2018-12-31 | | 7 | 383 | 2000-12-31 | | 8 | 385 | 2014-12-31 | | 7 | 387 | 2011-12-31 | | 1 | 388 | 2009-12-31 | | 9 | 388 | 2009-12-31 | | 1 | 392 | 2016-12-31 | | 2 | 394 | 2006-12-31 | | 8 | 398 | 2012-12-31 | | 4 | 400 | 2001-12-31 | +--------+------------------+--------------+ knox:000> sortedFilteredMurderStatsFromJSONFile = KnoxShellTable.builder().json().path("/Users/smolnar/test/knoxline/sampleKnoxShellTable.json") ===> +--------+------------------+------------------+ | ZIP | NUM_OF_MURDERS | RECORDED | +--------+------------------+------------------+ | 3 | 301 | 1167519600000 | | 2 | 304 | 1451516400000 | | 5 | 307 | 1483138800000 | | 9 | 307 | 1483138800000 | | 8 | 308 | 1104447600000 | | 3 | 311 | 1546210800000 | | 3 | 316 | 1419980400000 | | 8 | 318 | 1451516400000 | | 5 | 321 | 1135983600000 | | 9 | 324 | 1135983600000 | | 7 | 327 | 1262214000000 | | 5 | 332 | 1104447600000 | | 8 | 334 | 1262214000000 | | 1 | 335 | 1167519600000 | | 4 | 335 | 1325286000000 | | 3 | 336 | 978217200000 | | 9 | 336 | 1230678000000 | | 5 | 337 | 1230678000000 | | 4 | 339 | 1419980400000 | | 7 | 342 | 1293750000000 | | 4 | 344 | 1262214000000 | | 6 | 345 | 1104447600000 | | 2 | 348 | 1072825200000 | | 8 | 348 | 1293750000000 | | 9 | 349 | 978217200000 | | 2 | 362 | 1293750000000 | | 6 | 363 | 1135983600000 | | 8 | 365 | 1230678000000 | | 7 | 369 | 1419980400000 | | 2 | 371 | 1041289200000 | | 4 | 371 | 1199055600000 | | 6 | 371 | 1546210800000 | | 7 | 383 | 978217200000 | | 8 | 385 | 1419980400000 | | 7 | 387 | 1325286000000 | | 1 | 388 | 1262214000000 | | 9 | 388 | 1262214000000 | | 1 | 392 | 1483138800000 | | 2 | 394 | 1167519600000 | | 8 | 398 | 1356908400000 | | 4 | 400 | 1009753200000 | +--------+------------------+------------------+ knox:000> joinedTableMurderStats = KnoxShellTable.builder().join().title("MURDER_STATISTICS_LOCATIONS").left(murderStats).right(locations).on("ZIP") ===> MURDER_STATISTICS_LOCATIONS +--------+------------------+--------------+--------+------------+----------+----------+--------------+ | ZIP | NUM_OF_MURDERS | RECORDED | ZIP | COUNTRY | STATE | CITY | POPULATION | +--------+------------------+--------------+--------+------------+----------+----------+--------------+ | 1 | 104 | 2000-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 136 | 2001-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 299 | 2002-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 240 | 2003-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 104 | 2004-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 130 | 2005-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 335 | 2006-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 14 | 2007-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 176 | 2008-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 388 | 2009-12-31 | 1 | US | NY | City1 | 100000 | | 1 | 75 | 2010-12-31 | 1 | US | NY | City1 | 100000 | ... | 9 | 58 | 2010-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 98 | 2011-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 13 | 2012-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 299 | 2013-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 235 | 2014-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 30 | 2015-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 307 | 2016-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 202 | 2017-12-31 | 9 | US | NY | City9 | 100000 | | 9 | 261 | 2018-12-31 | 9 | US | NY | City9 | 100000 | +--------+------------------+--------------+--------+------------+----------+----------+--------------+ ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
