This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch variant in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7e2d798899735a86472d76e0d907d125cbea7ec5 Author: eldenmoon <[email protected]> AuthorDate: Mon Aug 14 17:55:57 2023 +0800 add github events test --- be/src/vec/common/schema_util.cpp | 6 +- .../github_events_p0/affinityByIssuesAndPRs1.sql | 15 + .../github_events_p0/affinityByIssuesAndPRs2.sql | 15 + .../github_events_p0/authorsWithTheMostPushes.sql | 9 + .../variant_p0/github_events_p0/countingStar1.sql | 1 + .../variant_p0/github_events_p0/countingStar2.sql | 1 + .../variant_p0/github_events_p0/countingStar3.sql | 1 + .../distributionOfRepositoriesByStarCount.sql | 14 + .../variant_p0/github_events_p0/githubRoulette.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears1.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears2.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears3.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears4.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears5.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears6.sql | 1 + ...heListOfTopRepositoriesChangedOverTheYears7.sql | 28 ++ .../howHasTheTotalNumberOfStarsChangedOverTime.sql | 2 + .../issuesWithTheMostComments1.sql | 1 + .../issuesWithTheMostComments2.sql | 1 + .../issuesWithTheMostComments3.sql | 17 + .../issuesWithTheMostComments4.sql | 9 + .../issuesWithTheMostComments5.sql | 9 + .../issuesWithTheMostComments6.sql | 11 + .../issuesWithTheMostComments7.sql | 9 + .../issuesWithTheMostComments8.sql | 13 + .../github_events_p0/mostForkedRepositories.sql | 1 + .../mostPopularCommentsOnGithub.sql | 1 + .../organizationsByTheNumberOfRepositories.sql | 14 + .../organizationsByTheNumberOfStars.sql | 8 + .../organizationsByTheSizeOfCommunity.sql | 23 ++ .../proportionsBetweenStarsAndForks1.sql | 17 + .../proportionsBetweenStarsAndForks2.sql | 18 + .../proportionsBetweenStarsAndForks3.sql | 18 + .../proportionsBetweenStarsAndForks4.sql | 13 + .../proportionsBetweenStarsAndForks5.sql | 21 + .../repositoriesByAmountOfModifiedCode.sql | 12 + .../repositoriesByTheNumberOfPushes.sql | 18 + ...epositoriesWithClickhouse_related_comments1.sql | 1 + ...epositoriesWithClickhouse_related_comments2.sql | 17 + .../repositoriesWithDoris_related_comments1.sql | 1 + .../repositoriesWithDoris_related_comments2.sql | 17 + .../repositoriesWithTheHighestGrowthYoY.sql | 20 + .../repositoriesWithTheMaximumAmountOfIssues1.sql | 1 + .../repositoriesWithTheMaximumAmountOfIssues2.sql | 18 + .../repositoriesWithTheMaximumAmountOfIssues3.sql | 19 + .../repositoriesWithTheMaximumAmountOfIssues4.sql | 18 + ...sitoriesWithTheMaximumAmountOfPullRequests1.sql | 1 + ...sitoriesWithTheMaximumAmountOfPullRequests2.sql | 1 + ...esWithTheMaximumNumberOfAcceptedInvitations.sql | 17 + ...sitoriesWithTheMostPeopleWhoHavePushAccess1.sql | 13 + ...sitoriesWithTheMostPeopleWhoHavePushAccess2.sql | 13 + ...sitoriesWithTheMostPeopleWhoHavePushAccess3.sql | 16 + .../repositoriesWithTheMostStarsOverOneDay1.sql | 25 ++ .../repositoriesWithTheMostStarsOverOneDay2.sql | 25 ++ .../repositoriesWithTheMostStarsOverOneDay3.sql | 1 + ...repositoriesWithTheMostSteadyGrowthOverTime.sql | 20 + .../repositoriesWithTheWorstStagnation_order.sql | 20 + .../github_events_p0/repositoryAffinityList1.sql | 14 + .../github_events_p0/repositoryAffinityList2.sql | 23 ++ .../starsFromHeavyGithubUsers1.sql | 14 + .../starsFromHeavyGithubUsers2.sql | 15 + .../theLongestRepositoryNames1.sql | 1 + .../theLongestRepositoryNames2.sql | 1 + .../github_events_p0/theMostToughCodeReviews.sql | 10 + .../theTotalNumberOfRepositoriesOnGithub.sql | 1 + .../theTotalNumberOfUsersOnGithub1.sql | 1 + .../theTotalNumberOfUsersOnGithub2.sql | 1 + .../theTotalNumberOfUsersOnGithub3.sql | 1 + .../theTotalNumberOfUsersOnGithub4.sql | 1 + .../variant_p0/github_events_p0/topLabels1.sql | 9 + .../variant_p0/github_events_p0/topLabels2.sql | 9 + .../variant_p0/github_events_p0/topLabels3.sql | 14 + .../github_events_p0/topRepositoriesByStars.sql | 1 + .../whatIsTheBestDayOfTheWeekToCatchAStar.sql | 1 + .../whoAreAllThosePeopleGivingStars1.sql | 1 + .../whoAreAllThosePeopleGivingStars2.sql | 1 + .../whoAreAllThosePeopleGivingStars3.sql | 14 + regression-test/suites/variant_p0/load.groovy | 446 +++++++++++---------- 78 files changed, 960 insertions(+), 216 deletions(-) diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 3527ae12c6..3f92354b32 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -44,6 +44,7 @@ #include "olap/tablet_schema.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" +#include "udf/udf.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_nullable.h" @@ -194,8 +195,9 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co argnum.emplace_back(1); size_t result_column = tmp_block.columns(); tmp_block.insert({nullptr, type, arg.name}); + auto fn_ctx = FunctionContext::create_context(nullptr, {}, {}); RETURN_IF_ERROR( - function->execute(nullptr, tmp_block, argnum, result_column, arg.column->size())); + function->execute(fn_ctx.get(), tmp_block, argnum, result_column, arg.column->size())); *result = std::move(tmp_block.get_by_position(result_column).column); // Variant column is a really special case, src type is nullable but dst variant type is none nullable, // but we still need to wrap nullmap into variant root column to prevent from nullable info lost. @@ -242,7 +244,7 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str column.set_default_value("[]"); return; } - if (WhichDataType(*data_type).is_string()) { + if (WhichDataType(*data_type).is_string() || WhichDataType(*data_type).is_json()) { return; } if (WhichDataType(*data_type).is_simple()) { diff --git a/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs1.sql b/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs1.sql new file mode 100644 index 0000000000..11faa5b4aa --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs1.sql @@ -0,0 +1,15 @@ +-- ERROR: unmatched column +SELECT + cast(v:repo.name as string), + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string)= 'opened') AND (cast(v:repo.name as string) IN ('rspec/rspec-core', 'golden-warning/giraffedraft-server', 'apache/spark')) +)) AND (lower(cast(v:repo.name as string)) NOT LIKE '%clickhouse%') +GROUP BY cast(v:repo.name as string) +ORDER BY authors DESC, prs DESC, length(cast(v:repo.name as string)) DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs2.sql b/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs2.sql new file mode 100644 index 0000000000..ec9b96ed4b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/affinityByIssuesAndPRs2.sql @@ -0,0 +1,15 @@ +-- ERROR: unmatched column +SELECT + cast(v:repo.name as string), + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:repo.name as string) IN ('No-CQRT/GooGuns', 'ivolunteerph/ivolunteerph', 'Tribler/tribler')) +)) AND (lower(cast(v:repo.name as string)) NOT LIKE '%clickhouse%') +GROUP BY cast(v:repo.name as string) +ORDER BY authors DESC, prs DESC, cast(v:repo.name as string) ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/authorsWithTheMostPushes.sql b/regression-test/suites/variant_p0/github_events_p0/authorsWithTheMostPushes.sql new file mode 100644 index 0000000000..006a9ae9d3 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/authorsWithTheMostPushes.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:actor.login as string), + count() AS c, + count(distinct cast(v:repo.name as string)) AS repos + FROM github_events + WHERE cast(v:type as string) = 'PushEvent' + GROUP BY cast(v:actor.login as string) + ORDER BY c DESC, 1, 3 + LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/countingStar1.sql b/regression-test/suites/variant_p0/github_events_p0/countingStar1.sql new file mode 100644 index 0000000000..db71c34c4e --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/countingStar1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' diff --git a/regression-test/suites/variant_p0/github_events_p0/countingStar2.sql b/regression-test/suites/variant_p0/github_events_p0/countingStar2.sql new file mode 100644 index 0000000000..892efa7d72 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/countingStar2.sql @@ -0,0 +1 @@ +SELECT cast(v:payload.action as string), count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:payload.action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/countingStar3.sql b/regression-test/suites/variant_p0/github_events_p0/countingStar3.sql new file mode 100644 index 0000000000..9370e01530 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/countingStar3.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:repo.name as string) IN ('apache/spark', 'GunZi200/Memory-Colour', 'isohuntto/openbay', 'wasabeef/awesome-android-ui') GROUP BY cast(v:payload.action as string) \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/distributionOfRepositoriesByStarCount.sql b/regression-test/suites/variant_p0/github_events_p0/distributionOfRepositoriesByStarCount.sql new file mode 100644 index 0000000000..1e252ef4a7 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/distributionOfRepositoriesByStarCount.sql @@ -0,0 +1,14 @@ +SELECT + pow(10, floor(log10(c))) AS stars, + count(distinct k) +FROM +( + SELECT + cast(v:repo.name as string) as k, + count() AS c + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) +) t +GROUP BY stars +ORDER BY stars ASC diff --git a/regression-test/suites/variant_p0/github_events_p0/githubRoulette.sql b/regression-test/suites/variant_p0/github_events_p0/githubRoulette.sql new file mode 100644 index 0000000000..09eedb9e9c --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/githubRoulette.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' ORDER BY cast(v:created_at as datetime) LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql new file mode 100644 index 0000000000..b362aa613f --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears3.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears4.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql new file mode 100644 index 0000000000..b362aa613f --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears5.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql new file mode 100644 index 0000000000..14b2ec2b78 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears6.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND year(cast(v:created_at as datetime)) = '2015' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql new file mode 100644 index 0000000000..03fb1d2f0d --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheListOfTopRepositoriesChangedOverTheYears7.sql @@ -0,0 +1,28 @@ +SELECT + repo, + year, + cnt +FROM +( + SELECT + row_number() OVER (PARTITION BY year ORDER BY cnt DESC) AS r, + repo, + year, + cnt + FROM + ( + SELECT + lower(cast(v:repo.name as string)) AS repo, + year(cast(v:created_at as datetime)) AS year, + count() AS cnt + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (year(cast(v:created_at as datetime)) >= 2015) + GROUP BY + repo, + year + ) t +) t2 +WHERE r <= 10 +ORDER BY + year ASC, + cnt DESC diff --git a/regression-test/suites/variant_p0/github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql b/regression-test/suites/variant_p0/github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql new file mode 100644 index 0000000000..97874a2d1e --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/howHasTheTotalNumberOfStarsChangedOverTime.sql @@ -0,0 +1,2 @@ +SELECT year(cast(v:created_at as datetime)) AS year, count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY year ORDER BY year + diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments1.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments1.sql new file mode 100644 index 0000000000..9f4e7db0a1 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments1.sql @@ -0,0 +1 @@ +SELECT count() FROM github_events WHERE cast(v:type as string) = 'IssueCommentEvent' diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments2.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments2.sql new file mode 100644 index 0000000000..293b171c13 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE cast(v:type as string) = 'IssueCommentEvent' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments3.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments3.sql new file mode 100644 index 0000000000..cb1926ad20 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments3.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + comments, + issues, + round(comments / issues, 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + count() AS comments, + count(distinct cast(v:payload.issue.`number` as int)) AS issues + FROM github_events + WHERE cast(v:type as string) = 'IssueCommentEvent' + GROUP BY cast(v:repo.name as string) +) t +ORDER BY comments DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments4.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments4.sql new file mode 100644 index 0000000000..c9914fb203 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments4.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 'created') +GROUP BY cast(v:repo.name as string), number +ORDER BY comments DESC, number ASC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments5.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments5.sql new file mode 100644 index 0000000000..04a0f61fa0 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments5.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 'created') AND (cast(v:payload.issue.`number` as int) > 10) +GROUP BY cast(v:repo.name as string), number +ORDER BY comments DESC, cast(v:repo.name as string), number +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments6.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments6.sql new file mode 100644 index 0000000000..cf8a435d6f --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments6.sql @@ -0,0 +1,11 @@ +SELECT + cast(v:repo.name as string), + cast(v:payload.issue.`number` as int) as number, + count() AS comments, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE cast(v:type as string) = 'IssueCommentEvent' AND (cast(v:payload.action as string) = 'created') AND (cast(v:payload.issue.`number` as int) > 10) +GROUP BY cast(v:repo.name as string), number +HAVING authors >= 4 +ORDER BY comments DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments7.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments7.sql new file mode 100644 index 0000000000..07b91250aa --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments7.sql @@ -0,0 +1,9 @@ +SELECT + cast(v:repo.name as string), + count() AS comments, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE cast(v:type as string) = 'CommitCommentEvent' +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC, 1, 3 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments8.sql b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments8.sql new file mode 100644 index 0000000000..7a3696f803 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/issuesWithTheMostComments8.sql @@ -0,0 +1,13 @@ +-- SELECT +-- concat('https://github.com/', cast(v:repo.name as string), '/commit/', cast(v:payload.commit_id as string)) URL, +-- cast(v:payload.commit_id as string) AS commit_id, +-- count() AS comments, +-- count(distinct cast(v:actor.login as string)) AS authors +-- FROM github_events +-- WHERE (cast(v:type as string) = 'CommitCommentEvent') AND commit_id != "" +-- GROUP BY +-- cast(v:repo.name as string), +-- commit_id +-- HAVING authors >= 10 +-- ORDER BY count() DESC, URL, authors +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/mostForkedRepositories.sql b/regression-test/suites/variant_p0/github_events_p0/mostForkedRepositories.sql new file mode 100644 index 0000000000..85cf5ef0de --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/mostForkedRepositories.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS forks FROM github_events WHERE cast(v:type as string) = 'ForkEvent' GROUP BY cast(v:repo.name as string) ORDER BY forks DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/mostPopularCommentsOnGithub.sql b/regression-test/suites/variant_p0/github_events_p0/mostPopularCommentsOnGithub.sql new file mode 100644 index 0000000000..ff617de4a9 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/mostPopularCommentsOnGithub.sql @@ -0,0 +1 @@ +SELECT cast(v:payload.comment.body as string), count() FROM github_events WHERE cast(v:payload.comment.body as string) != "" AND length(cast(v:payload.comment.body as string)) < 100 GROUP BY cast(v:payload.comment.body as string) ORDER BY count(), 1 DESC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfRepositories.sql b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfRepositories.sql new file mode 100644 index 0000000000..b070f0714a --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfRepositories.sql @@ -0,0 +1,14 @@ +SELECT + lower(split_part(repo_name, '/', 1)) AS org, + count(distinct repo_name) AS repos +FROM +( + SELECT cast(v:repo.name as string) as repo_name + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) + HAVING count() >= 10 +) t +GROUP BY org +ORDER BY repos DESC, org ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfStars.sql b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfStars.sql new file mode 100644 index 0000000000..e842751ba1 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheNumberOfStars.sql @@ -0,0 +1,8 @@ +SELECT + lower(split_part(cast(v:repo.name as string), '/', 1)) AS org, + count() AS stars +FROM github_events +WHERE cast(v:type as string) = 'WatchEvent' +GROUP BY org +ORDER BY stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/organizationsByTheSizeOfCommunity.sql b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheSizeOfCommunity.sql new file mode 100644 index 0000000000..8b3ae0baae --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/organizationsByTheSizeOfCommunity.sql @@ -0,0 +1,23 @@ +-- SELECT +-- lower(split_part(cast(v:repo.name as string), '/', 1)) AS org, +-- count(distinct cast(v:actor.login as string)) AS authors, +-- count(distinct pr_author) AS pr_authors, +-- count(distinct issue_author) AS issue_authors, +-- count(distinct comment_author) AS comment_authors, +-- count(distinct review_author) AS review_authors, +-- count(distinct push_author) AS push_authors +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'PullRequestEvent' THEN cast(v:actor.login as string) ELSE NULL END pr_author, +-- CASE WHEN cast(v:type as string) = 'IssuesEvent' THEN cast(v:actor.login as string) ELSE NULL END issue_author, +-- CASE WHEN cast(v:type as string) = 'IssueCommentEvent' THEN cast(v:actor.login as string) ELSE NULL END comment_author, +-- CASE WHEN cast(v:type as string) = 'PullRequestReviewCommentEvent' THEN cast(v:actor.login as string) ELSE NULL END review_author, +-- CASE WHEN cast(v:type as string) = 'PushEvent' THEN cast(v:actor.login as string) ELSE NULL END push_author +-- FROM github_events +-- WHERE cast(v:type as string) IN ('PullRequestEvent', 'IssuesEvent', 'IssueCommentEvent', 'PullRequestReviewCommentEvent', 'PushEvent') +-- ) t +-- GROUP BY org +-- ORDER BY authors DESC diff --git a/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks1.sql b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks1.sql new file mode 100644 index 0000000000..b62668b9fa --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks1.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 3) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY forks DESC, 1, 3, 4 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks2.sql b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks2.sql new file mode 100644 index 0000000000..fe286c489c --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 3) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 20) AND (forks >= 10) +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks3.sql b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks3.sql new file mode 100644 index 0000000000..c8502f59f4 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks3.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(fork) AS forks, + sum(star) AS stars, + round(sum(fork) / sum(star), 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING (stars > 4) AND (forks > 4) +ORDER BY ratio DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks4.sql b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks4.sql new file mode 100644 index 0000000000..f1c405efcd --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks4.sql @@ -0,0 +1,13 @@ +SELECT + sum(fork) AS forks, + sum(star) AS stars, + round(sum(star) / sum(fork), 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string), + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') +) t diff --git a/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks5.sql b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks5.sql new file mode 100644 index 0000000000..22606e7eef --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/proportionsBetweenStarsAndForks5.sql @@ -0,0 +1,21 @@ +SELECT + sum(forks) AS forks, + sum(stars) AS stars, + round(sum(stars) / sum(forks), 2) AS ratio +FROM +( + SELECT + sum(fork) AS forks, + sum(star) AS stars + FROM + ( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'ForkEvent' THEN 1 ELSE 0 END AS fork, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('ForkEvent', 'WatchEvent') + ) t + GROUP BY repo_name + HAVING stars > 10 +) t2 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesByAmountOfModifiedCode.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesByAmountOfModifiedCode.sql new file mode 100644 index 0000000000..f27a32bbff --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesByAmountOfModifiedCode.sql @@ -0,0 +1,12 @@ +SELECT + cast(v:repo.name as string) as repo_name, + count() AS prs, + count(distinct cast(v:actor.login as string)) AS authors, + sum(cast(v:payload.pull_request.additions as int)) AS adds, + sum(cast(v:payload.pull_request.deletions as int)) AS dels +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') AND (cast(v:payload.pull_request.additions as int) < 10000) AND (cast(v:payload.pull_request.deletions as int) < 10000) +GROUP BY repo_name +HAVING (adds / dels) < 10 +ORDER BY adds + dels DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesByTheNumberOfPushes.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesByTheNumberOfPushes.sql new file mode 100644 index 0000000000..86fea6fe68 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesByTheNumberOfPushes.sql @@ -0,0 +1,18 @@ +--ERROR: crash column.h:496] not support +SELECT + cast(v:repo.name as string), + count() AS pushes, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PushEvent') AND (cast(v:repo.name as string) IN +( + SELECT cast(v:repo.name as string) + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string) + ORDER BY count() DESC + LIMIT 10000 +)) +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments1.sql new file mode 100644 index 0000000000..af1faa7258 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE lower(cast(v:payload.comment.body as string)) LIKE '%apache%' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, cast(v:repo.name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments2.sql new file mode 100644 index 0000000000..af974bcf66 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithClickhouse_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(v:payload.comment.body as string)) LIKE '%apache%' THEN 1 ELSE 0 END AS num_comment + FROM github_events + WHERE (lower(cast(v:payload.comment.body as string)) LIKE '%apache%') OR (cast(v:type as string) = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC, repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments1.sql new file mode 100644 index 0000000000..7c3aa1931a --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE lower(cast(v:payload.comment.body as string)) LIKE '%spark%' GROUP BY cast(v:repo.name as string) ORDER BY count() DESC, cast(v:repo.name as string) ASC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments2.sql new file mode 100644 index 0000000000..76da89584b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithDoris_related_comments2.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(num_star) AS num_stars, + sum(num_comment) AS num_comments +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS num_star, + CASE WHEN lower(cast(v:payload.comment.body as string)) LIKE '%spark%' THEN 1 ELSE 0 END AS num_comment + FROM github_events + WHERE (lower(cast(v:payload.comment.body as string)) LIKE '%spark%') OR (cast(v:type as string) = 'WatchEvent') +) t +GROUP BY repo_name +HAVING num_comments > 0 +ORDER BY num_stars DESC,num_comments DESC,repo_name ASC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheHighestGrowthYoY.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheHighestGrowthYoY.sql new file mode 100644 index 0000000000..e8ba7153a7 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheHighestGrowthYoY.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + round(sum(created_at_2022) / sum(created_at_2015), 3) AS yoy, + min(created_at) AS first_seen +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE year(cast(v:created_at as datetime)) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(cast(v:created_at as datetime)) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + cast(v:created_at as datetime) as created_at + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2023-01-01 00:00:00') AND (stars2022 >= 1) and (stars2015 >= 1) +ORDER BY yoy DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql new file mode 100644 index 0000000000..c308adf423 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS c, count(distinct cast(v:actor.login as string)) AS u FROM github_events WHERE cast(v:type as string) = 'IssuesEvent' AND cast(v:payload.action as string) = 'opened' GROUP BY cast(v:repo.name as string) ORDER BY c DESC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql new file mode 100644 index 0000000000..fb977cb817 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues2.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY c DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql new file mode 100644 index 0000000000..370cd1ba45 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues3.sql @@ -0,0 +1,19 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 10 +ORDER BY c, u, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql new file mode 100644 index 0000000000..88d7dca5c3 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfIssues4.sql @@ -0,0 +1,18 @@ +SELECT + repo_name, + sum(issue_created) AS c, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN 1 ELSE 0 END AS issue_created, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star, + CASE WHEN (cast(v:type as string) = 'IssuesEvent') AND (cast(v:payload.action as string) = 'opened') THEN cast(v:actor.login as string) ELSE NULL END AS actor_login + FROM github_events + WHERE cast(v:type as string) IN ('IssuesEvent', 'WatchEvent') +) t +GROUP BY repo_name +ORDER BY u, c, stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql new file mode 100644 index 0000000000..dab00b8130 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests1.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string) as repo_name, count(), count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' GROUP BY cast(v:repo.name as string) ORDER BY 2,1,3 DESC LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql new file mode 100644 index 0000000000..741607d4df --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumAmountOfPullRequests2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count(), count(distinct cast(v:actor.login as string)) AS u FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' GROUP BY cast(v:repo.name as string) ORDER BY u DESC, 2 DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql new file mode 100644 index 0000000000..404b2cf765 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMaximumNumberOfAcceptedInvitations.sql @@ -0,0 +1,17 @@ +SELECT + repo_name, + sum(invitation) AS invitations, + sum(star) AS stars +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE WHEN cast(v:type as string) = 'MemberEvent' THEN 1 ELSE 0 END AS invitation, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events + WHERE cast(v:type as string) IN ('MemberEvent', 'WatchEvent') +) t +GROUP BY repo_name +HAVING stars >= 2 +ORDER BY invitations DESC, stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql new file mode 100644 index 0000000000..d94bcb149b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess1.sql @@ -0,0 +1,13 @@ +SELECT + repo_name, + count(distinct actor_login) AS u, + sum(star) AS stars +FROM +( + SELECT + lower(cast(v:repo.name as string)) as repo_name, + CASE WHEN cast(v:type as string) = 'PushEvent' THEN cast(v:actor.login as string) ELSE NULL END AS actor_login, + CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star + FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +) t +GROUP BY repo_name ORDER BY u, stars, repo_name DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql new file mode 100644 index 0000000000..88a92a06b6 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess2.sql @@ -0,0 +1,13 @@ +-- SELECT +-- cast(v:repo.name as string), +-- count(distinct cast(v:actor.login as string)) AS u, +-- sum(star) AS stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- CASE WHEN cast(v:type as string) = 'PushEvent' AND (ref LIKE '%/master' OR ref LIKE '%/main') THEN cast(v:actor.login as string) ELSE NULL END AS cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star +-- FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +-- ) t +-- GROUP BY cast(v:repo.name as string) ORDER BY u DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql new file mode 100644 index 0000000000..28498c7a3e --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostPeopleWhoHavePushAccess3.sql @@ -0,0 +1,16 @@ +-- SELECT +-- cast(v:repo.name as string), +-- count(distinct cast(v:actor.login as string)) AS u, +-- sum(star) AS stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- CASE WHEN cast(v:type as string) = 'PushEvent' AND (ref LIKE '%/master' OR ref LIKE '%/main') THEN cast(v:actor.login as string) ELSE NULL END AS cast(v:actor.login as string), +-- CASE WHEN cast(v:type as string) = 'WatchEvent' THEN 1 ELSE 0 END AS star +-- FROM github_events WHERE cast(v:type as string) IN ('PushEvent', 'WatchEvent') AND cast(v:repo.name as string) != '/' +-- ) t +-- GROUP BY cast(v:repo.name as string) +-- HAVING stars >= 100 +-- ORDER BY u DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql new file mode 100644 index 0000000000..c7b81b2408 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay1.sql @@ -0,0 +1,25 @@ +SELECT + repo_name, + day, + stars +FROM +( + SELECT + row_number() OVER (PARTITION BY repo_name ORDER BY stars DESC) AS rank, + repo_name, + day, + stars + FROM + ( + SELECT + cast(v:repo.name as string) as repo_name, + to_date(cast(v:created_at as datetime)) AS day, + count() AS stars + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY cast(v:repo.name as string), day + ) t1 +) t2 +WHERE rank = 1 +ORDER BY stars DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql new file mode 100644 index 0000000000..9f07d2270d --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay2.sql @@ -0,0 +1,25 @@ +-- SELECT +-- cast(v:repo.name as string), +-- day, +-- stars +-- FROM +-- ( +-- SELECT +-- row_number() OVER (PARTITION BY cast(v:repo.name as string) ORDER BY stars DESC) AS rank, +-- cast(v:repo.name as string), +-- day, +-- stars +-- FROM +-- ( +-- SELECT +-- cast(v:repo.name as string), +-- to_date(cast(v:created_at as datetime)) AS day, +-- count() AS stars +-- FROM github_events +-- WHERE cast(v:type as string) = 'WatchEvent' +-- GROUP BY cast(v:repo.name as string), day +-- ) t1 +-- ) t2 +-- WHERE rank = 1 +-- ORDER BY stars DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql new file mode 100644 index 0000000000..c0ac82e915 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostStarsOverOneDay3.sql @@ -0,0 +1 @@ +-- SELECT cast(v:repo.name as string), cast(v:created_at as datetime), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string), cast(v:created_at as datetime) ORDER BY count() DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql new file mode 100644 index 0000000000..5c7d2d7c7b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheMostSteadyGrowthOverTime.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + max(stars) AS daily_stars, + sum(stars) AS total_stars, + sum(stars) / max(stars) AS rate +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + to_date(cast(v:created_at as datetime)) AS day, + count() AS stars + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' + GROUP BY + repo_name, + day +) t +GROUP BY repo_name +ORDER BY rate DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheWorstStagnation_order.sql b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheWorstStagnation_order.sql new file mode 100644 index 0000000000..2265f4b5e4 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoriesWithTheWorstStagnation_order.sql @@ -0,0 +1,20 @@ +SELECT + repo_name, + sum(created_at_2022) AS stars2022, + sum(created_at_2015) AS stars2015, + round(sum(created_at_2022) / sum(created_at_2015), 3) AS yoy, + min(created_at) AS first_seen +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + CASE year(cast(v:created_at as datetime)) WHEN 2022 THEN 1 ELSE 0 END AS created_at_2022, + CASE year(cast(v:created_at as datetime)) WHEN 2015 THEN 1 ELSE 0 END AS created_at_2015, + cast(v:created_at as datetime) as created_at + FROM github_events + WHERE cast(v:type as string) = 'WatchEvent' +) t +GROUP BY repo_name +HAVING (min(created_at) <= '2019-01-01 00:00:00') AND (max(created_at) >= '2020-06-01 00:00:00') AND (stars2015 >= 2) +ORDER BY yoy, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList1.sql b/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList1.sql new file mode 100644 index 0000000000..23f6cde280 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList1.sql @@ -0,0 +1,14 @@ +-- ERROR: unmatched column +SELECT + cast(v:repo.name as string) as repo_name, + count() AS stars +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN ('apache/spark', 'prakhar1989/awesome-courses')) +)) AND (cast(v:repo.name as string) NOT IN ('ClickHouse/ClickHouse', 'yandex/ClickHouse')) +GROUP BY repo_name +ORDER BY stars DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList2.sql b/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList2.sql new file mode 100644 index 0000000000..4c7f36b518 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/repositoryAffinityList2.sql @@ -0,0 +1,23 @@ +SELECT + repo_name, + total_stars, + round(spark_stars / total_stars, 2) AS ratio +FROM +( + SELECT + cast(v:repo.name as string) as repo_name, + count(distinct cast(v:actor.login as string)) AS total_stars + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) NOT IN ('apache/spark')) + GROUP BY repo_name + HAVING total_stars >= 10 +) t1 +JOIN +( + SELECT + count(distinct cast(v:actor.login as string)) AS spark_stars + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN ('apache/spark')) +) t2 +ORDER BY ratio DESC, repo_name +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers1.sql b/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers1.sql new file mode 100644 index 0000000000..0db635efc4 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers1.sql @@ -0,0 +1,14 @@ +-- ERROR: unmatched column +SELECT + cast(v:repo.name as string), + count() +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') +)) +GROUP BY cast(v:repo.name as string) +ORDER BY count() DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers2.sql b/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers2.sql new file mode 100644 index 0000000000..1836011dc5 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/starsFromHeavyGithubUsers2.sql @@ -0,0 +1,15 @@ +SELECT + cast(v:repo.name as string), + count() +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) IN +( + SELECT cast(v:actor.login as string) + FROM github_events + WHERE (cast(v:type as string) = 'PullRequestEvent') AND (cast(v:payload.action as string) = 'opened') + GROUP BY cast(v:actor.login as string) + HAVING count() >= 5 +)) +GROUP BY cast(v:repo.name as string) +ORDER BY 1, count() DESC, 1 +LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames1.sql b/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames1.sql new file mode 100644 index 0000000000..0603fec97b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames1.sql @@ -0,0 +1 @@ +SELECT count(), cast(v:repo.name as string) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY length(cast(v:repo.name as string)) DESC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames2.sql b/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames2.sql new file mode 100644 index 0000000000..244f58e3ff --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theLongestRepositoryNames2.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:repo.name as string) LIKE '%_/_%' GROUP BY cast(v:repo.name as string) ORDER BY length(cast(v:repo.name as string)) ASC, cast(v:repo.name as string) LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/theMostToughCodeReviews.sql b/regression-test/suites/variant_p0/github_events_p0/theMostToughCodeReviews.sql new file mode 100644 index 0000000000..46a896f795 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theMostToughCodeReviews.sql @@ -0,0 +1,10 @@ +SELECT + concat('https://github.com/', cast(v:repo.name as string), '/pull/') AS URL, + count(distinct cast(v:actor.login as string)) AS authors +FROM github_events +WHERE (cast(v:type as string) = 'PullRequestReviewCommentEvent') AND (cast(v:payload.action as string) = 'created') +GROUP BY + cast(v:repo.name as string), + cast(v:payload.issue.`number` as string) +ORDER BY authors DESC, URL ASC +LIMIT 50 \ No newline at end of file diff --git a/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql new file mode 100644 index 0000000000..1d29c3eeb4 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfRepositoriesOnGithub.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:repo.name as string)) FROM github_events diff --git a/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub1.sql b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub1.sql new file mode 100644 index 0000000000..044aa0470b --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub1.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events diff --git a/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub2.sql b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub2.sql new file mode 100644 index 0000000000..12776f1bb2 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub2.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'WatchEvent' diff --git a/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub3.sql b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub3.sql new file mode 100644 index 0000000000..8454603277 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub3.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PushEvent' diff --git a/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub4.sql b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub4.sql new file mode 100644 index 0000000000..384b232f4f --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/theTotalNumberOfUsersOnGithub4.sql @@ -0,0 +1 @@ +SELECT count(distinct cast(v:actor.login as string)) FROM github_events WHERE cast(v:type as string) = 'PullRequestEvent' AND cast(v:payload.action as string) = 'opened' diff --git a/regression-test/suites/variant_p0/github_events_p0/topLabels1.sql b/regression-test/suites/variant_p0/github_events_p0/topLabels1.sql new file mode 100644 index 0000000000..152ec3ba3a --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/topLabels1.sql @@ -0,0 +1,9 @@ +-- SELECT +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (action IN ('created', 'opened', 'labeled')) +-- GROUP BY label +-- ORDER BY c DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/topLabels2.sql b/regression-test/suites/variant_p0/github_events_p0/topLabels2.sql new file mode 100644 index 0000000000..bf404d4da5 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/topLabels2.sql @@ -0,0 +1,9 @@ +-- SELECT +-- label, +-- count() AS c +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (action IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- GROUP BY label +-- ORDER BY c DESC +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/topLabels3.sql b/regression-test/suites/variant_p0/github_events_p0/topLabels3.sql new file mode 100644 index 0000000000..49e6086166 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/topLabels3.sql @@ -0,0 +1,14 @@ +-- SELECT +-- sum(bug) AS bugs, +-- sum(feature) AS feature, +-- sum(bug) / sum(feature) AS ratio +-- FROM +-- ( +-- SELECT +-- CASE WHEN lower(label) LIKE '%bug%' THEN 1 ELSE 0 END AS bug, +-- CASE WHEN lower(label) LIKE '%feature%' THEN 1 ELSE 0 END AS feature +-- FROM github_events +-- LATERAL VIEW explode_split(labels, ',') t AS label +-- WHERE (cast(v:type as string) IN ('IssuesEvent', 'PullRequestEvent', 'IssueCommentEvent')) AND (action IN ('created', 'opened', 'labeled')) AND ((lower(label) LIKE '%bug%') OR (lower(label) LIKE '%feature%')) +-- ) t +-- LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/topRepositoriesByStars.sql b/regression-test/suites/variant_p0/github_events_p0/topRepositoriesByStars.sql new file mode 100644 index 0000000000..3dfd35d9d6 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/topRepositoriesByStars.sql @@ -0,0 +1 @@ +SELECT cast(v:repo.name as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:repo.name as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql b/regression-test/suites/variant_p0/github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql new file mode 100644 index 0000000000..0c930308cf --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/whatIsTheBestDayOfTheWeekToCatchAStar.sql @@ -0,0 +1 @@ +SELECT dayofweek(cast(v:created_at as datetime)) AS day, count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY day ORDER BY day diff --git a/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars1.sql b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars1.sql new file mode 100644 index 0000000000..63a4dfd15f --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars1.sql @@ -0,0 +1 @@ +SELECT cast(v:actor.login as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' GROUP BY cast(v:actor.login as string) ORDER BY stars DESC, 1 LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars2.sql b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars2.sql new file mode 100644 index 0000000000..0aa67b56d2 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars2.sql @@ -0,0 +1 @@ +SELECT cast(v:actor.login as string), count() AS stars FROM github_events WHERE cast(v:type as string) = 'WatchEvent' AND cast(v:actor.login as string) = 'cliffordfajardo' GROUP BY cast(v:actor.login as string) ORDER BY stars DESC LIMIT 50 diff --git a/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars3.sql b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars3.sql new file mode 100644 index 0000000000..99eb217a22 --- /dev/null +++ b/regression-test/suites/variant_p0/github_events_p0/whoAreAllThosePeopleGivingStars3.sql @@ -0,0 +1,14 @@ +-- ERROR: unmatched column +SELECT + cast(v:repo.name as string), + count() AS stars +FROM github_events +WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:repo.name as string) IN +( + SELECT cast(v:repo.name as string) + FROM github_events + WHERE (cast(v:type as string) = 'WatchEvent') AND (cast(v:actor.login as string) = 'cliffordfajardo') +)) +GROUP BY cast(v:repo.name as string) +ORDER BY stars DESC +LIMIT 50 diff --git a/regression-test/suites/variant_p0/load.groovy b/regression-test/suites/variant_p0/load.groovy index 48fa191797..cd039f0888 100644 --- a/regression-test/suites/variant_p0/load.groovy +++ b/regression-test/suites/variant_p0/load.groovy @@ -73,226 +73,244 @@ suite("regression_test_variant", "variant_type"){ def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) } + try { def table_name = "simple_variant" - // // 1. simple cases - create_table table_name - sql """insert into ${table_name} values (1, '[1]'),(1, '{"a" : 1}');""" - sql """insert into ${table_name} values (2, '[2]'),(1, '{"a" : [[[1]]]}');""" - sql """insert into ${table_name} values (3, '3'),(1, '{"a" : 1}'), (1, '{"a" : [1]}');""" - sql """insert into ${table_name} values (4, '"4"'),(1, '{"a" : "1223"}');""" - sql """insert into ${table_name} values (5, '5.0'),(1, '{"a" : [1]}');""" - sql """insert into ${table_name} values (6, '"[6]"'),(1, '{"a" : ["1", 2, 1.1]}');""" - sql """insert into ${table_name} values (7, '7'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" - sql """insert into ${table_name} values (8, '8.11111'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (9, '"9999"'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (10, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" - sql """insert into ${table_name} values (11, '[123.0]'),(1, '{"a" : 1, "b" : {"c" : 1}}'),(1, '{"a" : 1, "b" : 10}');""" - sql """insert into ${table_name} values (12, '[123.2]'),(1, '{"a" : 1, "b" : 10}'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" - qt_sql_1 "select k, v from simple_variant order by k, cast(v as string)" - qt_sql_1_1 "select k, v, cast(v:b as string) from simple_variant where length(cast(v:b as string)) > 4 order by k, cast(v as string)" - verify table_name - - // 2. type confilct cases - table_name = "type_conflict_resolution" - create_table table_name - sql """insert into ${table_name} values (1, '{"c" : "123"}');""" - sql """insert into ${table_name} values (2, '{"c" : 123}');""" - sql """insert into ${table_name} values (3, '{"cc" : [123]}');""" - sql """insert into ${table_name} values (4, '{"cc" : [123.1]}');""" - sql """insert into ${table_name} values (5, '{"ccc" : 123}');""" - sql """insert into ${table_name} values (6, '{"ccc" : 123321}');""" - sql """insert into ${table_name} values (7, '{"cccc" : 123.0}');""" - sql """insert into ${table_name} values (8, '{"cccc" : 123.11}');""" - sql """insert into ${table_name} values (9, '{"ccccc" : [123]}');""" - sql """insert into ${table_name} values (10, '{"ccccc" : [123456789]}');""" - sql """insert into ${table_name} values (11, '{"b" : 1111111111111111}');""" - sql """insert into ${table_name} values (12, '{"b" : 1.222222}');""" - sql """insert into ${table_name} values (13, '{"bb" : 1}');""" - sql """insert into ${table_name} values (14, '{"bb" : 214748364711}');""" - sql """insert into ${table_name} values (15, '{"A" : 1}');""" - qt_sql """select v from type_conflict_resolution order by k;""" - verify table_name - - // 3. simple variant sub column select - table_name = "simple_select_variant" - create_table table_name - sql """insert into ${table_name} values (1, '{"A" : 123}');""" - sql """insert into ${table_name} values (2, '{"A" : 1}');""" - sql """insert into ${table_name} values (4, '{"A" : 123456}');""" - sql """insert into ${table_name} values (8, '{"A" : 123456789101112}');""" - qt_sql_2 "select v:A from ${table_name} order by cast(v:A as int)" - sql """insert into ${table_name} values (12, '{"AA" : [123456]}');""" - sql """insert into ${table_name} values (14, '{"AA" : [123456789101112]}');""" - // qt_sql_3 "select v:AA from ${table_name} where size(v:AA) > 0 order by k" - qt_sql_4 "select v:A, v:AA, v from ${table_name} order by k" - qt_sql_5 "select v:A, v:AA, v, v from ${table_name} where cast(v:A as bigint) > 123 order by k" - - sql """insert into ${table_name} values (16, '{"a" : 123.0, "A" : 191191, "c": 123}');""" - sql """insert into ${table_name} values (18, '{"a" : "123", "c" : 123456}');""" - sql """insert into ${table_name} values (20, '{"a" : 1.10111, "A" : 1800, "c" : [12345]}');""" - // sql """insert into ${table_name} values (12, '{"a" : [123]}, "c": "123456"');""" - sql """insert into ${table_name} values (22, '{"a" : 1.1111, "A" : 17211, "c" : 111111}');""" - sql "sync" - qt_sql_6 "select v:a, v:A from ${table_name} order by cast(v:A as bigint), k" - qt_sql_7 "select k, v:A from ${table_name} where cast(v:A as bigint) >= 1 order by cast(v:A as bigint), k" - - // TODO: if not cast, then v:a could return "123" or 123 which is none determinately - qt_sql_8 "select cast(v:a as string), v:A from ${table_name} where cast(v:a as json) is null order by k" - // qt_sql_9 "select cast(v:a as string), v:A from ${table_name} where cast(v:A as json) is null order by k" - - // !!! Not found cast function String to Float64 - // qt_sql_10 "select v:a, v:A from ${table_name} where cast(v:a as double) > 0 order by k" - qt_sql_11 "select v:A from ${table_name} where cast(v:A as bigint) > 1 order by k" - - // ----%%---- - qt_sql_12 "select v:A, v from ${table_name} where cast(v:A as bigint) > 1 order by k" - // ----%%---- - qt_sql_13 "select v:a, v:A from simple_select_variant where 1=1 and cast(v:a as json) is null and cast(v:A as bigint) >= 1 order by k;" - qt_sql_14 """select v:a, v:A, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" - - // !!! Not found cast function String to Float64 - // qt_sql_15 "select v:a, v:A from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" - // qt_sql_16 "select v:a, v:A, v:c from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" - - // TODO: if not cast, then v:a could return "123" or 123 which is none determinately - qt_sql_17 "select cast(v:a as json), v:A, v, v:AA from simple_select_variant where cast(v:A as bigint) is null order by k;" - - sql """insert into simple_select_variant values (12, '{"oamama": 1.1}')""" - qt_sql_18 "select v:a, v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is null order by k;" - qt_sql_19 """select v:a, v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is not null order by k""" - qt_sql_20 """select v:A from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" - - // !!! Not found cast function String to Float64 - // qt_sql_21 """select v:A, v:a, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:a as double) > 1 order by cast(v:A as bigint);""" - - sql "truncate table simple_select_variant" - sql """insert into simple_select_variant values (11, '{"x": [123456]}');""" - sql """insert into simple_select_variant values (12, '{"x": [123456789101112]}');""" - sql """insert into simple_select_variant values (12, '{"xxx" : 123, "yyy" : 456}');""" - qt_sql_21_1 """select * from simple_select_variant where cast(v:x as json) is null""" - qt_sql_21_2 """select cast(v:x as json) from simple_select_variant where cast(v:x as json) is not null order by k;""" - - // 4. multi variant in single table - table_name = "multi_variant" - sql "DROP TABLE IF EXISTS ${table_name}" - sql """ - CREATE TABLE IF NOT EXISTS ${table_name} ( - k bigint, - v1 variant, - v2 variant, - v3 variant - - ) - DUPLICATE KEY(`k`) - DISTRIBUTED BY RANDOM BUCKETS 5 - properties("replication_num" = "1", "disable_auto_compaction" = "false"); - """ - sql """insert into ${table_name} values (1, '{"A" : 123}', '{"B" : 123}', '{"C" : 456}');""" - sql """insert into ${table_name} values (2, '{"C" : "123"}', '{"D" : [123]}', '{"E" : 789}');""" - sql """insert into ${table_name} values (3, '{"C" : "123"}', '{"C" : [123]}', '{"E" : "789"}');""" - sql "sync" - verify table_name - qt_sql_22 "select v1:A from multi_variant order by k;" - qt_sql_23 "select v2:D from multi_variant order by k;" - qt_sql_24 "select v2:C from multi_variant order by k;" - - // 5. multi tablets concurrent load - table_name = "t_json_parallel" - create_table table_name - sql """INSERT INTO t_json_parallel SELECT *, '{"k1":1, "k2": "some", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "50000");""" - qt_sql_25 """ SELECT sum(cast(v:k1 as int)), sum(cast(v:k4 as double)), sum(cast(json_extract(v:k5, "\$.[0].[0]") as int)) from t_json_parallel; """ - //50000 61700000 55000.00000000374 6150000 - // 7. gh data - table_name = "ghdata" - create_table table_name - load_json_data.call(table_name, """${getS3Url() + '/load/ghdata_sample.json'}""") - qt_sql_26 "select count() from ${table_name}" - - // 8. json empty string - // table_name = "empty_string" + // // // 1. simple cases + // create_table table_name + // sql """insert into ${table_name} values (1, '[1]'),(1, '{"a" : 1}');""" + // sql """insert into ${table_name} values (2, '[2]'),(1, '{"a" : [[[1]]]}');""" + // sql """insert into ${table_name} values (3, '3'),(1, '{"a" : 1}'), (1, '{"a" : [1]}');""" + // sql """insert into ${table_name} values (4, '"4"'),(1, '{"a" : "1223"}');""" + // sql """insert into ${table_name} values (5, '5.0'),(1, '{"a" : [1]}');""" + // sql """insert into ${table_name} values (6, '"[6]"'),(1, '{"a" : ["1", 2, 1.1]}');""" + // sql """insert into ${table_name} values (7, '7'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" + // sql """insert into ${table_name} values (8, '8.11111'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + // sql """insert into ${table_name} values (9, '"9999"'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + // sql """insert into ${table_name} values (10, '1000000'),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');""" + // sql """insert into ${table_name} values (11, '[123.0]'),(1, '{"a" : 1, "b" : {"c" : 1}}'),(1, '{"a" : 1, "b" : 10}');""" + // sql """insert into ${table_name} values (12, '[123.2]'),(1, '{"a" : 1, "b" : 10}'),(1, '{"a" : 1, "b" : {"c" : 1}}');""" + // qt_sql_1 "select k, v from simple_variant order by k, cast(v as string)" + // qt_sql_1_1 "select k, v, cast(v:b as string) from simple_variant where length(cast(v:b as string)) > 4 order by k, cast(v as string)" + // verify table_name + + // // 2. type confilct cases + // table_name = "type_conflict_resolution" + // create_table table_name + // sql """insert into ${table_name} values (1, '{"c" : "123"}');""" + // sql """insert into ${table_name} values (2, '{"c" : 123}');""" + // sql """insert into ${table_name} values (3, '{"cc" : [123]}');""" + // sql """insert into ${table_name} values (4, '{"cc" : [123.1]}');""" + // sql """insert into ${table_name} values (5, '{"ccc" : 123}');""" + // sql """insert into ${table_name} values (6, '{"ccc" : 123321}');""" + // sql """insert into ${table_name} values (7, '{"cccc" : 123.0}');""" + // sql """insert into ${table_name} values (8, '{"cccc" : 123.11}');""" + // sql """insert into ${table_name} values (9, '{"ccccc" : [123]}');""" + // sql """insert into ${table_name} values (10, '{"ccccc" : [123456789]}');""" + // sql """insert into ${table_name} values (11, '{"b" : 1111111111111111}');""" + // sql """insert into ${table_name} values (12, '{"b" : 1.222222}');""" + // sql """insert into ${table_name} values (13, '{"bb" : 1}');""" + // sql """insert into ${table_name} values (14, '{"bb" : 214748364711}');""" + // sql """insert into ${table_name} values (15, '{"A" : 1}');""" + // qt_sql """select v from type_conflict_resolution order by k;""" + // verify table_name + + // // 3. simple variant sub column select + // table_name = "simple_select_variant" + // create_table table_name + // sql """insert into ${table_name} values (1, '{"A" : 123}');""" + // sql """insert into ${table_name} values (2, '{"A" : 1}');""" + // sql """insert into ${table_name} values (4, '{"A" : 123456}');""" + // sql """insert into ${table_name} values (8, '{"A" : 123456789101112}');""" + // qt_sql_2 "select v:A from ${table_name} order by cast(v:A as int)" + // sql """insert into ${table_name} values (12, '{"AA" : [123456]}');""" + // sql """insert into ${table_name} values (14, '{"AA" : [123456789101112]}');""" + // // qt_sql_3 "select v:AA from ${table_name} where size(v:AA) > 0 order by k" + // qt_sql_4 "select v:A, v:AA, v from ${table_name} order by k" + // qt_sql_5 "select v:A, v:AA, v, v from ${table_name} where cast(v:A as bigint) > 123 order by k" + + // sql """insert into ${table_name} values (16, '{"a" : 123.0, "A" : 191191, "c": 123}');""" + // sql """insert into ${table_name} values (18, '{"a" : "123", "c" : 123456}');""" + // sql """insert into ${table_name} values (20, '{"a" : 1.10111, "A" : 1800, "c" : [12345]}');""" + // // sql """insert into ${table_name} values (12, '{"a" : [123]}, "c": "123456"');""" + // sql """insert into ${table_name} values (22, '{"a" : 1.1111, "A" : 17211, "c" : 111111}');""" + // sql "sync" + // qt_sql_6 "select v:a, v:A from ${table_name} order by cast(v:A as bigint), k" + // qt_sql_7 "select k, v:A from ${table_name} where cast(v:A as bigint) >= 1 order by cast(v:A as bigint), k" + + // // TODO: if not cast, then v:a could return "123" or 123 which is none determinately + // qt_sql_8 "select cast(v:a as string), v:A from ${table_name} where cast(v:a as json) is null order by k" + // // qt_sql_9 "select cast(v:a as string), v:A from ${table_name} where cast(v:A as json) is null order by k" + + // // !!! Not found cast function String to Float64 + // // qt_sql_10 "select v:a, v:A from ${table_name} where cast(v:a as double) > 0 order by k" + // qt_sql_11 "select v:A from ${table_name} where cast(v:A as bigint) > 1 order by k" + + // // ----%%---- + // qt_sql_12 "select v:A, v from ${table_name} where cast(v:A as bigint) > 1 order by k" + // // ----%%---- + // qt_sql_13 "select v:a, v:A from simple_select_variant where 1=1 and cast(v:a as json) is null and cast(v:A as bigint) >= 1 order by k;" + // qt_sql_14 """select v:a, v:A, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" + + // // !!! Not found cast function String to Float64 + // // qt_sql_15 "select v:a, v:A from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" + // // qt_sql_16 "select v:a, v:A, v:c from ${table_name} where 1=1 and cast(v:a as double) > 0 and v:A is not null order by k" + + // // TODO: if not cast, then v:a could return "123" or 123 which is none determinately + // qt_sql_17 "select cast(v:a as json), v:A, v, v:AA from simple_select_variant where cast(v:A as bigint) is null order by k;" + + // sql """insert into simple_select_variant values (12, '{"oamama": 1.1}')""" + // qt_sql_18 "select v:a, v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is null order by k;" + // qt_sql_19 """select v:a, v:A, v, v:oamama from simple_select_variant where cast(v:oamama as double) is not null order by k""" + // qt_sql_20 """select v:A from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:A as bigint) = 123456 limit 1;""" + + // // !!! Not found cast function String to Float64 + // // qt_sql_21 """select v:A, v:a, v from simple_select_variant where cast(v:A as bigint) > 0 and cast(v:a as double) > 1 order by cast(v:A as bigint);""" + + // sql "truncate table simple_select_variant" + // sql """insert into simple_select_variant values (11, '{"x": [123456]}');""" + // sql """insert into simple_select_variant values (12, '{"x": [123456789101112]}');""" + // sql """insert into simple_select_variant values (12, '{"xxx" : 123, "yyy" : 456}');""" + // qt_sql_21_1 """select * from simple_select_variant where cast(v:x as json) is null""" + // qt_sql_21_2 """select cast(v:x as json) from simple_select_variant where cast(v:x as json) is not null order by k;""" + + // // 4. multi variant in single table + // table_name = "multi_variant" + // sql "DROP TABLE IF EXISTS ${table_name}" + // sql """ + // CREATE TABLE IF NOT EXISTS ${table_name} ( + // k bigint, + // v1 variant, + // v2 variant, + // v3 variant + // + // ) + // DUPLICATE KEY(`k`) + // DISTRIBUTED BY RANDOM BUCKETS 5 + // properties("replication_num" = "1", "disable_auto_compaction" = "false"); + // """ + // sql """insert into ${table_name} values (1, '{"A" : 123}', '{"B" : 123}', '{"C" : 456}');""" + // sql """insert into ${table_name} values (2, '{"C" : "123"}', '{"D" : [123]}', '{"E" : 789}');""" + // sql """insert into ${table_name} values (3, '{"C" : "123"}', '{"C" : [123]}', '{"E" : "789"}');""" + // sql "sync" + // verify table_name + // qt_sql_22 "select v1:A from multi_variant order by k;" + // qt_sql_23 "select v2:D from multi_variant order by k;" + // qt_sql_24 "select v2:C from multi_variant order by k;" + + // // 5. multi tablets concurrent load + // table_name = "t_json_parallel" + // create_table table_name + // sql """INSERT INTO t_json_parallel SELECT *, '{"k1":1, "k2": "some", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "50000");""" + // qt_sql_25 """ SELECT sum(cast(v:k1 as int)), sum(cast(v:k4 as double)), sum(cast(json_extract(v:k5, "\$.[0].[0]") as int)) from t_json_parallel; """ + // //50000 61700000 55000.00000000374 6150000 + // // 7. gh data + // table_name = "ghdata" + // create_table table_name + // load_json_data.call(table_name, """${getS3Url() + '/load/ghdata_sample.json'}""") + // qt_sql_26 "select count() from ${table_name}" + + // // 8. json empty string + // // table_name = "empty_string" + // // create_table table_name + // // sql """INSERT INTO empty_string VALUES (1, ''), (2, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" + // // sql """INSERT INTO empty_string VALUES (3, null), (4, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" + // // qt_sql_27 "SELECT * FROM ${table_name} ORDER BY k;" + + // // // 9. btc data + // // table_name = "btcdata" + // // create_table table_name + // // load_json_data.call(table_name, """${getS3Url() + '/load/btc_transactions.json'}""") + // // qt_sql_28 "select count() from ${table_name}" + + // // 10. alter add variant + // table_name = "alter_variant" + // create_table table_name + // sql """INSERT INTO ${table_name} VALUES (1, ''), (1, '{"k1": 1, "k2": "v1"}'), (1, '{}'), (1, '{"k1": 2}');""" + // sql "alter table ${table_name} add column v2 variant default null" + // sql """INSERT INTO ${table_name} VALUES (1, '{"kyyyy" : "123"}', '{"kxkxkxkx" : [123]}'), (1, '{"kxxxx" : 123}', '{"xxxxyyyy": 123}');""" + // qt_sql_29_1 """select * from alter_variant where length(cast(v2 as string)) > 2 order by k, cast(v as string), cast(v2 as string);""" + // verify table_name + + // // 11. boolean values + // table_name = "boolean_values" // create_table table_name - // sql """INSERT INTO empty_string VALUES (1, ''), (2, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" - // sql """INSERT INTO empty_string VALUES (3, null), (4, '{"k1": 1, "k2": "v1"}'), (3, '{}'), (4, '{"k1": 2}');""" - // qt_sql_27 "SELECT * FROM ${table_name} ORDER BY k;" + // sql """INSERT INTO ${table_name} VALUES (1, ''), (2, '{"k1": true, "k2": false}'), (3, '{}'), (4, '{"k1": false}');""" + // verify table_name - // // 9. btc data - // table_name = "btcdata" + // // 12. jsonb values + // table_name = "jsonb_values" // create_table table_name - // load_json_data.call(table_name, """${getS3Url() + '/load/btc_transactions.json'}""") - // qt_sql_28 "select count() from ${table_name}" - - // 10. alter add variant - table_name = "alter_variant" - create_table table_name - sql """INSERT INTO ${table_name} VALUES (1, ''), (1, '{"k1": 1, "k2": "v1"}'), (1, '{}'), (1, '{"k1": 2}');""" - sql "alter table ${table_name} add column v2 variant default null" - sql """INSERT INTO ${table_name} VALUES (1, '{"kyyyy" : "123"}', '{"kxkxkxkx" : [123]}'), (1, '{"kxxxx" : 123}', '{"xxxxyyyy": 123}');""" - qt_sql_29_1 """select * from alter_variant where length(cast(v2 as string)) > 2 order by k, cast(v as string), cast(v2 as string);""" - verify table_name - - // 11. boolean values - table_name = "boolean_values" - create_table table_name - sql """INSERT INTO ${table_name} VALUES (1, ''), (2, '{"k1": true, "k2": false}'), (3, '{}'), (4, '{"k1": false}');""" - verify table_name - - // 12. jsonb values - table_name = "jsonb_values" - create_table table_name - sql """insert into ${table_name} values (1, '{"a" : ["123", 123, [123]]}')""" - sql """insert into ${table_name} values (2, '{"a" : ["123"]}')""" - sql """insert into ${table_name} values (3, '{"a" : "123"}')""" - sql """insert into ${table_name} values (4, '{"a" : 123456}')""" - sql """insert into ${table_name} values (5, '{"a" : [123, "123", 1.11111]}')""" - sql """insert into ${table_name} values (6, '{"a" : [123, 1.11, "123"]}')""" - sql """insert into ${table_name} values(7, '{"a" : [123, {"xx" : 1}], "b" : {"c" : 456, "d" : null, "e" : 7.111}}')""" - // TODO data bellow is invalid at present - // sql """insert into ${table_name} values (8, '{"a" : [123, 111........]}')""" - sql """insert into ${table_name} values (9, '{"a" : [123, {"a" : 1}]}')""" - sql """insert into ${table_name} values (10, '{"a" : [{"a" : 1}, 123]}')""" - qt_sql_29 "select v:a from ${table_name} order by k" - // b? 7.111 [123,{"xx":1}] {"b":{"c":456,"e":7.111}} 456 - qt_sql_30 "select v:b.e, v:a, v:b, v:b.c from jsonb_values where cast(v:b.e as double) > 1;" - - // 13. sparse columns - table_name = "sparse_columns" - create_table table_name - sql """insert into sparse_columns select 0, '{"a": 11245, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}}' as json_str - union all select 0, '{"a": 1123}' as json_str union all select 0, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") limit 4096 ;""" - qt_sql_30 """ select v from sparse_columns where v is not null and json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" - sql "truncate table sparse_columns" - sql """insert into sparse_columns select 0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}' as json_str - union all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : {"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = "4096") limit 4096 ;""" - qt_sql_31 """ select v from sparse_columns where v is not null and json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" - sql "truncate table sparse_columns" - - // 12. streamload remote file - table_name = "logdata" - create_table.call(table_name, "4") - sql "set enable_two_phase_read_opt = false;" - // no sparse columns - set_be_config.call("ratio_of_defaults_as_sparse_column", "1") - load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") - qt_sql_32 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" - qt_sql_32_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" - sql "truncate table ${table_name}" - - // 0.95 default ratio + // sql """insert into ${table_name} values (1, '{"a" : ["123", 123, [123]]}')""" + // sql """insert into ${table_name} values (2, '{"a" : ["123"]}')""" + // sql """insert into ${table_name} values (3, '{"a" : "123"}')""" + // sql """insert into ${table_name} values (4, '{"a" : 123456}')""" + // sql """insert into ${table_name} values (5, '{"a" : [123, "123", 1.11111]}')""" + // sql """insert into ${table_name} values (6, '{"a" : [123, 1.11, "123"]}')""" + // sql """insert into ${table_name} values(7, '{"a" : [123, {"xx" : 1}], "b" : {"c" : 456, "d" : null, "e" : 7.111}}')""" + // // TODO data bellow is invalid at present + // // sql """insert into ${table_name} values (8, '{"a" : [123, 111........]}')""" + // sql """insert into ${table_name} values (9, '{"a" : [123, {"a" : 1}]}')""" + // sql """insert into ${table_name} values (10, '{"a" : [{"a" : 1}, 123]}')""" + // qt_sql_29 "select v:a from ${table_name} order by k" + // // b? 7.111 [123,{"xx":1}] {"b":{"c":456,"e":7.111}} 456 + // qt_sql_30 "select v:b.e, v:a, v:b, v:b.c from jsonb_values where cast(v:b.e as double) > 1;" + + // // 13. sparse columns + // table_name = "sparse_columns" + // create_table table_name + // sql """insert into sparse_columns select 0, '{"a": 11245, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}}' as json_str + // union all select 0, '{"a": 1123}' as json_str union all select 0, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096") limit 4096 ;""" + // qt_sql_30 """ select v from sparse_columns where v is not null and json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" + // sql "truncate table sparse_columns" + // sql """insert into sparse_columns select 0, '{"a": 1123, "b" : [123, {"xx" : 1}], "c" : {"c" : 456, "d" : null, "e" : 7.111}, "zzz" : null, "oooo" : {"akakaka" : null, "xxxx" : {"xxx" : 123}}}' as json_str + // union all select 0, '{"a" : 1234, "xxxx" : "kaana", "ddd" : {"aaa" : 123, "mxmxm" : [456, "789"]}}' as json_str from numbers("number" = "4096") limit 4096 ;""" + // qt_sql_31 """ select v from sparse_columns where v is not null and json_extract(v, "\$") != "{}" order by cast(v as string) limit 10""" + // sql "truncate table sparse_columns" + + // // 12. streamload remote file + // table_name = "logdata" + // create_table.call(table_name, "4") + // sql "set enable_two_phase_read_opt = false;" + // // no sparse columns + // set_be_config.call("ratio_of_defaults_as_sparse_column", "1") + // load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + // qt_sql_32 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" + // qt_sql_32_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" + // sql "truncate table ${table_name}" + + // // 0.95 default ratio + // set_be_config.call("ratio_of_defaults_as_sparse_column", "0.95") + // load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + // qt_sql_33 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" + // qt_sql_33_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" + // sql "truncate table ${table_name}" + + // // always sparse column + // set_be_config.call("ratio_of_defaults_as_sparse_column", "0") + // load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") + // qt_sql_34 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" + // sql "truncate table ${table_name}" + // qt_sql_35 """select v->"\$.json.parseFailed" from logdata where k = 162 and v->"\$.json.parseFailed" != 'null';""" + // qt_sql_35_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" + + + // load gharchive set_be_config.call("ratio_of_defaults_as_sparse_column", "0.95") - load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") - qt_sql_33 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" - qt_sql_33_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" - sql "truncate table ${table_name}" - - // always sparse column - set_be_config.call("ratio_of_defaults_as_sparse_column", "0") - load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""") - qt_sql_34 """ select v->"\$.json.parseFailed" from logdata where v->"\$.json.parseFailed" != 'null' order by k limit 10;""" - sql "truncate table ${table_name}" - qt_sql_35 """select v->"\$.json.parseFailed" from logdata where k = 162 and v->"\$.json.parseFailed" != 'null';""" - qt_sql_35_1 """select v:json.parseFailed from logdata where cast(v:json.parseFailed as string) is not null and k = 162;""" - + table_name = "github_events" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1", "disable_auto_compaction" = "false"); + """ + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-2.json'}""") + load_json_data.call(table_name, """${getS3Url() + '/regression/gharchive.m/2015-01-01-3.json'}""") // TODO add test case that some certain columns are materialized in some file while others are not materilized(sparse) } finally { // reset flags --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
