clintropolis commented on code in PR #14017:
URL: https://github.com/apache/druid/pull/14017#discussion_r1158848604
##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts:
##########
@@ -672,36 +670,36 @@ describe('ingestion-spec', () => {
});
describe('spec utils', () => {
- const ingestionSpec: IngestionSpec = {
- type: 'index_parallel',
- spec: {
- ioConfig: {
- type: 'index_parallel',
- inputSource: {
- type: 'http',
- uris: ['https://website.com/wikipedia.json.gz'],
- },
- inputFormat: {
- type: 'json',
- },
- },
- tuningConfig: {
- type: 'index_parallel',
- },
- dataSchema: {
- dataSource: 'wikipedia',
- granularitySpec: {
- segmentGranularity: 'day',
- queryGranularity: 'hour',
- },
- timestampSpec: {
- column: 'timestamp',
- format: 'iso',
- },
- dimensionsSpec: {},
- },
- },
- };
+ // const ingestionSpec: IngestionSpec = {
+ // type: 'index_parallel',
+ // spec: {
Review Comment:
Are these commented-out blocks supposed to be here?
##########
web-console/src/utils/sampler.ts:
##########
@@ -63,6 +63,38 @@ export interface SamplerConfig {
export interface SampleResponse {
data: SampleEntry[];
+ logicalSegmentSchema: { name: string; type: string }[];
+ logicalDimensions: DimensionSpec[];
+ physicalDimensions: DimensionSpec[];
+ numRowsIndexed: number;
+ numRowsRead: number;
+}
+
+export function getHeaderNamesFromSampleResponse(
+ sampleResponse: SampleResponse,
+ ignoreTimeColumn = false,
+) {
+ return filterMap(sampleResponse.logicalSegmentSchema, s =>
+ ignoreTimeColumn && s.name === '__time' ? undefined : s.name,
+ );
+}
+
+export function guessDimensionsFromSampleResponse(sampleResponse:
SampleResponse): DimensionSpec[] {
+ const { logicalDimensions, physicalDimensions, data } = sampleResponse;
+ return logicalDimensions.map(d => {
+ // Boolean column are currently reported as "long" so let's turn them into
"string"
Review Comment:
I'm still looking into this, since technically it looks like this
should depend on the value of `druid.expressions.useStrictBooleans`.
Additionally, 'long' is probably the better choice when using 'auto', since longs
do have indexes in that mode, so I'm a bit conflicted about this staying like
this long term, but I think it's fine at least until we make 'auto' the default
for schemaless (or add indexes to the classic 'long' schema).
##########
web-console/src/utils/sampler.mock.ts:
##########
@@ -29,9 +38,45 @@ This data is the returned sample when ingested with:
{"timestamp":"2016-04-11T09:22:00Z","user":"Alice","followers":3,"spend":5.1,"id":"73534533","tags":["a","b"],"nums":[7,8]}
*/
-export const JSON_SAMPLE: SampleHeaderAndRows = {
- header: ['timestamp', 'user', 'followers', 'spend', 'id', 'tags', 'nums'],
- rows: [
+export const JSON_SAMPLE: SampleResponse = {
+ numRowsRead: 3,
+ numRowsIndexed: 3,
+ logicalDimensions: [
+ { type: 'string', name: 'user', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ {
+ type: 'long',
+ name: 'followers',
+ multiValueHandling: 'SORTED_ARRAY',
+ createBitmapIndex: false,
+ },
+ { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'string', name: 'id', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ ],
+ physicalDimensions: [
+ { type: 'json', name: 'user', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ {
+ type: 'json',
+ name: 'followers',
+ multiValueHandling: 'SORTED_ARRAY',
+ createBitmapIndex: true,
+ },
+ { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'json', name: 'id', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
+ { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY',
createBitmapIndex: true },
Review Comment:
I suppose I changed this underneath you in #14014, but it might be nice to
update this at some point. (I also have a bug to fix here: right now the
sampler will show the new 'auto' in the physical schema but still have 'json'
in the logical schema, so I think it's OK to hold off on updating this.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]