clintropolis commented on code in PR #14017:
URL: https://github.com/apache/druid/pull/14017#discussion_r1158848604


##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts:
##########
@@ -672,36 +670,36 @@ describe('ingestion-spec', () => {
 });
 
 describe('spec utils', () => {
-  const ingestionSpec: IngestionSpec = {
-    type: 'index_parallel',
-    spec: {
-      ioConfig: {
-        type: 'index_parallel',
-        inputSource: {
-          type: 'http',
-          uris: ['https://website.com/wikipedia.json.gz'],
-        },
-        inputFormat: {
-          type: 'json',
-        },
-      },
-      tuningConfig: {
-        type: 'index_parallel',
-      },
-      dataSchema: {
-        dataSource: 'wikipedia',
-        granularitySpec: {
-          segmentGranularity: 'day',
-          queryGranularity: 'hour',
-        },
-        timestampSpec: {
-          column: 'timestamp',
-          format: 'iso',
-        },
-        dimensionsSpec: {},
-      },
-    },
-  };
+  // const ingestionSpec: IngestionSpec = {
+  //   type: 'index_parallel',
+  //   spec: {

Review Comment:
   Are these commented-out blocks supposed to be here?



##########
web-console/src/utils/sampler.ts:
##########
@@ -63,6 +63,38 @@ export interface SamplerConfig {
 
 export interface SampleResponse {
   data: SampleEntry[];
+  logicalSegmentSchema: { name: string; type: string }[];
+  logicalDimensions: DimensionSpec[];
+  physicalDimensions: DimensionSpec[];
+  numRowsIndexed: number;
+  numRowsRead: number;
+}
+
+export function getHeaderNamesFromSampleResponse(
+  sampleResponse: SampleResponse,
+  ignoreTimeColumn = false,
+) {
+  return filterMap(sampleResponse.logicalSegmentSchema, s =>
+    ignoreTimeColumn && s.name === '__time' ? undefined : s.name,
+  );
+}
+
+export function guessDimensionsFromSampleResponse(sampleResponse: 
SampleResponse): DimensionSpec[] {
+  const { logicalDimensions, physicalDimensions, data } = sampleResponse;
+  return logicalDimensions.map(d => {
+    // Boolean column are currently reported as "long" so let's turn them into 
"string"

Review Comment:
   I'm still looking into this, since technically it looks like this should 
depend on the value of `druid.expressions.useStrictBooleans`. 
Additionally, 'long' is probably the better choice when using 'auto', since 
longs do have indexes in this mode. So I'm a bit conflicted about this staying 
like this long term, but I think it's fine at least until we make 'auto' the 
schemaless default (or add indexes to the classic 'long' schema).



##########
web-console/src/utils/sampler.mock.ts:
##########
@@ -29,9 +38,45 @@ This data is the returned sample when ingested with:
 
{"timestamp":"2016-04-11T09:22:00Z","user":"Alice","followers":3,"spend":5.1,"id":"73534533","tags":["a","b"],"nums":[7,8]}
  */
 
-export const JSON_SAMPLE: SampleHeaderAndRows = {
-  header: ['timestamp', 'user', 'followers', 'spend', 'id', 'tags', 'nums'],
-  rows: [
+export const JSON_SAMPLE: SampleResponse = {
+  numRowsRead: 3,
+  numRowsIndexed: 3,
+  logicalDimensions: [
+    { type: 'string', name: 'user', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    {
+      type: 'long',
+      name: 'followers',
+      multiValueHandling: 'SORTED_ARRAY',
+      createBitmapIndex: false,
+    },
+    { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'string', name: 'id', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+  ],
+  physicalDimensions: [
+    { type: 'json', name: 'user', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    {
+      type: 'json',
+      name: 'followers',
+      multiValueHandling: 'SORTED_ARRAY',
+      createBitmapIndex: true,
+    },
+    { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'json', name: 'id', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },
+    { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', 
createBitmapIndex: true },

Review Comment:
   I suppose I changed this underneath you in #14014, but it might be nice to 
update this at some point. (I also have a bug to fix here: right now the 
sampler shows the new 'auto' in the physical schema but still has 'json' 
in the logical schema, so I think it's OK to hold off on updating this.)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to