This is an automated email from the ASF dual-hosted git repository.

vogievetsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ba878f21b2 don't send lookups to sampler (#16234)
3ba878f21b2 is described below

commit 3ba878f21b2ba585192807de5005ca3814c86f2c
Author: Vadim Ogievetsky <[email protected]>
AuthorDate: Thu Apr 4 21:17:42 2024 -0700

    don't send lookups to sampler (#16234)
---
 web-console/src/utils/sampler.spec.ts | 48 ++++++++++++++++++++++++-
 web-console/src/utils/sampler.ts      | 68 +++++++++++++++++++++++++++++++++--
 2 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/web-console/src/utils/sampler.spec.ts 
b/web-console/src/utils/sampler.spec.ts
index 692a2d14143..b0aa6304f89 100644
--- a/web-console/src/utils/sampler.spec.ts
+++ b/web-console/src/utils/sampler.spec.ts
@@ -17,7 +17,7 @@
  */
 
 import type { SampleResponse } from './sampler';
-import { guessDimensionsFromSampleResponse } from './sampler';
+import { changeLookupInExpressionsSampling, guessDimensionsFromSampleResponse 
} from './sampler';
 
 describe('sampler', () => {
   describe('getInferredDimensionsFromSampleResponse', () => {
@@ -130,4 +130,50 @@ describe('sampler', () => {
       `);
     });
   });
+
+  describe('changeLookupInExpressionsSampling', () => {
+    it('does nothing when there is nothing to do', () => {
+      expect(changeLookupInExpressionsSampling(`concat("x", 
'lol')`)).toEqual(`concat("x", 'lol')`);
+    });
+
+    it('works with a SQL parsable expression', () => {
+      expect(
+        changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'), 
'lol')`),
+      ).toEqual(
+        `concat(concat('lookup_name', '[', "x", '] -- This is a placeholder, 
lookups are not supported in sampling'), 'lol')`,
+      );
+
+      expect(
+        changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name', 
'fallback'), 'lol')`),
+      ).toEqual(
+        `concat(nvl(concat('lookup_name', '[', "x", '] -- This is a 
placeholder, lookups are not supported in sampling'), 'fallback'), 'lol')`,
+      );
+
+      expect(
+        changeLookupInExpressionsSampling(
+          `concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')`,
+        ),
+      ).toEqual(`concat(null, 'lol')`);
+    });
+
+    it('works with a non-SQL parsable expression', () => {
+      expect(
+        changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'), 
'lol')^`),
+      ).toEqual(
+        `concat(concat('lookup_name','[',"x",'] -- This is a placeholder, 
lookups are not supported in sampling'), 'lol')^`,
+      );
+
+      expect(
+        changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name', 
'fallback'), 'lol')^`),
+      ).toEqual(
+        `concat(nvl(concat('lookup_name','[',"x",'] -- This is a placeholder, 
lookups are not supported in sampling'),'fallback'), 'lol')^`,
+      );
+
+      expect(
+        changeLookupInExpressionsSampling(
+          `concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')^`,
+        ),
+      ).toEqual(`concat(null, 'lol')^`);
+    });
+  });
 });
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index 8b1d25320b7..28489c52400 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-import { dedupe } from '@druid-toolkit/query';
+import { dedupe, F, SqlExpression, SqlFunction } from '@druid-toolkit/query';
 import * as JSONBig from 'json-bigint-native';
 
 import type {
@@ -186,7 +186,7 @@ export async function postToSampler(
   sampleSpec: SampleSpec,
   forStr: string,
 ): Promise<SampleResponse> {
-  sampleSpec = fixSamplerTypes(sampleSpec);
+  sampleSpec = fixSamplerLookups(fixSamplerTypes(sampleSpec));
 
   let sampleResp: any;
   try {
@@ -622,3 +622,67 @@ export async function sampleForSchema(
 
   return postToSampler(applyCache(sampleSpec, cacheRows), 'schema');
 }
+
+function fixSamplerLookups(sampleSpec: SampleSpec): SampleSpec {
+  const transforms: Transform[] | undefined = deepGet(
+    sampleSpec,
+    'spec.dataSchema.transformSpec.transforms',
+  );
+  if (!Array.isArray(transforms)) return sampleSpec;
+
+  return deepSet(
+    sampleSpec,
+    'spec.dataSchema.transformSpec.transforms',
+    transforms.map(transform => {
+      const { expression } = transform;
+      if (typeof expression !== 'string') return transform;
+      return { ...transform, expression: 
changeLookupInExpressionsSampling(expression) };
+    }),
+  );
+}
+
+/**
+ * Lookups do not work in the sampler because they are not loaded in the 
Overlord
+ * to prevent the user from getting an error like "Unknown lookup [lookup 
name]" we
+ * change the lookup expression to a placeholder
+ *
+ * lookup("x", 'lookup_name') => concat('lookup_name', '[', "x", '] -- This is 
a placeholder, lookups are not supported in sampling')
+ * lookup("x", 'lookup_name', 'replaceValue') => nvl(concat('lookup_name', 
'[', "x", '] -- This is a placeholder, lookups are not supported in sampling'), 
'replaceValue')
+ */
+export function changeLookupInExpressionsSampling(druidExpression: string): 
string {
+  if (!druidExpression.includes('lookup')) return druidExpression;
+
+  // The Druid expressions are very close to SQL so try parsing them as SQL, 
if it parses then this is a more robust way to apply this transformation
+  const parsedDruidExpression = SqlExpression.maybeParse(druidExpression);
+  if (parsedDruidExpression) {
+    return String(
+      parsedDruidExpression.walk(ex => {
+        if (ex instanceof SqlFunction && ex.getEffectiveFunctionName() === 
'LOOKUP') {
+          if (ex.numArgs() < 2 || ex.numArgs() > 3) return 
SqlExpression.parse('null');
+          const concat = F(
+            'concat',
+            ex.getArg(1),
+            '[',
+            ex.getArg(0),
+            '] -- This is a placeholder, lookups are not supported in 
sampling',
+          );
+
+          const replaceMissingValueWith = ex.getArg(2);
+          if (!replaceMissingValueWith) return concat;
+          return F('nvl', concat, replaceMissingValueWith);
+        }
+        return ex;
+      }),
+    );
+  }
+
+  // If we can not parse the expression as SQL then bash it with a regexp
+  return druidExpression.replace(/lookup\s*\(([^)]+)\)/g, (_, argString: 
string) => {
+    const args = argString.trim().split(/\s*,\s*/);
+    if (args.length < 2 || args.length > 3) return 'null';
+    const concat = `concat(${args[1]},'[',${args[0]},'] -- This is a 
placeholder, lookups are not supported in sampling')`;
+    const replaceMissingValueWith = args[2];
+    if (!replaceMissingValueWith) return concat;
+    return `nvl(${concat},${replaceMissingValueWith})`;
+  });
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to