This is an automated email from the ASF dual-hosted git repository.
vogievetsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 3ba878f21b2 don't send lookups to sampler (#16234)
3ba878f21b2 is described below
commit 3ba878f21b2ba585192807de5005ca3814c86f2c
Author: Vadim Ogievetsky <[email protected]>
AuthorDate: Thu Apr 4 21:17:42 2024 -0700
don't send lookups to sampler (#16234)
---
web-console/src/utils/sampler.spec.ts | 48 ++++++++++++++++++++++++-
web-console/src/utils/sampler.ts | 68 +++++++++++++++++++++++++++++++++--
2 files changed, 113 insertions(+), 3 deletions(-)
diff --git a/web-console/src/utils/sampler.spec.ts
b/web-console/src/utils/sampler.spec.ts
index 692a2d14143..b0aa6304f89 100644
--- a/web-console/src/utils/sampler.spec.ts
+++ b/web-console/src/utils/sampler.spec.ts
@@ -17,7 +17,7 @@
*/
import type { SampleResponse } from './sampler';
-import { guessDimensionsFromSampleResponse } from './sampler';
+import { changeLookupInExpressionsSampling, guessDimensionsFromSampleResponse
} from './sampler';
describe('sampler', () => {
describe('getInferredDimensionsFromSampleResponse', () => {
@@ -130,4 +130,50 @@ describe('sampler', () => {
`);
});
});
+
+ describe('changeLookupInExpressionsSampling', () => {
+ it('does nothing when there is nothing to do', () => {
+ expect(changeLookupInExpressionsSampling(`concat("x",
'lol')`)).toEqual(`concat("x", 'lol')`);
+ });
+
+ it('works with a SQL parsable expression', () => {
+ expect(
+ changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'),
'lol')`),
+ ).toEqual(
+ `concat(concat('lookup_name', '[', "x", '] -- This is a placeholder,
lookups are not supported in sampling'), 'lol')`,
+ );
+
+ expect(
+ changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name',
'fallback'), 'lol')`),
+ ).toEqual(
+ `concat(nvl(concat('lookup_name', '[', "x", '] -- This is a
placeholder, lookups are not supported in sampling'), 'fallback'), 'lol')`,
+ );
+
+ expect(
+ changeLookupInExpressionsSampling(
+ `concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')`,
+ ),
+ ).toEqual(`concat(null, 'lol')`);
+ });
+
+ it('works with a non-SQL parsable expression', () => {
+ expect(
+ changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name'),
'lol')^`),
+ ).toEqual(
+ `concat(concat('lookup_name','[',"x",'] -- This is a placeholder,
lookups are not supported in sampling'), 'lol')^`,
+ );
+
+ expect(
+ changeLookupInExpressionsSampling(`concat(lookup("x", 'lookup_name',
'fallback'), 'lol')^`),
+ ).toEqual(
+ `concat(nvl(concat('lookup_name','[',"x",'] -- This is a placeholder,
lookups are not supported in sampling'),'fallback'), 'lol')^`,
+ );
+
+ expect(
+ changeLookupInExpressionsSampling(
+ `concat(lookup("x", 'lookup_name', 'fallback', '?'), 'lol')^`,
+ ),
+ ).toEqual(`concat(null, 'lol')^`);
+ });
+ });
});
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index 8b1d25320b7..28489c52400 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-import { dedupe } from '@druid-toolkit/query';
+import { dedupe, F, SqlExpression, SqlFunction } from '@druid-toolkit/query';
import * as JSONBig from 'json-bigint-native';
import type {
@@ -186,7 +186,7 @@ export async function postToSampler(
sampleSpec: SampleSpec,
forStr: string,
): Promise<SampleResponse> {
- sampleSpec = fixSamplerTypes(sampleSpec);
+ sampleSpec = fixSamplerLookups(fixSamplerTypes(sampleSpec));
let sampleResp: any;
try {
@@ -622,3 +622,67 @@ export async function sampleForSchema(
return postToSampler(applyCache(sampleSpec, cacheRows), 'schema');
}
+
+function fixSamplerLookups(sampleSpec: SampleSpec): SampleSpec {
+ const transforms: Transform[] | undefined = deepGet(
+ sampleSpec,
+ 'spec.dataSchema.transformSpec.transforms',
+ );
+ if (!Array.isArray(transforms)) return sampleSpec;
+
+ return deepSet(
+ sampleSpec,
+ 'spec.dataSchema.transformSpec.transforms',
+ transforms.map(transform => {
+ const { expression } = transform;
+ if (typeof expression !== 'string') return transform;
+ return { ...transform, expression:
changeLookupInExpressionsSampling(expression) };
+ }),
+ );
+}
+
+/**
+ * Lookups do not work in the sampler because they are not loaded in the
Overlord
+ * to prevent the user from getting an error like "Unknown lookup [lookup
name]" we
+ * change the lookup expression to a placeholder
+ *
+ * lookup("x", 'lookup_name') => concat('lookup_name', '[', "x", '] -- This is
a placeholder, lookups are not supported in sampling')
+ * lookup("x", 'lookup_name', 'replaceValue') => nvl(concat('lookup_name',
'[', "x", '] -- This is a placeholder, lookups are not supported in sampling'),
'replaceValue')
+ */
+export function changeLookupInExpressionsSampling(druidExpression: string):
string {
+ if (!druidExpression.includes('lookup')) return druidExpression;
+
+ // The Druid expressions are very close to SQL so try parsing them as SQL,
if it parses then this is a more robust way to apply this transformation
+ const parsedDruidExpression = SqlExpression.maybeParse(druidExpression);
+ if (parsedDruidExpression) {
+ return String(
+ parsedDruidExpression.walk(ex => {
+ if (ex instanceof SqlFunction && ex.getEffectiveFunctionName() ===
'LOOKUP') {
+ if (ex.numArgs() < 2 || ex.numArgs() > 3) return
SqlExpression.parse('null');
+ const concat = F(
+ 'concat',
+ ex.getArg(1),
+ '[',
+ ex.getArg(0),
+ '] -- This is a placeholder, lookups are not supported in
sampling',
+ );
+
+ const replaceMissingValueWith = ex.getArg(2);
+ if (!replaceMissingValueWith) return concat;
+ return F('nvl', concat, replaceMissingValueWith);
+ }
+ return ex;
+ }),
+ );
+ }
+
+ // If we can not parse the expression as SQL then bash it with a regexp
+ return druidExpression.replace(/lookup\s*\(([^)]+)\)/g, (_, argString:
string) => {
+ const args = argString.trim().split(/\s*,\s*/);
+ if (args.length < 2 || args.length > 3) return 'null';
+ const concat = `concat(${args[1]},'[',${args[0]},'] -- This is a
placeholder, lookups are not supported in sampling')`;
+ const replaceMissingValueWith = args[2];
+ if (!replaceMissingValueWith) return concat;
+ return `nvl(${concat},${replaceMissingValueWith})`;
+ });
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]