317brian commented on code in PR #14783:
URL: https://github.com/apache/druid/pull/14783#discussion_r1287762585
##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx:
##########
@@ -2135,33 +2136,53 @@ export function updateIngestionType(
return newSpec;
}
+function findValueWithNewline(rows: string[][]): string | undefined {
+ return findMap(rows, row => findMap(row, value => (value.includes('\n') ?
value : undefined)));
+}
+
export function issueWithSampleData(
- sampleData: SampleResponse,
- spec: Partial<IngestionSpec>,
+ sampleLines: string[],
+ isStreaming: boolean,
): JSX.Element | undefined {
- if (isStreamingSpec(spec)) return;
+ if (!sampleLines.length) return;
- const firstData: string = findMap(sampleData.data, l => l.input?.raw);
- if (firstData) return;
+ const firstLine = sampleLines[0];
+ if (!isStreaming) {
+ if (firstLine === '{') {
+ return (
+ <>
+ This data looks like multi-line formatted JSON object. For Druid to
parse a text file it
+ must have one row per event. Consider reformatting your data as{' '}
+ <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+ </>
+ );
+ }
- if (firstData === '{') {
- return (
- <>
- This data looks like multi-line formatted JSON object. For Druid to
parse a text file it
- must have one row per event. Consider reformatting your data as{' '}
- <ExternalLink href="http://ndjson.org/">newline delimited
JSON</ExternalLink>.
- </>
- );
+ if (oneOf(firstLine, '[', '[]')) {
+ return (
+ <>
+ This data looks like a multi-line JSON array. For Druid to parse a
text file it must have
Review Comment:
```suggestion
This data looks like a multi-line JSON array. For Druid to parse a
text file, it must have
```
##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx:
##########
@@ -2135,33 +2136,53 @@ export function updateIngestionType(
return newSpec;
}
+function findValueWithNewline(rows: string[][]): string | undefined {
+ return findMap(rows, row => findMap(row, value => (value.includes('\n') ?
value : undefined)));
+}
+
export function issueWithSampleData(
- sampleData: SampleResponse,
- spec: Partial<IngestionSpec>,
+ sampleLines: string[],
+ isStreaming: boolean,
): JSX.Element | undefined {
- if (isStreamingSpec(spec)) return;
+ if (!sampleLines.length) return;
- const firstData: string = findMap(sampleData.data, l => l.input?.raw);
- if (firstData) return;
+ const firstLine = sampleLines[0];
+ if (!isStreaming) {
+ if (firstLine === '{') {
+ return (
+ <>
+ This data looks like multi-line formatted JSON object. For Druid to
parse a text file it
+ must have one row per event. Consider reformatting your data as{' '}
+ <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+ </>
+ );
+ }
- if (firstData === '{') {
- return (
- <>
- This data looks like multi-line formatted JSON object. For Druid to
parse a text file it
- must have one row per event. Consider reformatting your data as{' '}
- <ExternalLink href="http://ndjson.org/">newline delimited
JSON</ExternalLink>.
- </>
- );
+ if (oneOf(firstLine, '[', '[]')) {
+ return (
+ <>
+ This data looks like a multi-line JSON array. For Druid to parse a
text file it must have
+ one row per event. Consider reformatting your data as{' '}
+ <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+ </>
+ );
+ }
}
- if (oneOf(firstData, '[', '[]')) {
- return (
- <>
- This data looks like a multi-line JSON array. For Druid to parse a
text file it must have
- one row per event. Consider reformatting your data as{' '}
- <ExternalLink href="http://ndjson.org/">newline delimited
JSON</ExternalLink>.
- </>
+ const format = guessSimpleInputFormat(sampleLines, isStreaming);
+ const text = sampleLines.join('\n');
+ if (oneOf(format.type, 'csv', 'tsv')) {
+ const valueWithNewline = findValueWithNewline(
+ format.type === 'csv' ? csvParseRows(text) : tsvParseRows(text),
);
+ if (valueWithNewline) {
+ const formatLabel = format.type.toUpperCase();
+ return (
+ <>
+ {`This ${formatLabel} data has values that contain new lines. Druid
requires ${formatLabel} files to have one event per line and thus
${formatLabel} values can not contain new lines. Consider encoding new lines in
the values of your ${formatLabel} with some special delimiter.`}
Review Comment:
```suggestion
{`This ${formatLabel} data has values that contain new lines.
Druid requires ${formatLabel} files to have one event per line, so
${formatLabel} values cannot contain new lines. Consider encoding new lines in
the values of your ${formatLabel} with some special delimiter.`}
```
##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx:
##########
@@ -2135,33 +2136,53 @@ export function updateIngestionType(
return newSpec;
}
+function findValueWithNewline(rows: string[][]): string | undefined {
+ return findMap(rows, row => findMap(row, value => (value.includes('\n') ?
value : undefined)));
+}
+
export function issueWithSampleData(
- sampleData: SampleResponse,
- spec: Partial<IngestionSpec>,
+ sampleLines: string[],
+ isStreaming: boolean,
): JSX.Element | undefined {
- if (isStreamingSpec(spec)) return;
+ if (!sampleLines.length) return;
- const firstData: string = findMap(sampleData.data, l => l.input?.raw);
- if (firstData) return;
+ const firstLine = sampleLines[0];
+ if (!isStreaming) {
+ if (firstLine === '{') {
+ return (
+ <>
+ This data looks like multi-line formatted JSON object. For Druid to
parse a text file it
Review Comment:
```suggestion
This data looks like a multi-line formatted JSON object. For Druid
to parse a text file, it
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]