ektravel commented on code in PR #14783:
URL: https://github.com/apache/druid/pull/14783#discussion_r1287773480


##########
web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx:
##########
@@ -2135,33 +2136,53 @@ export function updateIngestionType(
   return newSpec;
 }
 
+function findValueWithNewline(rows: string[][]): string | undefined {
+  return findMap(rows, row => findMap(row, value => (value.includes('\n') ? value : undefined)));
+}
+
 export function issueWithSampleData(
-  sampleData: SampleResponse,
-  spec: Partial<IngestionSpec>,
+  sampleLines: string[],
+  isStreaming: boolean,
 ): JSX.Element | undefined {
-  if (isStreamingSpec(spec)) return;
+  if (!sampleLines.length) return;
 
-  const firstData: string = findMap(sampleData.data, l => l.input?.raw);
-  if (firstData) return;
+  const firstLine = sampleLines[0];
+  if (!isStreaming) {
+    if (firstLine === '{') {
+      return (
+        <>
+          This data looks like multi-line formatted JSON object. For Druid to parse a text file it
+          must have one row per event. Consider reformatting your data as{' '}
+          <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+        </>
+      );
+    }
 
-  if (firstData === '{') {
-    return (
-      <>
-        This data looks like multi-line formatted JSON object. For Druid to parse a text file it
-        must have one row per event. Consider reformatting your data as{' '}
-        <ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
-      </>
-    );
+    if (oneOf(firstLine, '[', '[]')) {
+      return (
+        <>
+          This data looks like a multi-line JSON array. For Druid to parse a text file it must have
+          one row per event. Consider reformatting your data as{' '}
+          <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+        </>
+      );
+    }
   }
 
-  if (oneOf(firstData, '[', '[]')) {
-    return (
-      <>
-        This data looks like a multi-line JSON array. For Druid to parse a text file it must have
-        one row per event. Consider reformatting your data as{' '}
-        <ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
-      </>
+  const format = guessSimpleInputFormat(sampleLines, isStreaming);
+  const text = sampleLines.join('\n');
+  if (oneOf(format.type, 'csv', 'tsv')) {
+    const valueWithNewline = findValueWithNewline(
+      format.type === 'csv' ? csvParseRows(text) : tsvParseRows(text),
     );
+    if (valueWithNewline) {
+      const formatLabel = format.type.toUpperCase();
+      return (
+        <>
+          {`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line and thus ${formatLabel} values can not contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}

Review Comment:
   ```suggestion
             {`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line, so ${formatLabel} values cannot contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to