Copilot commented on code in PR #13231:
URL: https://github.com/apache/trafficserver/pull/13231#discussion_r3346413733


##########
tests/gold_tests/logging/binary_log_v3.test.py:
##########
@@ -0,0 +1,145 @@
+'''
+End-to-end test of the self-describing v3 binary log format.
+'''
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+

Review Comment:
   This new test file does not start with the Apache license header: the module 
docstring precedes it. ATS requires new source/test files to start with the 
license header, so please move the docstring below the header.



##########
src/proxy/logging/YamlLogConfig.cc:
##########
@@ -233,11 +234,26 @@ YamlLogConfig::decodeLogObject(const YAML::Node &node)
     }
   }
 
+  // On-disk binary segment version (default: current). 2 emits the pre-v3
+  // layout for parsers that don't yet understand v3; binary logs only.
+  int binary_log_version = LOG_SEGMENT_VERSION;
+  if (node["binary_log_version"]) {
+    binary_log_version = node["binary_log_version"].as<int>();
+    if (!log_segment_version_supported(binary_log_version)) {
+      Warning("Invalid binary_log_version '%d' (supported %d-%d); using %d", 
binary_log_version, LOG_SEGMENT_VERSION_MIN_SUPPORTED,
+              LOG_SEGMENT_VERSION, LOG_SEGMENT_VERSION);
+      binary_log_version = LOG_SEGMENT_VERSION;
+    } else if (file_type != LOG_FILE_BINARY) {
+      Warning("binary_log_version only applies to binary logs; ignoring for 
this object");
+    }
+  }

Review Comment:
   The code warns that binary_log_version is ignored for non-binary logs, but 
it still passes the value into the LogObject ctor. That makes the warning 
inaccurate and can propagate the setting to ASCII/pipe objects unintentionally.



##########
src/traffic_logcat/logcat.cc:
##########
@@ -120,6 +124,34 @@ follow_rotate(const char *input_file, ino_t old_inode_num)
   }
 }
 
+// Emit every entry of a v3 segment as a line of JSON. v2 segments lack the
+// field-type schema needed for self-describing decode, so they are skipped
+// with a note. Returns the number of entries written.
+int
+write_json_logbuffer(LogBufferHeader *header, int out_fd)
+{
+  if (header->fmt_fieldtypes() == nullptr) {
+    fprintf(stderr, "JSON output requires a v3 binary log with a field-type 
schema; skipping segment.\n");
+    return 0;
+  }
+
+  LogBufferIterator iter(header);
+  LogEntryHeader   *entry;
+  char              line[LOG_MAX_FORMATTED_LINE];
+  int               count = 0;
+
+  while ((entry = iter.next()) != nullptr) {
+    int n = log_entry_to_json(entry, header, line, sizeof(line) - 1);
+    if (n > 0) {
+      line[n] = '\n';
+      if (write(out_fd, line, n + 1) > 0) {
+        ++count;
+      }
+    }
+  }

Review Comment:
   write_json_logbuffer() treats any positive write() result as success, but 
write() can return a short write. This can silently truncate JSON output 
(especially when writing to a pipe) while still incrementing count.



##########
src/traffic_logcat/LogEntryJson.cc:
##########
@@ -0,0 +1,247 @@
+/** @file
+
+  Reference decoder for the self-describing v3 binary log format.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include "LogEntryJson.h"
+
+#include "proxy/logging/LogBuffer.h"
+#include "proxy/logging/LogAccess.h"
+#include "proxy/logging/LogField.h"
+
+// Deliberately not including the global field table (proxy/logging/Log.h):
+// decoding must depend only on the segment's schema (see LogEntryJson.h).
+
+#include "tscore/ink_inet.h"
+#include "tscore/ink_align.h"
+
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+namespace
+{
+// fmt_fieldlist symbols are comma-separated (e.g. "chi,cqu,pssc"); tolerate
+// spaces too.
+bool
+is_field_sep(char c)
+{
+  return c == ',' || c == ' ';
+}
+} // namespace
+
+int
+log_entry_to_json(LogEntryHeader *entry, LogBufferHeader *header, char *buf, 
int buf_len)
+{
+  if (entry == nullptr || header == nullptr || buf == nullptr || buf_len <= 0) 
{
+    return -1;
+  }
+
+  // [seg_start, seg_end) is the only readable region (byte_count == segment 
size).
+  char *seg_start = reinterpret_cast<char *>(header);
+  char *seg_end   = seg_start + header->byte_count;
+
+  // v3 decode needs the field-type schema and the symbols.
+  char *schema_blob = header->fmt_fieldtypes();
+  char *symbols     = header->fmt_fieldlist();
+  if (schema_blob == nullptr || symbols == nullptr) {
+    return -1;
+  }
+  // Both must lie within the segment, including the fixed schema prefix.
+  if (schema_blob < seg_start || schema_blob + sizeof(LogFieldTypeSchema) > 
seg_end || symbols < seg_start || symbols >= seg_end) {
+    return -1;
+  }
+
+  // field_count is a uint16 that may sit at an unaligned offset (the writer
+  // places the schema right after the NUL-terminated header strings), so read
+  // it byte-wise rather than through the struct.
+  uint16_t fc16 = 0;
+  memcpy(&fc16, schema_blob, sizeof(fc16));
+  const unsigned field_count = fc16;
+  const uint8_t *codes       = reinterpret_cast<const uint8_t *>(schema_blob) 
+ sizeof(LogFieldTypeSchema);
+  if (reinterpret_cast<const char *>(codes) + field_count > seg_end) {
+    return -1;
+  }
+
+  // field_count must match the symbol count; a mismatch is a corrupt segment.
+  unsigned symbol_count = 0;
+  bool     in_token     = false;
+  for (const char *p = symbols; p < seg_end && *p != '\0'; ++p) {
+    if (is_field_sep(*p)) {
+      in_token = false;
+    } else if (!in_token) {
+      in_token = true;
+      ++symbol_count;
+    }
+  }
+  if (symbol_count != field_count) {
+    return -1;
+  }
+
+  // Clamp value reads to this entry (or the segment, if entry_len is bogus).
+  char *entry_start = reinterpret_cast<char *>(entry);
+  if (entry_start < seg_start || entry_start + sizeof(LogEntryHeader) > 
seg_end) {
+    return -1;
+  }
+  char *read_from = entry_start + sizeof(LogEntryHeader);
+  char *read_end  = entry_start + entry->entry_len;
+  if (read_end < read_from || read_end > seg_end) {
+    read_end = seg_end;
+  }
+
+  int written = 0;
+
+  // Append n bytes, keeping one byte in reserve for the trailing NUL.
+  auto put = [&](const char *src, int n) -> bool {
+    if (n < 0 || written + n >= buf_len) {
+      return false;
+    }
+    memcpy(buf + written, src, n);
+    written += n;
+    return true;
+  };
+  auto put_ch = [&](char c) -> bool { return put(&c, 1); };
+
+  if (!put_ch('{')) {
+    return -1;
+  }
+
+  const char *sym = symbols;
+  for (unsigned i = 0; i < field_count; ++i) {
+    // Next symbol token (comma-separated in fmt_fieldlist), bounded by the 
segment.
+    while (sym < seg_end && is_field_sep(*sym)) {
+      ++sym;
+    }
+    const char *sym_start = sym;
+    while (sym < seg_end && *sym != '\0' && !is_field_sep(*sym)) {
+      ++sym;
+    }
+    int sym_len = static_cast<int>(sym - sym_start);
+
+    if (i > 0 && !put_ch(',')) {
+      return -1;
+    }
+    if (!put_ch('"') || !put(sym_start, sym_len) || !put_ch('"') || 
!put_ch(':')) {
+      return -1;
+    }
+
+    // Dispatch on the framing type only, never the symbol; no field semantics
+    // (see the contract in LogEntryJson.h).
+    switch (static_cast<LogField::Type>(codes[i])) {
+    case LogField::Type::sINT: {
+      if (read_from + INK_MIN_ALIGN > read_end) {
+        return -1;
+      }
+      int64_t v = LogAccess::unmarshal_int(&read_from);
+      char    num[24]; // INT64_MIN is 20 digits + sign + NUL
+      int     n = snprintf(num, sizeof(num), "%" PRId64, v);
+      if (!put(num, n)) {
+        return -1;
+      }
+      break;
+    }
+    case LogField::Type::STRING: {
+      // NUL-terminated, 8-byte padded; require the terminator and padded field
+      // within the entry.
+      char *nul = static_cast<char *>(memchr(read_from, '\0', 
static_cast<size_t>(read_end - read_from)));
+      if (nul == nullptr) {
+        return -1;
+      }
+      const char *s          = read_from;
+      size_t      padded_len = INK_ALIGN_DEFAULT(static_cast<size_t>(nul - 
read_from) + 1);
+      if (read_from + padded_len > read_end) {
+        return -1;
+      }
+      read_from += padded_len;
+      if (!put_ch('"')) {
+        return -1;
+      }
+      for (const char *p = s; p < nul; ++p) {
+        // Minimal JSON escaping for structural characters.
+        if (*p == '"' || *p == '\\') {
+          if (!put_ch('\\')) {
+            return -1;
+          }
+        }
+        if (!put_ch(*p)) {
+          return -1;
+        }
+      }

Review Comment:
   log_entry_to_json() only escapes '"' and '\\' inside STRING fields. JSON 
also requires escaping control characters like newlines, tabs, and other bytes 
< 0x20; otherwise the output is not valid JSON for some log values.



##########
src/proxy/logging/LogBuffer.cc:
##########
@@ -88,6 +88,16 @@ LogBufferHeader::fmt_printf()
   return addr;
 }
 
+char *
+LogBufferHeader::fmt_fieldtypes()
+{
+  char *addr = nullptr;
+  if (fmt_fieldtypes_offset) {
+    addr = reinterpret_cast<char *>(this) + fmt_fieldtypes_offset;
+  }
+  return addr;
+}

Review Comment:
   LogBufferHeader::fmt_fieldtypes() is documented as returning nullptr for v2 
segments, but the implementation only checks fmt_fieldtypes_offset. When 
reading a v2 header by version-sized read (not sizeof(LogBufferHeader)), 
fmt_fieldtypes_offset is not initialized from disk, so callers can get a 
non-null pointer for v2.



##########
include/ts/apidefs.h.in:
##########
@@ -1650,10 +1650,10 @@ struct TSResponseAction {
 };
 
 enum TSLogType {
-  TS_LOG_TYPE_INT,
+  TS_LOG_TYPE_INT = 1, ///< LogField::Type::sINT
   // DINT is omitted from the public API for now, until we decide whether we 
keep the type
-  TS_LOG_TYPE_STRING = 2,
-  TS_LOG_TYPE_ADDR   = 3,
+  TS_LOG_TYPE_STRING = 3, ///< LogField::Type::STRING
+  TS_LOG_TYPE_ADDR   = 4, ///< LogField::Type::IP
 };

Review Comment:
   TSLogType enum values are renumbered (INT=1, STRING=3, ADDR=4) to match 
LogField::Type. This is an ABI-breaking change for existing plugins compiled 
against older headers (which passed 0/1/2) and can cause TSLogFieldRegister() 
to reject or misinterpret the type.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to