Copilot commented on code in PR #13231:
URL: https://github.com/apache/trafficserver/pull/13231#discussion_r3471352577


##########
src/traffic_logcat/logcat.cc:
##########
@@ -120,6 +130,227 @@ follow_rotate(const char *input_file, ino_t old_inode_num)
   }
 }
 
+// Write all @a len bytes of @a buf to @a fd, retrying short writes and EINTR.
+// write() may transfer fewer bytes than requested (notably to a pipe), so a
+// single call is not enough to guarantee the whole line lands. Returns true
+// only if every byte was written.
+bool
+write_all(int fd, const char *buf, size_t len)
+{
+  size_t off = 0;
+  while (off < len) {
+    ssize_t w = write(fd, buf + off, len - off);
+    if (w < 0) {
+      if (errno == EINTR) {
+        continue;
+      }
+      return false;
+    }
+    off += static_cast<size_t>(w);
+  }
+  return true;
+}
+
+// Emit every entry of a v3 segment as a line of JSON. v2 segments lack the
+// field-type schema needed for self-describing decode, so they are skipped
+// with a note. Returns the number of entries written.
+int
+write_json_logbuffer(LogBufferHeader *header, int out_fd)
+{
+  if (header->fmt_fieldtypes() == nullptr) {
+    fprintf(stderr, "JSON output requires a v3 binary log with a field-type 
schema; skipping segment.\n");
+    return 0;
+  }
+
+  LogBufferIterator iter(header);
+  LogEntryHeader   *entry;
+  char              line[LOG_MAX_FORMATTED_LINE];
+  int               count = 0;

Review Comment:
   write_json_logbuffer() probes schema presence via header->fmt_fieldtypes(). 
That helper does unchecked pointer arithmetic on fmt_fieldtypes_offset, which 
is undefined behavior for corrupt/untrusted segments with an out-of-range 
offset. Since process_file() treats .blog input as potentially untrusted, check 
version/offset bounds before any pointer arithmetic.



##########
src/traffic_logcat/LogEntryJson.cc:
##########
@@ -0,0 +1,294 @@
+/** @file
+
+  Reference decoder for the self-describing v3 binary log format.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include "LogEntryJson.h"
+
+#include "proxy/logging/LogBuffer.h"
+#include "proxy/logging/LogAccess.h"
+#include "proxy/logging/LogField.h"
+
+// Deliberately not including the global field table (proxy/logging/Log.h):
+// decoding must depend only on the segment's schema (see LogEntryJson.h).
+
+#include "tscore/ink_inet.h"
+#include "tscore/ink_align.h"
+
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+namespace
+{
+// fmt_fieldlist symbols are comma-separated (e.g. "chi,cqu,pssc"); tolerate
+// spaces too.
+bool
+is_field_sep(char c)
+{
+  return c == ',' || c == ' ';
+}
+} // namespace
+
+int
+log_entry_to_json(LogEntryHeader *entry, LogBufferHeader *header, char *buf, 
int buf_len)
+{
+  if (entry == nullptr || header == nullptr || buf == nullptr || buf_len <= 0) 
{
+    return -1;
+  }
+
+  // [seg_start, seg_end) is the only readable region (byte_count == segment 
size).
+  char *seg_start = reinterpret_cast<char *>(header);
+  char *seg_end   = seg_start + header->byte_count;
+
+  // v3 decode needs the field-type schema and the symbols.
+  char *schema_blob = header->fmt_fieldtypes();
+  char *symbols     = header->fmt_fieldlist();
+  if (schema_blob == nullptr || symbols == nullptr) {
+    return -1;
+  }

Review Comment:
   log_entry_to_json() calls LogBufferHeader::fmt_fieldtypes()/fmt_fieldlist() 
before validating the offsets. Those helpers do unchecked pointer arithmetic 
(reinterpret_cast<char*>(this) + offset), which is undefined behavior if a 
corrupt/untrusted segment has an out-of-range offset. Since this decoder is 
explicitly for potentially untrusted .blog data, compute these pointers from 
offsets only after range-checking against byte_count.



##########
src/traffic_logcat/logcat.cc:
##########
@@ -120,6 +130,227 @@ follow_rotate(const char *input_file, ino_t old_inode_num)
   }
 }
 
+// Write all @a len bytes of @a buf to @a fd, retrying short writes and EINTR.
+// write() may transfer fewer bytes than requested (notably to a pipe), so a
+// single call is not enough to guarantee the whole line lands. Returns true
+// only if every byte was written.
+bool
+write_all(int fd, const char *buf, size_t len)
+{
+  size_t off = 0;
+  while (off < len) {
+    ssize_t w = write(fd, buf + off, len - off);
+    if (w < 0) {
+      if (errno == EINTR) {
+        continue;
+      }
+      return false;
+    }
+    off += static_cast<size_t>(w);
+  }
+  return true;
+}
+
+// Emit every entry of a v3 segment as a line of JSON. v2 segments lack the
+// field-type schema needed for self-describing decode, so they are skipped
+// with a note. Returns the number of entries written.
+int
+write_json_logbuffer(LogBufferHeader *header, int out_fd)
+{
+  if (header->fmt_fieldtypes() == nullptr) {
+    fprintf(stderr, "JSON output requires a v3 binary log with a field-type 
schema; skipping segment.\n");
+    return 0;
+  }
+
+  LogBufferIterator iter(header);
+  LogEntryHeader   *entry;
+  char              line[LOG_MAX_FORMATTED_LINE];
+  int               count = 0;
+
+  while ((entry = iter.next()) != nullptr) {
+    int n = log_entry_to_json(entry, header, line, sizeof(line) - 1);
+    if (n > 0) {
+      line[n] = '\n';
+      if (!write_all(out_fd, line, static_cast<size_t>(n) + 1)) {
+        fprintf(stderr, "Error writing JSON output: %s\n", strerror(errno));
+        return count;
+      }
+      ++count;
+    }
+  }
+  return count;
+}
+
+// Human-readable name for a LogBufferHeader::format_type value.
+const char *
+log_format_type_name(uint32_t format_type)
+{
+  switch (format_type) {
+  case LOG_FORMAT_CUSTOM:
+    return "CUSTOM";
+  case LOG_FORMAT_TEXT:
+    return "TEXT";
+  default:
+    return "UNKNOWN";
+  }
+}
+
+// Human-readable name for a v3 wire type code (LogField::Type).
+const char *
+log_field_type_name(uint8_t code)
+{
+  switch (static_cast<LogField::Type>(code)) {
+  case LogField::Type::sINT:
+    return "sINT";
+  case LogField::Type::dINT:
+    return "dINT";
+  case LogField::Type::STRING:
+    return "STRING";
+  case LogField::Type::IP:
+    return "IP";
+  case LogField::Type::INVALID:
+    return "INVALID";
+  default:
+    return "UNKNOWN";
+  }
+}
+
+// Return the NUL-terminated string living at @a offset within the segment, or
+// nullptr if the offset is zero, lands outside the segment, or is not
+// terminated before the segment ends. A .blog read here may be untrusted, so
+// every offset is validated against byte_count.
+const char *
+bounded_header_str(LogBufferHeader *header, uint32_t offset)
+{
+  if (offset == 0) {
+    return nullptr;
+  }
+
+  // Validate the offset against the segment size *before* forming a pointer:
+  // pointer arithmetic landing outside the object is undefined behavior even 
if
+  // the result is never dereferenced, and offset is untrusted here.
+  if (offset >= header->byte_count) {
+    return nullptr;
+  }
+
+  char *seg_start = reinterpret_cast<char *>(header);
+  char *seg_end   = seg_start + header->byte_count;
+  char *addr      = seg_start + offset;
+
+  if (memchr(addr, '\0', static_cast<size_t>(seg_end - addr)) == nullptr) {
+    return nullptr;
+  }
+  return addr;
+}
+
+// Print the v3 field-type schema: the field_count and, for each field, its
+// fmt_fieldlist symbol paired with its framing type. Every read is bounded
+// against the segment (mirrors the bounds checks in log_entry_to_json).
+void
+print_field_type_schema(LogBufferHeader *header, int out_fd)
+{
+  char *seg_start   = reinterpret_cast<char *>(header);
+  char *seg_end     = seg_start + header->byte_count;
+  char *schema_blob = header->fmt_fieldtypes();
+

Review Comment:
   print_field_type_schema() fetches schema_blob via header->fmt_fieldtypes(), 
which does unchecked pointer arithmetic on an untrusted offset. Even though you 
bounds-check schema_blob afterward, the UB may already have occurred. Derive 
schema_blob from fmt_fieldtypes_offset only after validating it against 
byte_count.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to