Add new utility files util/trace-dat.c and util/trace-dat.h
implementing the infrastructure for exporting perf.data tracepoints
to trace.dat format compatible with trace-cmd and KernelShark.

trace-dat.c defines all globals and functions needed for:
- Per-cpu raw event buffer management (init_cpu_buffers,
  collect_cpu_event, free_cpu_buffers)
- ftrace ring buffer page construction (write_page, write_cpu_dat)
- trace.dat section writers (write_strings_section,
  write_options_section1, write_options_section2,
  write_flyrecord_section)

trace-dat.h declares all globals and function prototypes to be
used by data-convert-trace.c and trace-event-read.c.

Signed-off-by: Tanushree Shah <[email protected]>
---
 tools/perf/util/Build       |   1 +
 tools/perf/util/trace-dat.c | 705 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/trace-dat.h |  79 ++++
 3 files changed, 785 insertions(+)
 create mode 100644 tools/perf/util/trace-dat.c
 create mode 100644 tools/perf/util/trace-dat.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 70cc91d00804..c000d8032d25 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -98,6 +98,7 @@ perf-util-y += trace-event-scripting.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += trace-dat.o
 perf-util-y += sort.o
 perf-util-y += hist.o
 perf-util-y += util.o
diff --git a/tools/perf/util/trace-dat.c b/tools/perf/util/trace-dat.c
new file mode 100644
index 000000000000..aa34a7b89b7a
--- /dev/null
+++ b/tools/perf/util/trace-dat.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2026, IBM Corporation
+ * Author: Tanushree Shah <[email protected]>
+ *
+ * trace-dat.c
+ *
+ * This file implements the trace.dat format writer for perf tool.
+ * It collects trace events from multiple CPUs and writes them in
+ * the trace-cmd compatible format.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include "api/fs/tracing_path.h"
+#include "trace-dat.h"
+#include "trace-event.h"
+#include "session.h"
+#include "header.h"
+#include "../perf.h"
+#include "debug.h"
+
+/* ftrace ring buffer constants for trace.dat flyrecord section
+ *
+ * Each page has a 16-byte header (timestamp + commit size), followed by
+ * variable-length records. Each record has a 4-byte header word encoding:
+ *   Bits 0-4:  Type/Length field (5 bits, masked by TYPE_LEN_MASK)
+ *   Bits 5-31:  Time delta from page base timestamp (27 bits, masked by 
TIME_MASK)
+ */
+#define TRACE_DAT_RECORD_HEADER_SIZE 16                /* Page header: 8-byte 
ts + 8-byte commit */
+#define TRACE_DAT_RECORD_TYPE_LEN_MASK 0x1F            /* Extract lower 5 bits 
for type/length */
+#define TRACE_DAT_RECORD_TIME_SHIFT    5               /* Shift to extract 
time delta */
+#define TRACE_DAT_RECORD_TIME_MASK 0x07FFFFFF  /* Mask for 27-bit time delta */
+#define TRACE_DAT_WORD_SIZE    4               /* Records aligned to 4-byte 
boundaries */
+#define TRACE_DAT_WORD_ALIGN_MASK 3
+
+/* Initial capacity for per-CPU event buffer (grows by doubling) */
+#define INITIAL_EVENT_CAPACITY 1024
+/* Initial capacity for page record array (grows by doubling) */
+#define INITIAL_PAGE_RECORD_CAPACITY 64
+/* Buffer size for reading trace_clock string from debugfs/tracefs */
+#define CLOCK_BUFFER_SIZE 256
+
+FILE *trace_dat_fp;
+int trace_dat_page_size;
+int trace_dat_nr_cpus;
+long trace_dat_options_offset;
+long trace_dat_header_info_offset;
+long trace_dat_events_format_offset;
+long trace_dat_ftrace_format_offset;
+long trace_dat_kallsyms_offset;
+long trace_dat_cmdline_offset;
+long trace_dat_next_options_offset;
+
+
+/**
+ * struct cpu_event - Single trace event from a CPU
+ * @ts: Timestamp of the event
+ * @raw: Raw event data
+ * @raw_size: Size of raw event data in bytes
+ */
+struct cpu_event {
+       unsigned long long ts;
+       void *raw;
+       unsigned int raw_size;
+};
+
+/**
+ * struct cpu_events - Collection of trace events for a single CPU
+ * @events: Array of events
+ * @count: Number of events currently stored
+ * @capacity: Maximum number of events that can be stored
+ */
+struct cpu_events {
+       struct cpu_event  *events;
+       int count;
+       int capacity;
+};
+
+static struct cpu_events *trace_cpu_data;
+static long *buffer_opt_cpu_offsets_pos;
+static long opt_payload_start;
+
+/* Allocate per-cpu event buffers for tracepoint data collection */
+int trace_dat__init_cpu_buffers(int nr_cpus)
+{
+       trace_cpu_data = calloc(nr_cpus, sizeof(struct cpu_events));
+       if (!trace_cpu_data)
+               return -ENOMEM;
+       buffer_opt_cpu_offsets_pos = calloc(nr_cpus, sizeof(long));
+       if (!buffer_opt_cpu_offsets_pos) {
+               free(trace_cpu_data);
+               trace_cpu_data = NULL;
+               return -ENOMEM;
+       }
+       trace_dat_nr_cpus = nr_cpus;
+       return 0;
+}
+
+/* Store raw tracepoint event data in per-cpu buffer for trace.dat
+ * flyrecord
+ */
+int trace_dat__collect_cpu_event(int cpu, unsigned long long ts,
+                                void *raw, unsigned int raw_size)
+{
+       struct cpu_events *cpu_events;
+
+       if (!trace_cpu_data || cpu < 0 || cpu >= trace_dat_nr_cpus)
+               return -EINVAL;
+
+       if (!raw || raw_size == 0)
+               return -EINVAL;
+
+       cpu_events = &trace_cpu_data[cpu];
+
+       if (cpu_events->count >= cpu_events->capacity) {
+               cpu_events->capacity = cpu_events->capacity ?
+                                      cpu_events->capacity * 2 : 
INITIAL_EVENT_CAPACITY;
+               cpu_events->events   = realloc(cpu_events->events,
+                                      cpu_events->capacity * 
sizeof(*cpu_events->events));
+               if (!cpu_events->events)
+                       return -ENOMEM;
+       }
+
+       cpu_events->events[cpu_events->count].ts = ts;
+       cpu_events->events[cpu_events->count].raw = malloc(raw_size);
+       if (!cpu_events->events[cpu_events->count].raw)
+               return -ENOMEM;
+
+       memcpy(cpu_events->events[cpu_events->count].raw, raw, raw_size);
+       cpu_events->events[cpu_events->count].raw_size = raw_size;
+       cpu_events->count++;
+
+       return 0;
+}
+
+/* Write a single page of trace records */
+static int trace_dat__write_page(FILE *fp, unsigned long long base_ts,
+                       char **records, int *rec_sizes, int nr_recs)
+{
+       unsigned long long commit = 0;
+       int offset = TRACE_DAT_RECORD_HEADER_SIZE;
+       int i;
+       char *page;
+
+       page = calloc(1, trace_dat_page_size);
+       if (!page)
+               return -ENOMEM;
+
+       for (i = 0; i < nr_recs; i++) {
+               memcpy(page + offset, records[i], rec_sizes[i]);
+               offset += rec_sizes[i];
+               commit += rec_sizes[i];
+       }
+
+       memcpy(page, &base_ts, sizeof(base_ts));
+       memcpy(page + sizeof(base_ts), &commit, sizeof(commit));
+
+       if (!fwrite(page, 1, trace_dat_page_size, fp)) {
+               free(page);
+               return -EIO;
+       }
+       free(page);
+
+       return 0;
+}
+
+/* Write all trace data for a single CPU as trace.dat flyrecord pages */
+static int trace_dat__write_cpu_dat(FILE *fp, int cpu, unsigned long long 
*file_offset_out)
+{
+       struct cpu_events *cpu_events = &trace_cpu_data[cpu];
+       unsigned long long base_ts;
+       unsigned long long file_offset;
+       char **page_records = NULL;
+       int *page_rec_sizes = NULL;
+       int page_cap = 0;
+       int nr_page_recs = 0;
+       int page_size_used = 0;
+       int ret = 0;
+       int i, j;
+
+       file_offset = ftell(fp);
+       *file_offset_out = file_offset;
+
+       if (cpu_events->count == 0) {
+               char *empty_page = calloc(1, trace_dat_page_size);
+
+               if (!empty_page)
+                       return -ENOMEM;
+               if (!fwrite(empty_page, 1, trace_dat_page_size, fp)) {
+                       free(empty_page);
+                       return -EIO;
+               }
+               free(empty_page);
+               return 0;
+       }
+
+       base_ts = cpu_events->events[0].ts;
+
+       for (i = 0; i < cpu_events->count; i++) {
+               struct cpu_event *event = &cpu_events->events[i];
+               unsigned long long time_delta = event->ts - base_ts;
+               unsigned int data_len = event->raw_size;
+               unsigned int words = (data_len + TRACE_DAT_WORD_ALIGN_MASK) / 
TRACE_DAT_WORD_SIZE;
+               unsigned int type_len = words & TRACE_DAT_RECORD_TYPE_LEN_MASK;
+               unsigned int hdr_word = ((time_delta & 
TRACE_DAT_RECORD_TIME_MASK) <<
+                                       TRACE_DAT_RECORD_TIME_SHIFT) | type_len;
+               int rec_size;
+               char *rec;
+
+               rec_size = TRACE_DAT_WORD_SIZE + data_len;
+               if (rec_size % TRACE_DAT_WORD_SIZE)
+                       rec_size += TRACE_DAT_WORD_SIZE - (rec_size % 
TRACE_DAT_WORD_SIZE);
+
+               rec = calloc(1, rec_size);
+               if (!rec)
+                       return -ENOMEM;
+               memcpy(rec, &hdr_word, TRACE_DAT_WORD_SIZE);
+               memcpy(rec + TRACE_DAT_WORD_SIZE, event->raw, data_len);
+
+               if (page_size_used + rec_size > trace_dat_page_size -
+                   TRACE_DAT_RECORD_HEADER_SIZE) {
+                       ret = trace_dat__write_page(fp, base_ts,
+                             page_records, page_rec_sizes, nr_page_recs);
+                       for (j = 0; j < nr_page_recs; j++)
+                               free(page_records[j]);
+                       nr_page_recs = 0;
+                       page_size_used = 0;
+                       base_ts = event->ts;
+                       if (ret < 0)
+                               goto out_free;
+               }
+
+               if (nr_page_recs >= page_cap) {
+                       char **tmp_records;
+                       int *tmp_sizes;
+
+                       page_cap = page_cap ? page_cap * 2 : 
INITIAL_PAGE_RECORD_CAPACITY;
+                       tmp_records = realloc(page_records, page_cap * 
sizeof(char *));
+                       tmp_sizes = realloc(page_rec_sizes, page_cap * 
sizeof(int));
+                       if (!tmp_records || !tmp_sizes) {
+                               ret = -ENOMEM;
+                               goto out_free;
+                       }
+                       page_records = tmp_records;
+                       page_rec_sizes = tmp_sizes;
+               }
+               page_records[nr_page_recs] = rec;
+               page_rec_sizes[nr_page_recs] = rec_size;
+               nr_page_recs++;
+               page_size_used += rec_size;
+       }
+
+       if (nr_page_recs > 0) {
+               ret = trace_dat__write_page(fp, base_ts,
+                     page_records, page_rec_sizes, nr_page_recs);
+       }
+out_free:
+       for (j = 0; j < nr_page_recs; j++)
+               free(page_records[j]);
+       free(page_records);
+       free(page_rec_sizes);
+       return ret;
+}
+
+/* Write the strings section containing section name lookup table */
+int trace_dat__write_strings_section(void)
+{
+       unsigned short section_id = TRACE_DAT_SECTION_STRINGS;
+       unsigned short flags = 0;
+       unsigned long long section_size = 0;
+       static const char * const section_names[] = {
+               "headers",              /* offset 0 - strid for section 16  */
+               "ftrace event formats", /* offset 8 - strid for section 17  */
+               "events format",        /* offset 29 - strid for section 18  */
+               "kallsyms",             /* offset 43 - strid for section 19  */
+               "cmdlines",             /* offset 52 - strid for section 21  */
+               "strings",              /* offset 61 - strid for section 15  */
+               "options",              /* offset 69 - strid for options 1   */
+               "options",              /* offset 77 - strid for options 2   */
+               "buffer-flyrecord",     /* offset 85 - strid for flyrecord 3 */
+               NULL
+       };
+
+       /* string_id points to "strings" string itself */
+       unsigned int string_id = STRID_STRINGS;
+       int i;
+
+       if (!trace_dat_fp)
+               return -EBADF;
+
+       for (i = 0; section_names[i] != NULL; i++)
+               section_size += strlen(section_names[i]) + 1;
+
+       /* write section header */
+       if (!fwrite(&section_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+                      !fwrite(&flags, sizeof(unsigned short), 1, trace_dat_fp) 
||
+                      !fwrite(&string_id, sizeof(unsigned int), 1, 
trace_dat_fp) ||
+                      !fwrite(&section_size, sizeof(unsigned long long), 1, 
trace_dat_fp))
+               return -EIO;
+
+       /* write strings */
+       for (i = 0; section_names[i] != NULL; i++)
+               if (!fwrite(section_names[i], 1, strlen(section_names[i]) + 1, 
trace_dat_fp))
+                       return -EIO;
+       return 0;
+}
+
+/* Writes options section containing CPUCOUNT, TRACECLOCK, EVENT_FORMAT, 
HEADER_INFO,
+ * FTRACE_EVENTS, KALLSYMS, CMDLINES options, ending with DONE option pointing 
to next section.
+ */
+int trace_dat__write_options_section1(void)
+{
+       unsigned short section_id = TRACE_DAT_SECTION_OPTIONS;
+       unsigned short flags = 0;
+       unsigned int string_id = STRID_OPTIONS_1;
+       unsigned long long section_size = 0;
+       long section_size_pos;
+       long payload_start;
+       unsigned long long section_start;
+       unsigned short opt_id;
+       unsigned int opt_size;
+       char clock_buf[CLOCK_BUFFER_SIZE];
+       FILE *clock_file;
+       size_t bytes_read;
+       char *path;
+       unsigned long long next_offset;
+       long end_pos;
+
+       if (!trace_dat_fp)
+               return -EBADF;
+
+       /* fill options_offset in initial format */
+       section_start = ftell(trace_dat_fp);
+
+       if (fseek(trace_dat_fp, trace_dat_options_offset, SEEK_SET) < 0 ||
+          !fwrite(&section_start, sizeof(unsigned long long), 1, trace_dat_fp) 
||
+          fseek(trace_dat_fp, 0, SEEK_END) < 0)
+               return -EIO;
+
+       /* write section header */
+       if (!fwrite(&section_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&flags, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&string_id, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+       section_size_pos = ftell(trace_dat_fp);
+       if (!fwrite(&section_size, sizeof(unsigned long long), 1, trace_dat_fp))
+               return -EIO;
+
+       payload_start = ftell(trace_dat_fp);
+
+       /* CPUCOUNT option */
+       opt_id = TRACE_DAT_OPTION_CPUCOUNT;
+       opt_size = sizeof(unsigned int);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_nr_cpus, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+
+       /* TRACECLOCK option */
+       opt_id = TRACE_DAT_OPTION_TRACECLOCK;
+
+       path = get_tracing_file("trace_clock");
+       clock_file = fopen(path, "r");
+       put_tracing_file(path);
+       if (clock_file) {
+               bytes_read = fread(clock_buf, 1, sizeof(clock_buf) - 1, 
clock_file);
+               fclose(clock_file);
+               clock_buf[bytes_read] = '\0';
+       } else {
+               strcpy(clock_buf, "local\n");
+               bytes_read = strlen(clock_buf);
+       }
+       opt_size = bytes_read + 1;
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(clock_buf, 1, opt_size, trace_dat_fp))
+               return -EIO;
+
+       /* EVENT option */
+       opt_id = TRACE_DAT_OPTION_EVENT;
+       opt_size = sizeof(unsigned long long);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+          !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+          !fwrite(&trace_dat_events_format_offset, sizeof(unsigned long long),
+                  1, trace_dat_fp))
+               return -EIO;
+
+       /* HEADER option */
+       opt_id = TRACE_DAT_OPTION_HEADER;
+       opt_size = sizeof(unsigned long long);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_header_info_offset, sizeof(unsigned long long),
+                   1, trace_dat_fp))
+               return -EIO;
+
+       /* FTRACE option */
+       opt_id = TRACE_DAT_OPTION_FTRACE;
+       opt_size = sizeof(unsigned long long);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_ftrace_format_offset, sizeof(unsigned long long),
+                  1, trace_dat_fp))
+               return -EIO;
+
+       /* KALLSYMS option */
+       opt_id = TRACE_DAT_OPTION_KALLSYMS;
+       opt_size = sizeof(unsigned long long);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_kallsyms_offset, sizeof(unsigned long long),
+                   1, trace_dat_fp))
+               return -EIO;
+
+       /* CMDLINE option */
+       opt_id = TRACE_DAT_OPTION_CMDLINE;
+       opt_size = sizeof(unsigned long long);
+
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_cmdline_offset, sizeof(unsigned long long),
+                   1, trace_dat_fp))
+               return -EIO;
+
+       /* DONE option id - next_options_offset filled later */
+       opt_id = TRACE_DAT_OPTION_DONE;
+       opt_size = sizeof(unsigned long long);
+       next_offset = 0;  /* placeholder */
+
+       trace_dat_next_options_offset = ftell(trace_dat_fp);
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&next_offset, sizeof(unsigned long long), 1, trace_dat_fp))
+               return -EIO;
+
+       /* fill section size */
+       end_pos = ftell(trace_dat_fp);
+
+       section_size = end_pos - payload_start;
+       if (fseek(trace_dat_fp, section_size_pos, SEEK_SET) < 0 ||
+           !fwrite(&section_size, sizeof(unsigned long long), 1, trace_dat_fp) 
||
+           fseek(trace_dat_fp, end_pos, SEEK_SET) < 0)
+               return -EIO;
+
+       return 0;
+
+}
+
+/* Writes options section containing BUFFER option with flyrecord section
+ * (flyrecord section offset, clock type, page size, CPU count,
+ * per-CPU offsets/sizes) and DONE option.
+ */
+int trace_dat__write_options_section2(void)
+{
+       unsigned short section_id  = TRACE_DAT_SECTION_OPTIONS;
+       unsigned short flags = 0;
+       unsigned int string_id = STRID_OPTIONS_2;
+       unsigned long long section_size = 0;
+       long section_size_pos;
+       long payload_start;
+       int cpu;
+       unsigned short opt_id = TRACE_DAT_OPTION_BUFFER;
+       unsigned int opt_size = 0;
+       long opt_size_pos;
+       unsigned long long data_offset = 0;
+       unsigned int page_size = (unsigned int)trace_dat_page_size;
+       const char *clock = "local";
+       unsigned long long next;
+       long end_pos;
+       unsigned long long cpu_offset;
+       unsigned long long cpu_size;
+       unsigned short done_id;
+       unsigned int done_size;
+
+       if (!trace_dat_fp)
+               return -EINVAL;
+
+       /* fill done1 next offset - points to this section */
+       next = ftell(trace_dat_fp);
+
+       if (fseek(trace_dat_fp, trace_dat_next_options_offset + 2 + 4, 
SEEK_SET) < 0 ||
+           !fwrite(&next, sizeof(unsigned long long), 1, trace_dat_fp) ||
+           fseek(trace_dat_fp, 0, SEEK_END) < 0)
+               return -EIO;
+
+       /* write section header */
+       if (!fwrite(&section_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&flags, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&string_id, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+       section_size_pos = ftell(trace_dat_fp);
+       if (!fwrite(&section_size, sizeof(unsigned long long), 1, trace_dat_fp))
+               return -EIO;
+
+       payload_start = ftell(trace_dat_fp);
+
+       /* BUFFER option */
+       if (!fwrite(&opt_id, sizeof(unsigned short), 1, trace_dat_fp))
+               return -EIO;
+       opt_size_pos = ftell(trace_dat_fp);
+       if (!fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+       opt_payload_start = ftell(trace_dat_fp);
+
+       /* data_offset placeholder */
+       if (!fwrite(&data_offset, sizeof(unsigned long long), 1, trace_dat_fp) 
||
+           !fwrite("\0", 1, 1, trace_dat_fp) ||
+           !fwrite(clock, 1, strlen(clock) + 1, trace_dat_fp) ||
+           !fwrite(&page_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&trace_dat_nr_cpus, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+
+       /* per cpu: cpu_id + offset placeholder + size */
+       for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+               cpu_offset = 0;  /* filled in write_flyrecord */
+               cpu_size   = 0;  /* filled in write_flyrecord */
+
+               if (!fwrite(&cpu, sizeof(unsigned int), 1, trace_dat_fp))
+                       return -EIO;
+               buffer_opt_cpu_offsets_pos[cpu] = ftell(trace_dat_fp);
+               if (!fwrite(&cpu_offset, sizeof(unsigned long long), 1, 
trace_dat_fp) ||
+                   !fwrite(&cpu_size, sizeof(unsigned long long), 1, 
trace_dat_fp))
+                       return -EIO;
+       }
+
+       /* fill opt_size */
+       end_pos = ftell(trace_dat_fp);
+
+       opt_size = end_pos - opt_payload_start;
+       fseek(trace_dat_fp, opt_size_pos, SEEK_SET);
+       if (!fwrite(&opt_size, sizeof(unsigned int), 1, trace_dat_fp))
+               return -EIO;
+       fseek(trace_dat_fp, end_pos, SEEK_SET);
+
+       /* DONE id=0 */
+       done_id = TRACE_DAT_OPTION_DONE;
+       done_size = sizeof(unsigned long long);
+       /* No additional options sections follow this one */
+       next = 0;
+
+       if (!fwrite(&done_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&done_size, sizeof(unsigned int), 1, trace_dat_fp) ||
+           !fwrite(&next, sizeof(unsigned long long), 1, trace_dat_fp))
+               return -EIO;
+
+       /* fill section size */
+       end_pos = ftell(trace_dat_fp);
+
+       section_size = end_pos - payload_start;
+       fseek(trace_dat_fp, section_size_pos, SEEK_SET);
+       if (!fwrite(&section_size, sizeof(unsigned long long), 1, trace_dat_fp))
+               return -EIO;
+       fseek(trace_dat_fp, end_pos, SEEK_SET);
+
+       return 0;
+
+}
+
+int trace_dat__write_flyrecord_section(void)
+{
+       unsigned short section_id = TRACE_DAT_SECTION_FLYRECORD;
+       unsigned short flags = 0;
+       unsigned int string_id = STRID_BUFFER_FLYRECORD;
+       unsigned long long section_size = 0;
+       long section_size_pos;
+       long flyrecord_start;
+       long after_header;
+       long padding_needed;
+       unsigned long long *cpu_offsets;
+       unsigned long long *cpu_sizes;
+       int cpu;
+       int ret = 0;
+       char *pad;
+       unsigned long long start;
+       long end_pos;
+
+       if (!trace_dat_fp)
+               return -EINVAL;
+
+       cpu_offsets = calloc(trace_dat_nr_cpus, sizeof(unsigned long long));
+       cpu_sizes   = calloc(trace_dat_nr_cpus, sizeof(unsigned long long));
+       if (!cpu_offsets || !cpu_sizes) {
+               ret = -ENOMEM;
+               goto cleanup;
+       }
+       flyrecord_start = ftell(trace_dat_fp);
+       if (flyrecord_start < 0) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       /* section header */
+       if (!fwrite(&section_id, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&flags, sizeof(unsigned short), 1, trace_dat_fp) ||
+           !fwrite(&string_id, sizeof(unsigned int), 1, trace_dat_fp)) {
+               ret = -EIO;
+               goto cleanup;
+       }
+       section_size_pos = ftell(trace_dat_fp);
+       if (!fwrite(&section_size, sizeof(unsigned long long), 1, 
trace_dat_fp)) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       /* Align to page boundary */
+       after_header   = ftell(trace_dat_fp);
+       padding_needed = (trace_dat_page_size -
+                        (after_header % trace_dat_page_size)) % 
trace_dat_page_size;
+
+       if (padding_needed > 0) {
+               pad = calloc(1, padding_needed);
+
+               if (!fwrite(pad, 1, padding_needed, trace_dat_fp)) {
+                       free(pad);
+                       ret = -EIO;
+                       goto cleanup;
+               }
+               free(pad);
+       }
+
+       /* write per-cpu trace data */
+       for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+               start = ftell(trace_dat_fp);
+
+               ret = trace_dat__write_cpu_dat(trace_dat_fp, cpu, 
&cpu_offsets[cpu]);
+
+               if (ret < 0) {
+                       pr_err("Failed to write CPU %d data\n", cpu);
+                       goto cleanup;
+               }
+               cpu_sizes[cpu]   = ftell(trace_dat_fp) - start;
+       }
+
+       /* fill section size */
+       end_pos = ftell(trace_dat_fp);
+
+       section_size = end_pos - flyrecord_start;
+       if (fseek(trace_dat_fp, section_size_pos, SEEK_SET) < 0 ||
+           !fwrite(&section_size, sizeof(unsigned long long), 1, 
trace_dat_fp)) {
+               ret = -EIO;
+               goto cleanup;
+       }
+       if (fseek(trace_dat_fp, end_pos, SEEK_SET) < 0) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       /* fill cpu offsets and sizes in BUFFER option */
+       for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+               if (fseek(trace_dat_fp, buffer_opt_cpu_offsets_pos[cpu], 
SEEK_SET) < 0 ||
+                   !fwrite(&cpu_offsets[cpu], sizeof(unsigned long long), 1, 
trace_dat_fp) ||
+                   !fwrite(&cpu_sizes[cpu], sizeof(unsigned long long), 1, 
trace_dat_fp)) {
+                       ret = -EIO;
+                       goto cleanup;
+               }
+       }
+
+       /* fill data offset in buffer option */
+       if (fseek(trace_dat_fp, opt_payload_start, SEEK_SET) < 0 ||
+           !fwrite(&flyrecord_start, sizeof(unsigned long long), 1, 
trace_dat_fp)) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       if (fseek(trace_dat_fp, 0, SEEK_END) < 0) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+
+cleanup:
+       free(cpu_offsets);
+       free(cpu_sizes);
+       return ret;
+}
+
+/* Free all per-CPU event buffers */
+void trace_dat__free_cpu_buffers(void)
+{
+       int cpu;
+
+       if (!trace_cpu_data)
+               return;
+
+       for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+               int i;
+
+               for (i = 0; i < trace_cpu_data[cpu].count; i++)
+                       free(trace_cpu_data[cpu].events[i].raw);
+               free(trace_cpu_data[cpu].events);
+       }
+       free(trace_cpu_data);
+       trace_cpu_data = NULL;
+       free(buffer_opt_cpu_offsets_pos);
+       buffer_opt_cpu_offsets_pos = NULL;
+       trace_dat_nr_cpus = 0;
+}
diff --git a/tools/perf/util/trace-dat.h b/tools/perf/util/trace-dat.h
new file mode 100644
index 000000000000..7667a440330c
--- /dev/null
+++ b/tools/perf/util/trace-dat.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2026, IBM Corporation
+ * Author: Tanushree Shah <[email protected]>
+ */
+
+#ifndef __PERF_TRACE_DAT_H
+#define __PERF_TRACE_DAT_H
+
+#include <stdio.h>
+
+/* trace.dat file format version */
+#define TRACE_DAT_VERSION '7'
+
+/*
+ * Section IDs for trace.dat format
+ */
+#define TRACE_DAT_SECTION_OPTIONS   0
+#define TRACE_DAT_SECTION_FLYRECORD 3
+#define TRACE_DAT_SECTION_STRINGS   15
+#define TRACE_DAT_SECTION_HEADER    16
+#define TRACE_DAT_SECTION_FTRACE    17
+#define TRACE_DAT_SECTION_EVENTS    18
+#define TRACE_DAT_SECTION_KALLSYMS  19
+#define TRACE_DAT_SECTION_CMDLINE   21
+
+/*
+ * Option IDs for trace.dat options sections
+ */
+#define TRACE_DAT_OPTION_DONE       0
+#define TRACE_DAT_OPTION_BUFFER     3
+#define TRACE_DAT_OPTION_TRACECLOCK 4
+#define TRACE_DAT_OPTION_CPUCOUNT   8
+#define TRACE_DAT_OPTION_HEADER     16
+#define TRACE_DAT_OPTION_FTRACE     17
+#define TRACE_DAT_OPTION_EVENT      18
+#define TRACE_DAT_OPTION_KALLSYMS   19
+#define TRACE_DAT_OPTION_CMDLINE    21
+
+/*
+ * String offsets in the strings section
+ * These point to null-terminated strings used as section names
+ */
+#define STRID_HEADERS          0
+#define STRID_FTRACE_FORMATS   8
+#define STRID_EVENT_FORMATS    29
+#define STRID_KALLSYMS         43
+#define STRID_CMDLINES         52
+#define STRID_STRINGS          61
+#define STRID_OPTIONS_1        69
+#define STRID_OPTIONS_2        77
+#define STRID_BUFFER_FLYRECORD 85
+
+struct perf_session;
+
+extern FILE *trace_dat_fp;
+extern int trace_dat_page_size;
+extern int trace_dat_nr_cpus;
+extern long trace_dat_options_offset;
+extern long trace_dat_header_info_offset;
+extern long trace_dat_events_format_offset;
+extern long trace_dat_ftrace_format_offset;
+extern long trace_dat_kallsyms_offset;
+extern long trace_dat_cmdline_offset;
+extern long trace_dat_next_options_offset;
+
+/* collect and manage per-cpu tracepoint event buffers */
+int trace_dat__init_cpu_buffers(int nr_cpus);
+int trace_dat__collect_cpu_event(int cpu, unsigned long long ts,
+                      void *raw, unsigned int raw_size);
+void trace_dat__free_cpu_buffers(void);
+
+/* write trace.dat file sections */
+int trace_dat__write_options_section1(void);
+int trace_dat__write_options_section2(void);
+int trace_dat__write_flyrecord_section(void);
+int trace_dat__write_strings_section(void);
+
+#endif /* __PERF_TRACE_DAT_H */
-- 
2.53.0


Reply via email to