Phillipe, Running that babeltrace command gives an error:
# babeltrace2 -o ctf-metadata /home/naricc/lttng-traces/my-user-space-session-20240131-161638/ 01-31 18:59:17.715 1166285 1166285 E PLUGIN/SRC.CTF.FS/QUERY metadata_info_query@query.c:111 [fs] Cannot open trace metadata: path="/home/naricc/lttng-traces/my-user-space-session-20240131-161638/". 01-31 18:59:17.715 1166285 1166285 W LIB/QUERY-EXECUTOR bt_query_executor_query@query-executor.c:243 Component class's "query" method failed: query-exec-addr=0x557f47f2fb20, cc-addr=0x557f47f3e300, cc-type=SOURCE, cc-name="fs", cc-partial-descr="Read CTF traces from the file sy", cc-is-frozen=0, cc-so-handle-addr=0x557f47f3d430, cc-so-handle-path="/usr/lib/x86_64-linux-gnu/babeltrace2/plugins/babeltrace-plugin-ctf.so", object="metadata-info", params-addr=0x557f47f2e9d0, params-type=MAP, params-element-count=1, log-level=WARNING 01-31 18:59:17.715 1166285 1166285 E CLI cmd_print_ctf_metadata@babeltrace2.c:1083 Failed to query `metadata-info` object: unknown error I was able to just read the meta data file with cat though: /home/naricc/lttng-traces/my-user-space-session-20240131-161638/ust/uid/1002/64-bit# cat metadata W�u��X��C�`m�+�0c/* CTF 1.8 */ typealias integer { size = 8; align = 8; signed = false; } := uint8_t; typealias integer { size = 16; align = 8; signed = false; } := uint16_t; typealias integer { size = 32; align = 8; signed = false; } := uint32_t; typealias integer { size = 64; align = 8; signed = false; } := uint64_t; typealias integer { size = 64; align = 8; signed = false; } := unsigned long; typealias integer { size = 5; align = 1; signed = false; } := uint5_t; typealias integer { size = 27; align = 1; signed = false; } := uint27_t; trace { major = 1; minor = 8; uuid = "17bd8558-e1c1-43ee-9a93-9c606db72bb0"; byte_order = le; packet.header := struct { uint32_t magic; uint8_t uuid[16]; uint32_t stream_id; uint64_t stream_instance_id; }; }; env { domain = "ust"; tracer_name = "lttng-ust"; tracer_major = 2; tracer_minor = 13; tracer_buffering_scheme = "uid"; tracer_buffering_id = 1002; architecture_bit_width = 64; trace_name = "my-user-space-session"; trace_creation_datetime = "20240131T161638+0000"; hostname = "TDC20748914"; }; clock { name = "monotonic"; uuid = "ece735c7-26b7-446c-b6fe-1edefe5c7062"; description = "Monotonic Clock"; freq = 1000000000; /* Frequency, in Hz */ /* clock value offset from Epoch is: offset * (1/freq) */ offset = 1705964436712174561; }; typealias integer { size = 27; align = 1; signed = false; map = clock.monotonic.value; } := uint27_clock_monotonic_t; typealias integer { size = 32; align = 8; signed = false; map = clock.monotonic.value; } := uint32_clock_monotonic_t; typealias integer { size = 64; align = 8; signed = false; map = clock.monotonic.value; } := uint64_clock_monotonic_t; struct packet_context { uint64_clock_monotonic_t timestamp_begin; uint64_clock_monotonic_t timestamp_end; uint64_t content_size; uint64_t packet_size; uint64_t packet_seq_num; unsigned long events_discarded; uint32_t cpu_id; }; struct event_header_compact { enum : uint5_t { compact = 0 ... 30, extended = 31 } id; variant <id> { struct { uint27_clock_monotonic_t timestamp; } compact; struct { uint32_t id; uint64_clock_monotonic_t timestamp; } extended; } v; } align(8); struct event_header_large { enum : uint16_t { compact = 0 ... 65534, extended = 65535 } id; variant <id> { struct { uint32_clock_monotonic_t timestamp; } compact; struct { uint32_t id; uint64_clock_monotonic_t timestamp; } extended; } v; } align(8); event { name = "naricc_test_provider:test_event"; id = 0; stream_id = 0; loglevel = 13; fields := struct { integer { size = 64; align = 8; signed = 0; encoding = none; base = 10; } __utf8_text_sequence_length; integer { size = 8; align = 8; signed = 1; encoding = UTF8; base = 10; } _utf8_text_sequence[ __utf8_text_sequence_length ]; integer { size = 64; align = 8; signed = 0; encoding = none; base = 10; } __wchar_text_sequence_length; integer { size = 32; align = 8; signed = 1; encoding = UTF8; base = 10; } _wchar_text_sequence[ __wchar_text_sequence_length ]; }; }; stream { id = 0; event.header := struct event_header_large; packet.context := struct packet_context; }; I think ctf_sequence will work as a work around, although it will make it somewhat more annoying for the program eventually reading these traces. --Nathan Ricci -----Original Message----- From: Philippe Proulx <eeppelitel...@gmail.com> Sent: Wednesday, January 31, 2024 1:39 PM To: Nathan Ricci <nar...@microsoft.com> Cc: lttng-dev@lists.lttng.org Subject: [EXTERNAL] Re: [lttng-dev] error with ctf_sequence_text when using non-utf8 encoded strings [You don't often get email from eeppelitel...@gmail.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ] On Wed, Jan 31, 2024 at 12:58 PM Nathan Ricci via lttng-dev <lttng-dev@lists.lttng.org> wrote: > > I am trying to emit a trace that has a wchar string, using as one of the > fields ctf_sequence_text. When the trace is recorded, it seems like > everything at but the first character is truncated, and I think this is > because it is assuming UTF8 encoding and stopping at the first null > character. This is on Ubuntu 22.04, using this liblttng-ust package: Hi Nathan. I'd like to see your generated trace metadata. Here's how: $ babeltrace2 -o ctf-metadata /path/to/dir/containing/metadata In your case, that path is somewhere within `~/lttng-traces/my-user-space-session-20240131-161638`. CTF 1.8 doesn't support "wide characters" or any other string encoding than UTF-8 and its ASCII subset. Therefore LTTng-UST assumes UTF-8 and doesn't care about anything past the first U+0000 codepoint (single zero byte in UTF-8). I don't think it's an error per se, but maybe the ctf_sequence_text() macro could fail with an element type having a size greater than one byte (like `wchar_t`)? That being said, you'll be glad to learn that, although it won't help in the short term, CTF 2 will support UTF-8-, UTF-16-, and UTF-32-encoded string fields: <https://diamon.org/ctf/files/CTF2-SPECRC-9.0rA.html#str-fc>. In the meantime, you could use the ctf_sequence() macro, but your string will become a dynamic-length array of integers in the CTF data stream, meaning you'll need to decode it manually using the Babeltrace 2 API. I'll wait for your metadata text. Philippe Proulx > > liblttng-ust-common1/jammy,now 2.13.1-1ubuntu1 > > I’ve boiled this down to a simple repro; I’ve included the code below, > but you can also get it here: > https://gith/ > ub.com%2Fnaricc%2Flttng-test&data=05%7C02%7Cnaricc%40microsoft.com%7C5 > 72ac60916b6455e566108dc228c0470%7C72f988bf86f141af91ab2d7cd011db47%7C1 > %7C0%7C638423233009199050%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAi > LCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C60000%7C%7C%7C&sdata= > jugmTDeB4upGRIKotLuSFqfbIEwlbrp0z96GON2r%2BcI%3D&reserved=0 > > Here is the main file: > ----- > > #include <stdio.h> > > #include <unistd.h> > > #include <lttng/lttng.h> > > #include <lttng/tracepoint.h> > > #include <wchar.h> > > #include "repro-tracepoint.h" > > > > > > > > int main() { > > puts("Hello, World!\nPress Enter to continue..."); > > getchar(); > > > > const char* utf8_text_value = "Hello, UTF8 Sequence Text!"; > > const wchar_t *wchar_text_value = L"Hello, WChar Sequence Text!"; > > > > // Emit the tracepoint event with the sequence text field > > lttng_ust_tracepoint(naricc_test_provider, test_event, > utf8_text_value, wchar_text_value); > > > > return 0; > > } > > > > ---- > > Here is the tracepoint header (repro-tracepoint.h): > ___ > > #undef TRACEPOINT_PROVIDER > > #define TRACEPOINT_PROVIDER naricc_test_provider > > > > #undef TRACEPOINT_INCLUDE > > #define TRACEPOINT_INCLUDE "./repro-tracepoint.h" > > > > #if !defined(_TP_H) || defined(TRACEPOINT_HEADER_MULTI_READ) > > #define _TP_H > > > > #include <lttng/tracepoint.h> > > #include <wchar.h> > > > > // Define the tracepoint event with a sequence text field > > TRACEPOINT_EVENT(naricc_test_provider, test_event, > > TP_ARGS( > > const char*, utf8_text_value, > > const wchar_t*, wchar_text_value > > ), > > TP_FIELDS( > > ctf_sequence_text(char, utf8_text_sequence, utf8_text_value, > size_t, strlen(utf8_text_value)) > > ctf_sequence_text(wchar_t, wchar_text_sequence, > wchar_text_value, size_t, wcslen(wchar_text_value) * 2 + 2) > > ) > > ) > > > > #endif /* _TP_H */ > > > > #include <lttng/tracepoint-event.h> > > ---- > And here is the repro-tracepoint.cpp: > ___ > > #define LTTNG_UST_TRACEPOINT_CREATE_PROBES > > #define LTTNG_UST_TRACEPOINT_DEFINE > > > > #include "repro-tracepoint.h" > > --- > > I built it like so: > > > g++ -c -I. repro-tracepoint.cpp > > g++ -c lttng-test.cpp > > g++ -o lttng-test lttng-test.o repro-tracepoint.o -llttng-ust -ldl > > naricc@TDC20748914:/workspace/lttng-test$ > > --- > > After starting a session, running that program, and destroying the session, > this is what I get with babeltrace2: > > ``` > $ babeltrace2 ~/lttng-traces/my-user-space-session-20240131-161638 > [16:16:43.553211421] (+?.?????????) TDC20748914 > naricc_test_provider:test_event: { cpu_id = 6 }, { > _utf8_text_sequence_length = 26, utf8_text_sequence = "Hello, UTF8 > Sequence Text!", _wchar_text_sequence_length = 56, wchar_text_sequence > = [ [0] = 72, [1] = 0, [2] = 0, [3] = 0, [4] = 0, [5] = 0, [6] = 0, > [7] = 0, [8] = 0, [9] = 0, [10] = 0, [11] = 0, [12] = 0, [13] = 0, > [14] = 0, [15] = 0, [16] = 0, [17] = 0, [18] = 0, [19] = 0, [20] = 0, > [21] = 0, [22] = 0, [23] = 0, [24] = 0, [25] = 0, [26] = 0, [27] = 0, > [28] = 0, [29] = 0, [30] = 0, [31] = 0, [32] = 0, [33] = 0, [34] = 0, > [35] = 0, [36] = 0, [37] = 0, [38] = 0, [39] = 0, [40] = 0, [41] = 0, > [42] = 0, [43] = 0, [44] = 0, [45] = 0, [46] = 0, [47] = 0, [48] = 0, > [49] = 0, [50] = 0, [51] = 0, [52] = 0, [53] = 0, [54] = 0, [55] = 0 ] > } > > ``` > > The utf8 sequence prints fine, but the wchar one is truncated to a single > character and then zeros. To rule out an error in babelltrace, I inspected > the channel files with hexedit and found this: > > ``` > 85 58 E1 C1 43 EE 9A 93 9C 60 6D B7 2B B0 00 00 00 00 > .......X..C....`m.+..... > 00000018 06 00 00 00 00 00 00 00 6A 33 DA B4 2D AD 02 00 04 78 1C 56 > 30 AD 02 00 ........j3..-....x.V0... > 00000030 60 0B 00 00 00 00 00 00 00 80 00 00 00 00 00 00 00 00 00 00 > 00 00 00 00 `....................... > 00000048 00 00 00 00 00 00 00 00 06 00 00 00 FF FF 00 00 00 00 3C 10 > F2 E2 2E AD ..................<..... > 00000060 02 00 1A 00 00 00 00 00 00 00 48 65 6C 6C 6F 2C 20 55 54 46 > 38 20 53 65 ..........Hello, UTF8 Se > 00000078 71 75 65 6E 63 65 20 54 65 78 74 21 38 00 00 00 00 00 00 00 > 48 00 00 00 quence Text!8.......H... > 00000090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 00 00 00 00 ........................ > > ``` > > So it seems the error in the the recording of the trace, not in the viewing. > > Looking into the lttng-ust code, it seems like ctf_sequence_text ends up > mapped to this: > > lttng-ust/include/lttng/ust-tracepoint-event-write.h at > 717c38f658248bc04ccfc6e7fdf5d03040c2a846 · lttng/lttng-ust · GitHub > > Which assumes utf8 encoding, and ultimately writes into a ring buffer > terminating on null: > > lttng-ust/src/common/ringbuffer/backend.h at > 717c38f658248bc04ccfc6e7fdf5d03040c2a846 · lttng/lttng-ust · GitHub > > > If we agree this is an error, I believe I can produce a fix for it. Or if I > am just using the APIs wrong, please let me know what I should do instead. > > --Nathan Ricci > > > > > > > _______________________________________________ > lttng-dev mailing list > lttng-dev@lists.lttng.org > https://list/ > s.lttng.org%2Fcgi-bin%2Fmailman%2Flistinfo%2Flttng-dev&data=05%7C02%7C > naricc%40microsoft.com%7C572ac60916b6455e566108dc228c0470%7C72f988bf86 > f141af91ab2d7cd011db47%7C1%7C0%7C638423233009205474%7CUnknown%7CTWFpbG > Zsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0% > 3D%7C60000%7C%7C%7C&sdata=jldEmpyCh1YPMFL8Le359320rbfRfRFw5SJwREbXkm0% > 3D&reserved=0 _______________________________________________ lttng-dev mailing list lttng-dev@lists.lttng.org https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev