This is an automated email from Gerrit. "Yurii Shutkin <yurii.shut...@gmail.com>" just uploaded a new patch set to Gerrit, which you can find at https://review.openocd.org/c/openocd/+/8405
-- gerrit commit 93aeef8cd18eff8fea4c73f373c97e8ff808f3dd Author: Yurii Shutkin <yurii.shut...@gmail.com> Date: Thu Jul 18 09:33:45 2024 +0300 target profiling: add support for 64-bit systems profiling 64-bit support preserves array of 32-bit words to store samples by using an additional single high 32-bit word that should be the same for all collected samples. High 32-bit word is calculated automatically from the first incoming sample and checked against other samples. Subsequent samples that do not match this high 32-bit word are skipped. 'with_range', 'start_address' and 'end_address' are added to collection function to be able to select samples that determine the high 32-bit word. This also makes samples collection more efficient and allows collecting more samples. Change-Id: Ie379161d20ec514c9fb28daa04f6164e3bd1616e Signed-off-by: Yurii Shutkin <yurii.shut...@gmail.com> diff --git a/src/target/cortex_m.c b/src/target/cortex_m.c index 791a432427..b75910deb8 100644 --- a/src/target/cortex_m.c +++ b/src/target/cortex_m.c @@ -2277,8 +2277,9 @@ void cortex_m_deinit_target(struct target *target) free(cortex_m); } -int cortex_m_profiling(struct target *target, uint32_t *samples, - uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds) +int cortex_m_profiling(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, + uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds) { struct timeval timeout, now; struct armv7m_common *armv7m = target_to_armv7m(target); @@ -2292,7 +2293,8 @@ int cortex_m_profiling(struct target *target, uint32_t *samples, } if (reg_value == 0) { LOG_TARGET_INFO(target, "PCSR sampling not supported on this processor."); - return target_profiling_default(target, samples, max_num_samples, num_samples, seconds); + return target_profiling_default(target, samples, sample_address_hi32, with_range, + start_address, end_address, max_num_samples, 
num_samples, seconds); } gettimeofday(&timeout, NULL); @@ -2318,12 +2320,17 @@ int cortex_m_profiling(struct target *target, uint32_t *samples, if (read_count > 1024) read_count = 1024; + // this case ignores with_range flag, all samples are collected + // and will be filtered out later on write_gmon retval = mem_ap_read_buf_noincr(armv7m->debug_ap, (void *)&samples[sample_count], 4, read_count, DWT_PCSR); sample_count += read_count; } else { - target_read_u32(target, DWT_PCSR, &samples[sample_count++]); + target_read_u32(target, DWT_PCSR, &reg_value); + if (!with_range || (reg_value >= start_address && reg_value < end_address)) { + samples[sample_count++] = reg_value; + } } if (retval != ERROR_OK) { diff --git a/src/target/cortex_m.h b/src/target/cortex_m.h index 726fca2903..91198c2ce5 100644 --- a/src/target/cortex_m.h +++ b/src/target/cortex_m.h @@ -337,7 +337,8 @@ int cortex_m_remove_watchpoint(struct target *target, struct watchpoint *watchpo void cortex_m_enable_breakpoints(struct target *target); void cortex_m_enable_watchpoints(struct target *target); void cortex_m_deinit_target(struct target *target); -int cortex_m_profiling(struct target *target, uint32_t *samples, +int cortex_m_profiling(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds); #endif /* OPENOCD_TARGET_CORTEX_M_H */ diff --git a/src/target/openrisc/or1k.c b/src/target/openrisc/or1k.c index 8c38610805..0556431d2a 100644 --- a/src/target/openrisc/or1k.c +++ b/src/target/openrisc/or1k.c @@ -1200,7 +1200,8 @@ static int or1k_checksum_memory(struct target *target, target_addr_t address, return ERROR_FAIL; } -static int or1k_profiling(struct target *target, uint32_t *samples, +static int or1k_profiling(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, uint32_t 
max_num_samples, uint32_t *num_samples, uint32_t seconds) { struct timeval timeout, now; @@ -1233,7 +1234,8 @@ static int or1k_profiling(struct target *target, uint32_t *samples, return retval; } - samples[sample_count++] = reg_value; + if (!with_range || (reg_value >= start_address && reg_value < end_address)) + samples[sample_count++] = reg_value; gettimeofday(&now, NULL); if ((sample_count >= max_num_samples) || timeval_compare(&now, &timeout) > 0) { diff --git a/src/target/target.c b/src/target/target.c index 8ff665f474..975d102925 100644 --- a/src/target/target.c +++ b/src/target/target.c @@ -1464,11 +1464,12 @@ unsigned int target_data_bits(struct target *target) return 32; } -static int target_profiling(struct target *target, uint32_t *samples, +static int target_profiling(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds) { - return target->type->profiling(target, samples, max_num_samples, - num_samples, seconds); + return target->type->profiling(target, samples, sample_address_hi32, with_range, + start_address, end_address, max_num_samples, num_samples, seconds); } static int handle_target(void *priv); @@ -2287,7 +2288,8 @@ static int target_gdb_fileio_end_default(struct target *target, return ERROR_OK; } -int target_profiling_default(struct target *target, uint32_t *samples, +int target_profiling_default(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds) { struct timeval timeout, now; @@ -2299,6 +2301,7 @@ int target_profiling_default(struct target *target, uint32_t *samples, " target as often as we can..."); uint32_t sample_count = 0; + bool warn_printed = false; /* hopefully it is safe to cache! We want to stop/restart as quickly as possible. 
*/ struct reg *reg = register_get_by_name(target->reg_cache, "pc", true); @@ -2306,8 +2309,26 @@ int target_profiling_default(struct target *target, uint32_t *samples, for (;;) { target_poll(target); if (target->state == TARGET_HALTED) { - uint32_t t = buf_get_u32(reg->value, 0, 32); - samples[sample_count++] = t; + // update reg value if cached value is not valid + if (!reg->valid) + reg->type->get(reg); + + uint64_t t = buf_get_u64(reg->value, 0, reg->size); + + if (!with_range || (t >= start_address && t < end_address)) { + if (sample_count == 0) { + // set high 32 bits of address as of the first sample + *sample_address_hi32 = (uint32_t)(t >> 32); + } + if ((t >> 32) != *sample_address_hi32 && !warn_printed) { + LOG_WARNING("Samples do not fit into single 32-bit slice, " + "some samples will be skipped"); + warn_printed = true; + continue; + } + samples[sample_count++] = (uint32_t)(t & 0xffffffff); + } + /* current pc, addr = 0, do not handle breakpoints, not debugging */ retval = target_resume(target, 1, 0, 0, 0); target_poll(target); @@ -4197,6 +4218,20 @@ static void write_long(FILE *f, int l, struct target *target) write_data(f, val, 4); } +static void write_vma(FILE *f, uint64_t l, struct target *target) +{ + struct reg *reg = register_get_by_name(target->reg_cache, "pc", true); + if (reg->size == 64) { + uint8_t val[8]; + target_buffer_set_u64(target, val, l); + write_data(f, val, 8); + } else { + uint8_t val[4]; + target_buffer_set_u32(target, val, l); + write_data(f, val, 4); + } +} + static void write_string(FILE *f, char *s) { write_data(f, s, strlen(s)); @@ -4205,8 +4240,9 @@ static void write_string(FILE *f, char *s) typedef unsigned char UNIT[2]; /* unit of profiling */ /* Dump a gmon.out histogram file. 
*/ -static void write_gmon(uint32_t *samples, uint32_t sample_num, const char *filename, bool with_range, - uint32_t start_address, uint32_t end_address, struct target *target, uint32_t duration_ms) +static void write_gmon(uint32_t *samples, uint32_t sample_address_hi32, uint32_t sample_num, + const char *filename, bool with_range, uint64_t start_address, uint64_t end_address, + struct target *target, uint32_t duration_ms) { uint32_t i; FILE *f = fopen(filename, "w"); @@ -4222,42 +4258,43 @@ static void write_gmon(uint32_t *samples, uint32_t sample_num, const char *filen write_data(f, &zero, 1); /* figure out bucket size */ - uint32_t min; - uint32_t max; + uint64_t min; + uint64_t max; if (with_range) { min = start_address; max = end_address; } else { - min = samples[0]; - max = samples[0]; + min = ((uint64_t)sample_address_hi32 << 32) | samples[0]; + max = ((uint64_t)sample_address_hi32 << 32) | samples[0]; for (i = 0; i < sample_num; i++) { - if (min > samples[i]) - min = samples[i]; - if (max < samples[i]) - max = samples[i]; + uint64_t sample = ((uint64_t)sample_address_hi32 << 32) | samples[i]; + if (min > sample) + min = sample; + if (max < sample) + max = sample; } /* max should be (largest sample + 1) * Refer to binutils/gprof/hist.c (find_histogram_for_pc) */ - if (max < UINT32_MAX) + if (max < UINT64_MAX) max++; /* gprof requires (max - min) >= 2 */ while ((max - min) < 2) { - if (max < UINT32_MAX) + if (max < UINT64_MAX) max++; else min--; } } - uint32_t address_space = max - min; + uint64_t address_space = max - min; /* FIXME: What is the reasonable number of buckets? * The profiling result will be more accurate if there are enough buckets. */ - static const uint32_t max_buckets = 128 * 1024; /* maximum buckets. */ - uint32_t num_buckets = address_space / sizeof(UNIT); - if (num_buckets > max_buckets) + static const uint32_t max_buckets = 128 * 1024 * 1024; /* maximum buckets. 
*/ + uint64_t num_buckets = address_space / sizeof(UNIT); + if (num_buckets > (uint64_t)max_buckets) num_buckets = max_buckets; int *buckets = malloc(sizeof(int) * num_buckets); if (!buckets) { @@ -4266,22 +4303,23 @@ static void write_gmon(uint32_t *samples, uint32_t sample_num, const char *filen } memset(buckets, 0, sizeof(int) * num_buckets); for (i = 0; i < sample_num; i++) { - uint32_t address = samples[i]; + uint64_t address = ((uint64_t)sample_address_hi32 << 32) | samples[i]; if ((address < min) || (max <= address)) continue; - long long a = address - min; - long long b = num_buckets; - long long c = address_space; + int64_t a = address - min; + int64_t b = num_buckets; + int64_t c = address_space; int index_t = (a * b) / c; /* danger!!!! int32 overflows */ buckets[index_t]++; } + /* append binary memory gmon.out &profile_hist_hdr ((char*)&profile_hist_hdr + sizeof(struct gmon_hist_hdr)) */ - write_long(f, min, target); /* low_pc */ - write_long(f, max, target); /* high_pc */ - write_long(f, num_buckets, target); /* # of buckets */ + write_vma(f, min, target); /* low_pc */ + write_vma(f, max, target); /* high_pc */ + write_long(f, (uint32_t)num_buckets, target); /* # of buckets */ float sample_rate = sample_num / (duration_ms / 1000.0); write_long(f, sample_rate, target); write_string(f, "seconds"); @@ -4327,13 +4365,13 @@ COMMAND_HANDLER(handle_profile_command) COMMAND_PARSE_NUMBER(u32, CMD_ARGV[0], offset); - uint32_t start_address = 0; - uint32_t end_address = 0; + uint64_t start_address = 0; + uint64_t end_address = 0; bool with_range = false; if (CMD_ARGC == 4) { with_range = true; - COMMAND_PARSE_NUMBER(u32, CMD_ARGV[2], start_address); - COMMAND_PARSE_NUMBER(u32, CMD_ARGV[3], end_address); + COMMAND_PARSE_NUMBER(u64, CMD_ARGV[2], start_address); + COMMAND_PARSE_NUMBER(u64, CMD_ARGV[3], end_address); if (start_address > end_address || (end_address - start_address) < 2) { command_print(CMD, "Error: end - start < 2"); return 
ERROR_COMMAND_ARGUMENT_INVALID; @@ -4345,6 +4383,7 @@ COMMAND_HANDLER(handle_profile_command) LOG_ERROR("No memory to store samples."); return ERROR_FAIL; } + uint32_t sample_address_hi32 = 0; uint64_t timestart_ms = timeval_ms(); /** @@ -4352,8 +4391,9 @@ COMMAND_HANDLER(handle_profile_command) * annoying halt/resume step; for example, ARMv7 PCSR. * Provide a way to use that more efficient mechanism. */ - retval = target_profiling(target, samples, MAX_PROFILE_SAMPLE_NUM, - &num_of_samples, offset); + retval = target_profiling(target, samples, &sample_address_hi32, + with_range, start_address, end_address, + MAX_PROFILE_SAMPLE_NUM, &num_of_samples, offset); if (retval != ERROR_OK) { free(samples); return retval; @@ -4392,7 +4432,7 @@ COMMAND_HANDLER(handle_profile_command) return retval; } - write_gmon(samples, num_of_samples, CMD_ARGV[1], + write_gmon(samples, sample_address_hi32, num_of_samples, CMD_ARGV[1], with_range, start_address, end_address, target, duration_ms); command_print(CMD, "Wrote %s", CMD_ARGV[1]); diff --git a/src/target/target.h b/src/target/target.h index 03db3950ce..6bc4a50325 100644 --- a/src/target/target.h +++ b/src/target/target.h @@ -781,8 +781,9 @@ void target_handle_md_output(struct command_invocation *cmd, struct target *target, target_addr_t address, unsigned size, unsigned count, const uint8_t *buffer); -int target_profiling_default(struct target *target, uint32_t *samples, uint32_t - max_num_samples, uint32_t *num_samples, uint32_t seconds); +int target_profiling_default(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, + uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds); #define ERROR_TARGET_INVALID (-300) #define ERROR_TARGET_INIT_FAILED (-301) diff --git a/src/target/target_type.h b/src/target/target_type.h index bc42c2d16e..50a164e1a1 100644 --- a/src/target/target_type.h +++ b/src/target/target_type.h @@ -297,7 +297,8 @@ 
struct target_type { /* do target profiling */ - int (*profiling)(struct target *target, uint32_t *samples, + int (*profiling)(struct target *target, uint32_t *samples, uint32_t *sample_address_hi32, + bool with_range, uint64_t start_address, uint64_t end_address, uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds); /* Return the number of address bits this target supports. This will --