This is an automated email from Gerrit.

"Yurii Shutkin <yurii.shut...@gmail.com>" just uploaded a new patch set to 
Gerrit, which you can find at https://review.openocd.org/c/openocd/+/8405

-- gerrit

commit 93aeef8cd18eff8fea4c73f373c97e8ff808f3dd
Author: Yurii Shutkin <yurii.shut...@gmail.com>
Date:   Thu Jul 18 09:33:45 2024 +0300

    target profiling: add support for 64-bit systems profiling
    
    64-bit support keeps the existing array of 32-bit words for storing
    samples and adds a single high 32-bit word that should
    be the same for all collected samples.
    
    The high 32-bit word is determined automatically from the first
    sample collected and checked against the following samples.
    Subsequent samples that do not match this high 32-bit word are skipped.
    
    'with_range', 'start_address' and 'end_address' are added to the
    collection function so that it can select the samples that
    determine the high 32-bit word. This also makes sample
    collection more efficient and allows more samples to be collected.
    
    Change-Id: Ie379161d20ec514c9fb28daa04f6164e3bd1616e
    Signed-off-by: Yurii Shutkin <yurii.shut...@gmail.com>

diff --git a/src/target/cortex_m.c b/src/target/cortex_m.c
index 791a432427..b75910deb8 100644
--- a/src/target/cortex_m.c
+++ b/src/target/cortex_m.c
@@ -2277,8 +2277,9 @@ void cortex_m_deinit_target(struct target *target)
        free(cortex_m);
 }
 
-int cortex_m_profiling(struct target *target, uint32_t *samples,
-                             uint32_t max_num_samples, uint32_t *num_samples, 
uint32_t seconds)
+int cortex_m_profiling(struct target *target, uint32_t *samples, uint32_t 
*sample_address_hi32,
+                       bool with_range, uint64_t start_address, uint64_t 
end_address,
+                       uint32_t max_num_samples, uint32_t *num_samples, 
uint32_t seconds)
 {
        struct timeval timeout, now;
        struct armv7m_common *armv7m = target_to_armv7m(target);
@@ -2292,7 +2293,8 @@ int cortex_m_profiling(struct target *target, uint32_t 
*samples,
        }
        if (reg_value == 0) {
                LOG_TARGET_INFO(target, "PCSR sampling not supported on this 
processor.");
-               return target_profiling_default(target, samples, 
max_num_samples, num_samples, seconds);
+               return target_profiling_default(target, samples, 
sample_address_hi32, with_range,
+                               start_address, end_address, max_num_samples, 
num_samples, seconds);
        }
 
        gettimeofday(&timeout, NULL);
@@ -2318,12 +2320,17 @@ int cortex_m_profiling(struct target *target, uint32_t 
*samples,
                        if (read_count > 1024)
                                read_count = 1024;
 
+                       // this case ignores with_range flag, all samples are 
collected
+                       // and will be filtered out later on write_gmon
                        retval = mem_ap_read_buf_noincr(armv7m->debug_ap,
                                                (void *)&samples[sample_count],
                                                4, read_count, DWT_PCSR);
                        sample_count += read_count;
                } else {
-                       target_read_u32(target, DWT_PCSR, 
&samples[sample_count++]);
+                       target_read_u32(target, DWT_PCSR, &reg_value);
+                       if (!with_range || (reg_value >= start_address && 
reg_value < end_address)) {
+                               samples[sample_count++] = reg_value;
+                       }
                }
 
                if (retval != ERROR_OK) {
diff --git a/src/target/cortex_m.h b/src/target/cortex_m.h
index 726fca2903..91198c2ce5 100644
--- a/src/target/cortex_m.h
+++ b/src/target/cortex_m.h
@@ -337,7 +337,8 @@ int cortex_m_remove_watchpoint(struct target *target, 
struct watchpoint *watchpo
 void cortex_m_enable_breakpoints(struct target *target);
 void cortex_m_enable_watchpoints(struct target *target);
 void cortex_m_deinit_target(struct target *target);
-int cortex_m_profiling(struct target *target, uint32_t *samples,
+int cortex_m_profiling(struct target *target, uint32_t *samples, uint32_t 
*sample_address_hi32,
+       bool with_range, uint64_t start_address, uint64_t end_address,
        uint32_t max_num_samples, uint32_t *num_samples, uint32_t seconds);
 
 #endif /* OPENOCD_TARGET_CORTEX_M_H */
diff --git a/src/target/openrisc/or1k.c b/src/target/openrisc/or1k.c
index 8c38610805..0556431d2a 100644
--- a/src/target/openrisc/or1k.c
+++ b/src/target/openrisc/or1k.c
@@ -1200,7 +1200,8 @@ static int or1k_checksum_memory(struct target *target, 
target_addr_t address,
        return ERROR_FAIL;
 }
 
-static int or1k_profiling(struct target *target, uint32_t *samples,
+static int or1k_profiling(struct target *target, uint32_t *samples, uint32_t 
*sample_address_hi32,
+               bool with_range, uint64_t start_address, uint64_t end_address,
                uint32_t max_num_samples, uint32_t *num_samples, uint32_t 
seconds)
 {
        struct timeval timeout, now;
@@ -1233,7 +1234,8 @@ static int or1k_profiling(struct target *target, uint32_t 
*samples,
                        return retval;
                }
 
-               samples[sample_count++] = reg_value;
+               if (!with_range || (reg_value >= start_address && reg_value < 
end_address))
+                       samples[sample_count++] = reg_value;
 
                gettimeofday(&now, NULL);
                if ((sample_count >= max_num_samples) || timeval_compare(&now, 
&timeout) > 0) {
diff --git a/src/target/target.c b/src/target/target.c
index 8ff665f474..975d102925 100644
--- a/src/target/target.c
+++ b/src/target/target.c
@@ -1464,11 +1464,12 @@ unsigned int target_data_bits(struct target *target)
        return 32;
 }
 
-static int target_profiling(struct target *target, uint32_t *samples,
+static int target_profiling(struct target *target, uint32_t *samples, uint32_t 
*sample_address_hi32,
+                       bool with_range, uint64_t start_address, uint64_t 
end_address,
                        uint32_t max_num_samples, uint32_t *num_samples, 
uint32_t seconds)
 {
-       return target->type->profiling(target, samples, max_num_samples,
-                       num_samples, seconds);
+       return target->type->profiling(target, samples, sample_address_hi32, 
with_range,
+                       start_address, end_address, max_num_samples, 
num_samples, seconds);
 }
 
 static int handle_target(void *priv);
@@ -2287,7 +2288,8 @@ static int target_gdb_fileio_end_default(struct target 
*target,
        return ERROR_OK;
 }
 
-int target_profiling_default(struct target *target, uint32_t *samples,
+int target_profiling_default(struct target *target, uint32_t *samples, 
uint32_t *sample_address_hi32,
+               bool with_range, uint64_t start_address, uint64_t end_address,
                uint32_t max_num_samples, uint32_t *num_samples, uint32_t 
seconds)
 {
        struct timeval timeout, now;
@@ -2299,6 +2301,7 @@ int target_profiling_default(struct target *target, 
uint32_t *samples,
                        " target as often as we can...");
 
        uint32_t sample_count = 0;
+       bool warn_printed = false;
        /* hopefully it is safe to cache! We want to stop/restart as quickly as 
possible. */
        struct reg *reg = register_get_by_name(target->reg_cache, "pc", true);
 
@@ -2306,8 +2309,26 @@ int target_profiling_default(struct target *target, 
uint32_t *samples,
        for (;;) {
                target_poll(target);
                if (target->state == TARGET_HALTED) {
-                       uint32_t t = buf_get_u32(reg->value, 0, 32);
-                       samples[sample_count++] = t;
+                       // update reg value if cached value is not valid
+                       if (!reg->valid)
+                               reg->type->get(reg);
+
+                       uint64_t t = buf_get_u64(reg->value, 0, reg->size);
+
+                       if (!with_range || (t >= start_address && t < 
end_address)) {
+                               if (sample_count == 0) {
+                                       // set high 32 bits of address as of 
the first sample
+                                       *sample_address_hi32 = (uint32_t)(t >> 
32);
+                               }
+                               if ((t >> 32) != *sample_address_hi32 && 
!warn_printed) {
+                                       LOG_WARNING("Samples do not fit into 
single 32-bit slice, "
+                                                       "some samples will be 
skipped");
+                                       warn_printed = true;
+                                       continue;
+                               }
+                               samples[sample_count++] = (uint32_t)(t & 
0xffffffff);
+                       }
+
                        /* current pc, addr = 0, do not handle breakpoints, not 
debugging */
                        retval = target_resume(target, 1, 0, 0, 0);
                        target_poll(target);
@@ -4197,6 +4218,20 @@ static void write_long(FILE *f, int l, struct target 
*target)
        write_data(f, val, 4);
 }
 
+static void write_vma(FILE *f, uint64_t l, struct target *target)
+{
+       struct reg *reg = register_get_by_name(target->reg_cache, "pc", true);
+       if (reg->size == 64) {
+               uint8_t val[8];
+               target_buffer_set_u64(target, val, l);
+               write_data(f, val, 8);
+       } else {
+               uint8_t val[4];
+               target_buffer_set_u32(target, val, l);
+               write_data(f, val, 4);
+       }
+}
+
 static void write_string(FILE *f, char *s)
 {
        write_data(f, s, strlen(s));
@@ -4205,8 +4240,9 @@ static void write_string(FILE *f, char *s)
 typedef unsigned char UNIT[2];  /* unit of profiling */
 
 /* Dump a gmon.out histogram file. */
-static void write_gmon(uint32_t *samples, uint32_t sample_num, const char 
*filename, bool with_range,
-                       uint32_t start_address, uint32_t end_address, struct 
target *target, uint32_t duration_ms)
+static void write_gmon(uint32_t *samples, uint32_t sample_address_hi32, 
uint32_t sample_num,
+               const char *filename, bool with_range, uint64_t start_address, 
uint64_t end_address,
+               struct target *target, uint32_t duration_ms)
 {
        uint32_t i;
        FILE *f = fopen(filename, "w");
@@ -4222,42 +4258,43 @@ static void write_gmon(uint32_t *samples, uint32_t 
sample_num, const char *filen
        write_data(f, &zero, 1);
 
        /* figure out bucket size */
-       uint32_t min;
-       uint32_t max;
+       uint64_t min;
+       uint64_t max;
        if (with_range) {
                min = start_address;
                max = end_address;
        } else {
-               min = samples[0];
-               max = samples[0];
+               min = ((uint64_t)sample_address_hi32 << 32) | samples[0];
+               max = ((uint64_t)sample_address_hi32 << 32) | samples[0];
                for (i = 0; i < sample_num; i++) {
-                       if (min > samples[i])
-                               min = samples[i];
-                       if (max < samples[i])
-                               max = samples[i];
+                       uint64_t sample = ((uint64_t)sample_address_hi32 << 32) 
| samples[i];
+                       if (min > sample)
+                               min = sample;
+                       if (max < sample)
+                               max = sample;
                }
 
                /* max should be (largest sample + 1)
                 * Refer to binutils/gprof/hist.c (find_histogram_for_pc) */
-               if (max < UINT32_MAX)
+               if (max < UINT64_MAX)
                        max++;
 
                /* gprof requires (max - min) >= 2 */
                while ((max - min) < 2) {
-                       if (max < UINT32_MAX)
+                       if (max < UINT64_MAX)
                                max++;
                        else
                                min--;
                }
        }
 
-       uint32_t address_space = max - min;
+       uint64_t address_space = max - min;
 
        /* FIXME: What is the reasonable number of buckets?
         * The profiling result will be more accurate if there are enough 
buckets. */
-       static const uint32_t max_buckets = 128 * 1024; /* maximum buckets. */
-       uint32_t num_buckets = address_space / sizeof(UNIT);
-       if (num_buckets > max_buckets)
+       static const uint32_t max_buckets = 128 * 1024 * 1024; /* maximum 
buckets. */
+       uint64_t num_buckets = address_space / sizeof(UNIT);
+       if (num_buckets > (uint64_t)max_buckets)
                num_buckets = max_buckets;
        int *buckets = malloc(sizeof(int) * num_buckets);
        if (!buckets) {
@@ -4266,22 +4303,23 @@ static void write_gmon(uint32_t *samples, uint32_t 
sample_num, const char *filen
        }
        memset(buckets, 0, sizeof(int) * num_buckets);
        for (i = 0; i < sample_num; i++) {
-               uint32_t address = samples[i];
+               uint64_t address = ((uint64_t)sample_address_hi32 << 32) | 
samples[i];
 
                if ((address < min) || (max <= address))
                        continue;
 
-               long long a = address - min;
-               long long b = num_buckets;
-               long long c = address_space;
+               int64_t a = address - min;
+               int64_t b = num_buckets;
+               int64_t c = address_space;
                int index_t = (a * b) / c; /* danger!!!! int32 overflows */
                buckets[index_t]++;
        }
 
+
        /* append binary memory gmon.out &profile_hist_hdr 
((char*)&profile_hist_hdr + sizeof(struct gmon_hist_hdr)) */
-       write_long(f, min, target);                     /* low_pc */
-       write_long(f, max, target);                     /* high_pc */
-       write_long(f, num_buckets, target);     /* # of buckets */
+       write_vma(f, min, target);                      /* low_pc */
+       write_vma(f, max, target);                      /* high_pc */
+       write_long(f, (uint32_t)num_buckets, target);   /* # of buckets */
        float sample_rate = sample_num / (duration_ms / 1000.0);
        write_long(f, sample_rate, target);
        write_string(f, "seconds");
@@ -4327,13 +4365,13 @@ COMMAND_HANDLER(handle_profile_command)
 
        COMMAND_PARSE_NUMBER(u32, CMD_ARGV[0], offset);
 
-       uint32_t start_address = 0;
-       uint32_t end_address = 0;
+       uint64_t start_address = 0;
+       uint64_t end_address = 0;
        bool with_range = false;
        if (CMD_ARGC == 4) {
                with_range = true;
-               COMMAND_PARSE_NUMBER(u32, CMD_ARGV[2], start_address);
-               COMMAND_PARSE_NUMBER(u32, CMD_ARGV[3], end_address);
+               COMMAND_PARSE_NUMBER(u64, CMD_ARGV[2], start_address);
+               COMMAND_PARSE_NUMBER(u64, CMD_ARGV[3], end_address);
                if (start_address > end_address || (end_address - 
start_address) < 2) {
                        command_print(CMD, "Error: end - start < 2");
                        return ERROR_COMMAND_ARGUMENT_INVALID;
@@ -4345,6 +4383,7 @@ COMMAND_HANDLER(handle_profile_command)
                LOG_ERROR("No memory to store samples.");
                return ERROR_FAIL;
        }
+       uint32_t sample_address_hi32 = 0;
 
        uint64_t timestart_ms = timeval_ms();
        /**
@@ -4352,8 +4391,9 @@ COMMAND_HANDLER(handle_profile_command)
         * annoying halt/resume step; for example, ARMv7 PCSR.
         * Provide a way to use that more efficient mechanism.
         */
-       retval = target_profiling(target, samples, MAX_PROFILE_SAMPLE_NUM,
-                               &num_of_samples, offset);
+       retval = target_profiling(target, samples, &sample_address_hi32,
+                               with_range, start_address, end_address,
+                               MAX_PROFILE_SAMPLE_NUM, &num_of_samples, 
offset);
        if (retval != ERROR_OK) {
                free(samples);
                return retval;
@@ -4392,7 +4432,7 @@ COMMAND_HANDLER(handle_profile_command)
                return retval;
        }
 
-       write_gmon(samples, num_of_samples, CMD_ARGV[1],
+       write_gmon(samples, sample_address_hi32, num_of_samples, CMD_ARGV[1],
                   with_range, start_address, end_address, target, duration_ms);
        command_print(CMD, "Wrote %s", CMD_ARGV[1]);
 
diff --git a/src/target/target.h b/src/target/target.h
index 03db3950ce..6bc4a50325 100644
--- a/src/target/target.h
+++ b/src/target/target.h
@@ -781,8 +781,9 @@ void target_handle_md_output(struct command_invocation *cmd,
        struct target *target, target_addr_t address, unsigned size,
        unsigned count, const uint8_t *buffer);
 
-int target_profiling_default(struct target *target, uint32_t *samples, uint32_t
-               max_num_samples, uint32_t *num_samples, uint32_t seconds);
+int target_profiling_default(struct target *target, uint32_t *samples, 
uint32_t *sample_address_hi32,
+               bool with_range, uint64_t start_address, uint64_t end_address,
+               uint32_t max_num_samples, uint32_t *num_samples, uint32_t 
seconds);
 
 #define ERROR_TARGET_INVALID   (-300)
 #define ERROR_TARGET_INIT_FAILED (-301)
diff --git a/src/target/target_type.h b/src/target/target_type.h
index bc42c2d16e..50a164e1a1 100644
--- a/src/target/target_type.h
+++ b/src/target/target_type.h
@@ -297,7 +297,8 @@ struct target_type {
 
        /* do target profiling
         */
-       int (*profiling)(struct target *target, uint32_t *samples,
+       int (*profiling)(struct target *target, uint32_t *samples, uint32_t 
*sample_address_hi32,
+                       bool with_range, uint64_t start_address, uint64_t 
end_address,
                        uint32_t max_num_samples, uint32_t *num_samples, 
uint32_t seconds);
 
        /* Return the number of address bits this target supports. This will

-- 

Reply via email to