On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote:
> This adds a fairly brain dead torture test for TLB flushes intended for
> stressing the MTTCG QEMU build. It takes the usual -smp option for
> multiple CPUs.
>
> By default it will do a TLBIALL flush after each cycle. If you pass
> -append "page" to the kernel it will take it in turns to flush each of
> the computation functions. At the moment it doesn't do any re-mapping of
> pages but maybe that is something that could be done in the future.
>
> [DEV VERSION FOR COMMENT]
> Signed-off-by: Alex Bennée <[email protected]>
> ---
> arm/tlbflush.c | 163
> +++++++++++++++++++++++++++++++++++++++++++
> config/config-arm-common.mak | 4 +-
> lib/arm/asm/mmu.h | 11 +++
> 3 files changed, 177 insertions(+), 1 deletion(-)
> create mode 100644 arm/tlbflush.c
>
> diff --git a/arm/tlbflush.c b/arm/tlbflush.c
> new file mode 100644
> index 0000000..6eeff18
> --- /dev/null
> +++ b/arm/tlbflush.c
> @@ -0,0 +1,163 @@
> +#include <libcflat.h>
> +#include <asm/smp.h>
> +#include <asm/cpumask.h>
> +#include <asm/barrier.h>
> +#include <asm/mmu.h>
> +
> +#define SEQ_LENGTH 10
> +
> +static cpumask_t smp_test_complete;
> +static int flush_count = 100000;
> +static int flush_self = 1;
> +static int flush_page = 0;
> +
> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length,
> unsigned int *array)
You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing
the test to also work for aarch64, as we're using 64k pages on aarch64.
> +{
> + int i;
> + unsigned int sum=0;
> + for (i=0; i<length; i++)
> + {
> + unsigned int val = *array++;
> + sum ^= val;
> + sum ^= (val >> (val % 16));
> + sum ^= (val << (val % 32));
> + }
> +
> + return sum;
> +}
> +
> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length,
> unsigned int *array)
> +{
> + int i;
> +
> + /* first two values */
> + array[0] = 0;
> + array[1] = 1;
> + for (i=2; i<length; i++)
> + {
> + array[i] = array[i-2] + array[i-1];
> + }
> +}
> +
> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n)
> +{
> + unsigned int i;
> + unsigned long long fac = 1;
> + for (i=1; i<=n; i++)
> + {
> + fac = fac * i;
> + }
> + return fac;
> +}
> +
> +/* do some computationally expensive stuff, return a checksum of the
> + * results */
> +__attribute__((aligned(0x1000))) unsigned int do_computation(void)
> +{
> + unsigned int fib_array[SEQ_LENGTH];
> + unsigned long long facfib_array[SEQ_LENGTH];
> + unsigned int fib_hash, facfib_hash;
> + int cpu = smp_processor_id();
> + int i, j;
> +
> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]);
> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]);
> + for (i=0; i<SEQ_LENGTH; i++) {
> + for (j=0; j<fib_array[i]; j++) {
> + facfib_array[i] = factorial(fib_array[i]+j);
> + }
> + }
> + facfib_hash = 0;
> + for (i=0; i<SEQ_LENGTH; i++) {
> + for (j=0; j<fib_array[i]; j++) {
> + facfib_hash ^=
> hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int
> *)&facfib_array[0]);
> + }
> + }
> +
> +#if 0
> + printf("CPU:%d FIBSEQ ", cpu);
> + for (i=0; i<SEQ_LENGTH; i++)
> + printf("%u,", fib_array[i]);
> + printf("\n");
> +
> + printf("CPU:%d FACFIB ", cpu);
> + for (i=0; i<SEQ_LENGTH; i++)
> + printf("%llu,", facfib_array[i]);
> + printf("\n");
> +#endif
> +
> + return (fib_hash ^ facfib_hash);
> +}
> +
> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial,
> &do_computation};
I can't comment on whether or not the complexity of do_computation is
necessary for your test, but it seems like overkill. Comments explaining
why it's necessary would be good.
> +
> +static void test_flush(void)
> +{
> + int i, errors = 0;
> + int cpu = smp_processor_id();
> +
> + unsigned int ref;
> +
> + printf("CPU%d online\n", cpu);
> +
> + ref = do_computation();
What makes you sure that the first computation done on each cpu is
correct? I think computing the result externally and saving it, e.g.
#define EXPECTED_RESULT 0x12345678
would be more reliable.
> +
> + for (i=0; i < flush_count; i++) {
> + unsigned int this_ref = do_computation();
> +
> + if (this_ref != ref) {
> + errors++;
> + printf("CPU%d: seq%d 0x%x!=0x%x\n",
> + cpu, i, ref, this_ref);
> + }
> +
> + if ((i % 1000) == 0) {
> + printf("CPU%d: seq%d\n", cpu, i);
> + }
> +
> + if (flush_self) {
> + if (flush_page) {
> + int j = (i % (sizeof(pages)/sizeof(void *)));
libcflat.h has the ARRAY_SIZE macro; please use ARRAY_SIZE(pages) here.
> + flush_tlb_page((unsigned long)pages[j]);
> + } else {
> + flush_tlb_all();
> + }
> + }
> + }
> +
> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors);
> +
> + cpumask_set_cpu(cpu, &smp_test_complete);
> + if (cpu != 0)
> + halt();
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int cpu, i;
> +
> + report_prefix_push("tlbflush");
> +
> + for (i=0; i<argc; i++) {
> + char *arg = argv[i];
> +/* printf("arg:%d:%s\n", i, arg); */
> +
> + if (strcmp(arg, "page") == 0) {
> + report_prefix_push("page");
> + flush_page = 1;
> + }
> + }
> +
> + for_each_present_cpu(cpu) {
> + if (cpu == 0)
> + continue;
> + smp_boot_secondary(cpu, test_flush);
> + }
> +
> + test_flush();
> +
> + while (!cpumask_full(&smp_test_complete))
> + cpu_relax();
> +
> + return report_summary();
As we use the kernel coding style you should run
$KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c
Also, please rename to tlbflush-test.c to differentiate it
from an implementation of tlbflush support, and to make
the standalone test name (if we commit those patches) more
descriptive.
> +}
> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
> index 0674daa..5b14db4 100644
> --- a/config/config-arm-common.mak
> +++ b/config/config-arm-common.mak
> @@ -11,7 +11,8 @@ endif
>
> tests-common = \
> $(TEST_DIR)/selftest.flat \
> - $(TEST_DIR)/spinlock-test.flat
> + $(TEST_DIR)/spinlock-test.flat \
> + $(TEST_DIR)/tlbflush.flat
As we're adding tests faster now it's becoming clear that the '\' list
isn't so great. To add a new test at the bottom we always have to modify
the last line too. We should either add the new one at the top (right
below the 'tests-common =' line), or change this to a '+=' sequence like
some other lists are done.
>
> all: test_cases
>
> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests)
>
> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o
> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> index c1bd01c..2bb0cde 100644
> --- a/lib/arm/asm/mmu.h
> +++ b/lib/arm/asm/mmu.h
> @@ -14,8 +14,11 @@
> #define PTE_AF PTE_EXT_AF
> #define PTE_WBWA L_PTE_MT_WRITEALLOC
>
> +/* See B3.18.7 TLB maintenance operations */
> +
> static inline void local_flush_tlb_all(void)
> {
> + /* TLBIALL */
> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0));
> dsb();
> isb();
> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void)
> local_flush_tlb_all();
> }
>
> +static inline void flush_tlb_page(unsigned long vaddr)
> +{
> + /* TLBIMVAA */
> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr));
> + dsb();
> + isb();
> +}
> +
> #include <asm/mmu-api.h>
>
> #endif /* __ASMARM_MMU_H_ */
This mmu.h change looks good, but please add the arm64
flush_tlb_page at the same time. And anyway, I guess you'll
want your test to work for both arm and aarch64?
Thanks,
drew
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html