Re: [PATCH v5 18/18] s390x: pv: Add dump support

Janis Schoetterl-Glausch Thu, 01 Sep 2022 03:28:09 -0700

On Thu, 2022-08-11 at 12:11 +0000, Janosch Frank wrote:
> Sometimes dumping a guest from the outside is the only way to get the
> data that is needed. This can be the case if a dumping mechanism like
> KDUMP hasn't been configured or data needs to be fetched at a specific
> point. Dumping a protected guest from the outside without help from
> fw/hw doesn't yield sufficient data to be useful. Hence we now
> introduce PV dump support.
> 
> The PV dump support works by integrating the firmware into the dump
> process. New Ultravisor calls are used to initiate the dump process,
> dump cpu data, dump memory state and lastly complete the dump process.
> The UV calls are exposed by KVM via the new KVM_PV_DUMP command and
> its subcommands. The guest's data is fully encrypted and can only be
> decrypted by the entity that owns the customer communication key for
> the dumped guest. Also dumping needs to be allowed via a flag in the
> SE header.
> 
> On the QEMU side of things we store the PV dump data in the newly
> introduced architecture ELF sections (storage state and completion
> data) and the cpu notes (for cpu dump data).
> 
> Users can use the zgetdump tool to convert the encrypted QEMU dump to an
> unencrypted one.


Does PV dump work when memory is being filtered? Are there any
constraints on the filter parameters, alignment or so?
> 
> Signed-off-by: Janosch Frank <fran...@linux.ibm.com>
> ---
>  dump/dump.c              |  12 +-
>  include/sysemu/dump.h    |   5 +
>  target/s390x/arch_dump.c | 242 ++++++++++++++++++++++++++++++++++-----
>  3 files changed, 227 insertions(+), 32 deletions(-)

[...]
>  
>  typedef struct NoteFuncDescStruct {
>      int contents_size;
> +    uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents 
> */
>      void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
> +    bool pvonly;
>  } NoteFuncDesc;
>  
>  static const NoteFuncDesc note_core[] = {
> -    {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
> -    {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
> -    { 0, NULL}
> +    {sizeof_field(Note, contents.prstatus), NULL, 
> s390x_write_elf64_prstatus, false},
> +    {sizeof_field(Note, contents.fpregset), NULL, 
> s390x_write_elf64_fpregset, false},
> +    { 0, NULL, NULL}
>  };
>  
>  static const NoteFuncDesc note_linux[] = {
> -    {sizeof_field(Note, contents.prefix),   s390x_write_elf64_prefix},
> -    {sizeof_field(Note, contents.ctrs),     s390x_write_elf64_ctrs},
> -    {sizeof_field(Note, contents.timer),    s390x_write_elf64_timer},
> -    {sizeof_field(Note, contents.todcmp),   s390x_write_elf64_todcmp},
> -    {sizeof_field(Note, contents.todpreg),  s390x_write_elf64_todpreg},
> -    {sizeof_field(Note, contents.vregslo),  s390x_write_elf64_vregslo},
> -    {sizeof_field(Note, contents.vregshi),  s390x_write_elf64_vregshi},
> -    {sizeof_field(Note, contents.gscb),     s390x_write_elf64_gscb},
> -    { 0, NULL}
> +    {sizeof_field(Note, contents.prefix),   NULL, s390x_write_elf64_prefix,  
> false},
> +    {sizeof_field(Note, contents.ctrs),     NULL, s390x_write_elf64_ctrs,    
> false},
> +    {sizeof_field(Note, contents.timer),    NULL, s390x_write_elf64_timer,   
> false},
> +    {sizeof_field(Note, contents.todcmp),   NULL, s390x_write_elf64_todcmp,  
> false},
> +    {sizeof_field(Note, contents.todpreg),  NULL, s390x_write_elf64_todpreg, 
> false},
> +    {sizeof_field(Note, contents.vregslo),  NULL, s390x_write_elf64_vregslo, 
> false},
> +    {sizeof_field(Note, contents.vregshi),  NULL, s390x_write_elf64_vregshi, 
> false},
> +    {sizeof_field(Note, contents.gscb),     NULL, s390x_write_elf64_gscb,    
> false},
> +    {0, kvm_s390_pv_dmp_get_size_cpu,       s390x_write_elf64_pv, true},
> +    { 0, NULL, NULL}
>  };
>  
>  static int s390x_write_elf64_notes(const char *note_name,
> @@ -207,22 +226,41 @@ static int s390x_write_elf64_notes(const char 
> *note_name,
>                                         DumpState *s,
>                                         const NoteFuncDesc *funcs)
>  {
> -    Note note;
> +    Note note, *notep;
>      const NoteFuncDesc *nf;
> -    int note_size;
> +    int note_size, content_size;

Could make those size_t. I guess it's not necessary, but we're kind of
a dumb pipe for data from the ultravisor so there's something to be
said for not making assumptions.

>      int ret = -1;
>  
>      assert(strlen(note_name) < sizeof(note.name));
>  
>      for (nf = funcs; nf->note_contents_func; nf++) {
> -        memset(&note, 0, sizeof(note));
> -        note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
> -        note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
> -        g_strlcpy(note.name, note_name, sizeof(note.name));
> -        (*nf->note_contents_func)(&note, cpu, id);
> +        notep = &note;
> +        if (nf->pvonly && !s390_is_pv()) {
> +            continue;
> +        }
>  
> -        note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
> -        ret = f(&note, note_size, s);
> +        content_size = nf->contents_size ? nf->contents_size : 
> nf->note_size_func();

Your comment above says
/* NULL for non-dynamic sized contents */
So it makes sense to condition on that, i.e.
+        content_size = nf->contents_size_func ? nf->note_size_func() : 
nf->contents_size;
And it makes it consistent with the ifs below

> +        note_size = sizeof(note) - sizeof(notep->contents) + content_size;
> +
> +        /* Notes with dynamic sizes need to allocate a note */
> +        if (nf->note_size_func) {
> +            notep = g_malloc0(note_size);
> +        }
> +
> +        memset(notep, 0, sizeof(note));
> +
> +        /* Setup note header data */
> +        notep->hdr.n_descsz = cpu_to_be32(content_size);
> +        notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
> +        g_strlcpy(notep->name, note_name, sizeof(notep->name));
> +
> +        /* Get contents and write them out */
> +        (*nf->note_contents_func)(notep, cpu, id);
> +        ret = f(notep, note_size, s);
> +
> +        if (nf->note_size_func) {
> +            g_free(notep);
> +        }
>  
>          if (ret < 0) {
>              return -1;
> @@ -247,12 +285,161 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, 
> CPUState *cs,
>      return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
>  }
>  
> +/* PV dump section size functions */
> +static uint64_t get_dump_stor_state_size_from_len(uint64_t len)
> +{
> +    return (len / (1 << 20)) * kvm_s390_pv_dmp_get_size_stor_state();
> +}
> +
> +static uint64_t get_size_stor_state(DumpState *s)
> +{
> +    return get_dump_stor_state_size_from_len(s->total_size);
> +}
> +
> +static uint64_t get_size_complete(DumpState *s)
> +{
> +    return kvm_s390_pv_dmp_get_size_complete();
> +}
> +
> +/* PV dump section data functions*/
> +static int get_data_complete(DumpState *s, uint8_t *buff)
> +{
> +    int rc;
> +
> +    if (!pv_dump_initialized) {
> +        return 0;
> +    }
> +    rc = kvm_s390_dump_complete(buff);
> +    if (!rc) {
> +            pv_dump_initialized = false;
> +    }
> +    return rc;
> +}
> +
> +static int dump_mem(DumpState *s, uint64_t gaddr, uint8_t *buff, uint64_t 
> buff_len)

The DumpState arg is being ignored.

> +{
> +    /* We need the gaddr + len and something to write to */
> +    if (!pv_dump_initialized) {
> +        return 0;
> +    }
> +    return kvm_s390_dump_mem(gaddr, buff_len, buff);
> +}
> +
> +static int get_data_mem(DumpState *s, uint8_t *buff)
> +{
> +    int64_t memblock_size, memblock_start;
> +    GuestPhysBlock *block;
> +    uint64_t off;
> +
> +    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
> +        memblock_start = dump_filtered_memblock_start(block, 
> s->filter_area_begin,
> +                                                      s->filter_area_length);
> +        if (memblock_start == -1) {
> +            continue;
> +        }
> +
> +        memblock_size = dump_filtered_memblock_size(block, 
> s->filter_area_begin,
> +                                                    s->filter_area_length);
> +
> +        off = get_dump_stor_state_size_from_len(block->target_start);
> +        dump_mem(s, block->target_start, buff + off,
> +                 get_dump_stor_state_size_from_len(memblock_size));

This ignores the return value, if this is intentional a comment
explaining it would be nice.

> +    }
> +
> +    return 0;
> +}
> +
> +struct sections {
> +    uint64_t (*sections_size_func)(DumpState *s);
> +    int (*sections_contents_func)(DumpState *s, uint8_t *buff);
> +    char sctn_str[12];
> +} sections[] = {
> +    { get_size_stor_state, get_data_mem, "pv_mem_meta"},
> +    { get_size_complete, get_data_complete, "pv_compl"},
> +    {NULL , NULL, ""}
> +};

Could be static right?

> +
> +static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff)
> +{
> +    Elf64_Shdr *shdr = (void *)buff;
> +    struct sections *sctn = sections;
> +    uint64_t off = s->section_offset;
> +
> +    if (!s390_is_pv()) {
> +        return 0;
> +    }
> +
> +    for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) {
> +        memset(shdr, 0, sizeof(*shdr));

This isn't necessary since the header mem is zero allocated, but you
might not want to make guarantees about that. Maybe consider doing a
normal malloc and memsetting just the special 0 index section header in
dump.c.

> +        shdr->sh_type = SHT_PROGBITS;
> +        shdr->sh_offset = off;
> +        shdr->sh_size = sctn->sections_size_func(s);
> +        shdr->sh_name = s->string_table_buf->len;
> +        g_array_append_vals(s->string_table_buf, sctn->sctn_str, 
> sizeof(sctn->sctn_str));
> +    }
> +
> +    return (uintptr_t)shdr - (uintptr_t)buff;
> +}
> +
> +
> +/* Add arch specific number of sections and their respective sizes */
> +static void arch_sections_add(DumpState *s)
> +{
> +    struct sections *sctn = sections;
> +
> +    /*
> +     * We only do a PV dump if we are running a PV guest, KVM supports
> +     * the dump API and we got valid dump length information.
> +     */
> +    if (!s390_is_pv() || !kvm_s390_get_protected_dump() ||
> +        !kvm_s390_pv_info_basic_valid()) {
> +        return;
> +    }
> +
> +    /*
> +     * Start the UV dump process by doing the initialize dump call via
> +     * KVM as the proxy.
> +     */
> +    if (!kvm_s390_dump_init()) {
> +            pv_dump_initialized = true;
> +    }
> +
> +    for (; sctn->sections_size_func; sctn++) {
> +        s->shdr_num += 1;
> +        s->elf_section_data_size += sctn->sections_size_func(s);
> +    }
> +
> +    /* We use the string table to identify the sections */
> +    s->string_table_usage = true;

In dump.c this value is being set if shdr_num > 0, so this seems
redundant.

> +}
> +
> +/*
> + * After the PV dump has been initialized, the CPU data has been
> + * fetched and memory has been dumped, we need to grab the tweak data
> + * and the completion data.
> + */
> +static void arch_sections_write(DumpState *s, uint8_t *buff)
> +{
> +    struct sections *sctn = sections;
> +
> +    /* shdr_num should only have been set > 1 if we are protected */
> +    assert(s390_is_pv());
> +
> +    for (; sctn->sections_size_func; sctn++) {
> +        sctn->sections_contents_func(s, buff);

As above with regards to ignoring return values.

> +        buff += sctn->sections_size_func(s);
> +    }
> +}
> +
>  int cpu_get_dump_info(ArchDumpInfo *info,
>                        const struct GuestPhysBlockList *guest_phys_blocks)
>  {
>      info->d_machine = EM_S390;
>      info->d_endian = ELFDATA2MSB;
>      info->d_class = ELFCLASS64;
> +    info->arch_sections_add_fn = *arch_sections_add;
> +    info->arch_sections_write_hdr_fn = *arch_sections_write_hdr;
> +    info->arch_sections_write_fn = *arch_sections_write;
>  
>      return 0;
>  }
> @@ -261,7 +448,7 @@ ssize_t cpu_get_note_size(int class, int machine, int 
> nr_cpus)
>  {
>      int name_size = 8; /* "LINUX" or "CORE" + pad */
>      size_t elf_note_size = 0;
> -    int note_head_size;
> +    int note_head_size, content_size;
>      const NoteFuncDesc *nf;
>  
>      assert(class == ELFCLASS64);
> @@ -270,12 +457,15 @@ ssize_t cpu_get_note_size(int class, int machine, int 
> nr_cpus)
>      note_head_size = sizeof(Elf64_Nhdr);
>  
>      for (nf = note_core; nf->note_contents_func; nf++) {
> -        elf_note_size = elf_note_size + note_head_size + name_size +
> -                        nf->contents_size;
> +        elf_note_size = elf_note_size + note_head_size + name_size + 
> nf->contents_size;
>      }
>      for (nf = note_linux; nf->note_contents_func; nf++) {
> +        if (nf->pvonly && !s390_is_pv()) {
> +            continue;
> +        }
> +        content_size = nf->contents_size ? nf->contents_size : 
> nf->note_size_func();
>          elf_note_size = elf_note_size + note_head_size + name_size +
> -                        nf->contents_size;
> +                        content_size;
>      }
>  
>      return (elf_note_size) * nr_cpus;

Re: [PATCH v5 18/18] s390x: pv: Add dump support

Reply via email to