Hello Norbert,

On Mon, 14 May 2012 16:28:49 +0200
"Trapp, Norbert" <[email protected]> wrote:

> Dear Kumagai-San,
> 
> the relevant functions for xen4 in makedumpfile
> for saving just the xen and dom0 pages are
> exclude_xen4_user_domain and kvtop_xen_x86_64.
> Our current version of kvtop_xen_x86_64 also
> tries to check for 1GB pages and reads the page
> list faster like the crash utility does.
> I send you our current versions.
> 
> As was suggested I sent the implementation to the
> xen mailing list last year but didn't get any answer.

I'm sorry for the late reply.

> We check the core file data to find out whether
> it is a xen3 or a xen4 dump and then the xen3
> or xen4 functions get called. Alternatively
> there could be #ifdefs but I do not know how
> the makedumpfile rpms would then be produced.
> Probably the xen3 code should not yet be removed.
> 
> Also we only implemented xen4 for x86_64.
> Maybe there should be an error message for
> other architectures as long as nobody hands in
> an implementation.

Thank you for your explanation.

I think your idea is good, but I can't review the details of your code.
So, could you repost your whole current version to kexec-ML as a series
of separate patches, so that you can get advice there?

  kexec-ML:
    http://lists.infradead.org/pipermail/kexec/


Thanks
Atsushi Kumagai
 
> Greetings
> Norbert
> 
> /*
>  * Bit layout of the count_info and type_info words of a Xen 4 page_info.
>  * Flags are numbered from the most significant bit of an unsigned long, so
>  * their absolute position depends on the word size selected below.
>  * NOTE(review): the values appear to mirror Xen's own page_info flag
>  * definitions -- confirm against the Xen version being supported.
>  */
> #if defined(__i386__)
> #define BITS_PER_LONG 32
> #define BYTES_PER_LONG 4
> #define LONG_BYTEORDER 2
> #elif defined(__x86_64__)
> #define BITS_PER_LONG 64
> #define BYTES_PER_LONG 8
> #define LONG_BYTEORDER 3
> #endif
> 
> /* PG_shift(idx): shift count that places a value idx bits below the top. */
> #define PG_shift(idx) (BITS_PER_LONG - (idx))
> /* PG_mask(x, idx): the literal x (given without suffix) moved to that place. */
> #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
> 
> /* Flags held in page_info.count_info. */
> #define PGC_allocated       PG_mask(1, 1)
> #define PGC_xen_heap        PG_mask(1, 2)
> #define PGC_page_table      PG_mask(1, 3)
> #define PGC_broken          PG_mask(1, 7)
> #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
> 
> /* Two-bit page state field of count_info and its four values. */
> #define PGC_state           PG_mask(3, 9)
> #define PGC_state_inuse     PG_mask(0, 9)
> #define PGC_state_offlining PG_mask(1, 9)
> #define PGC_state_offlined  PG_mask(2, 9)
> #define PGC_state_free      PG_mask(3, 9)
> 
> /* Everything below the flag bits of count_info is the reference count. */
> #define PGC_count_width     PG_shift(9)
> #define PGC_count_mask ((1UL<<PGC_count_width)-1)
> 
> /* Page type field and flags held in page_info type_info. */
> #define PGT_none          PG_mask(0, 4)  /* no special uses of this page   */
> #define PGT_l1_page_table PG_mask(1, 4)  /* using as an L1 page table?     */
> #define PGT_l2_page_table PG_mask(2, 4)  /* using as an L2 page table?     */
> #define PGT_l3_page_table PG_mask(3, 4)  /* using as an L3 page table?     */
> #define PGT_l4_page_table PG_mask(4, 4)  /* using as an L4 page table?     */
> #define PGT_seg_desc_page PG_mask(5, 4)  /* using this page in a GDT/LDT?  */
> #define PGT_writable_page PG_mask(7, 4)  /* has writable mappings?         */
> #define PGT_shared_page   PG_mask(8, 4)  /* CoW sharable page              */
> #define PGT_type_mask     PG_mask(15, 4) /* Bits 28-31 or 60-63.           */
> #define PGT_pinned        PG_mask(1, 5)
> #define PGT_validated     PG_mask(1, 6)
> #define PGT_pae_xen_l2    PG_mask(1, 7)
> #define PGT_partial       PG_mask(1, 8)
> #define PGT_locked        PG_mask(1, 9)
> #define PGT_count_width   PG_shift(9)
> #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
> 
> /*
>  * Return part as a percentage of whole, or 0 when whole is 0, so that the
>  * statistics below cannot divide by zero for an empty segment or dump.
>  */
> static unsigned long long
> xen4_percent(unsigned long long part, unsigned long long whole)
> {
>         return whole ? part * 100 / whole : 0;
> }
> 
> /*
>  * Drop the pages of unselected domains from a Xen 4 dump.
>  *
>  * For every pfn of every PT_LOAD segment the matching page_info entry is
>  * read from the frame table and classified, in this order:
>  *   - page_info can't be located or read        -> pfn cleared
>  *   - PGC_xen_heap set                          -> kept
>  *   - PGC_allocated set: kept when _domain is 0 or is_select_domain()
>  *     accepts it, otherwise                     -> pfn cleared
>  *   - state is PGC_state_inuse                  -> kept
>  *   - state is PGC_state_free                   -> pfn cleared
>  *   - PGC_broken set                            -> pfn cleared
>  *   - anything else is counted as unknown and      kept
>  * "Cleared" means clear_bit_on_2nd_bitmap(pfn).  Per-segment and total
>  * statistics are emitted through DEBUG_MSG.
>  *
>  * Always returns TRUE: an unreadable page_info is counted, not fatal.
>  */
> int
> exclude_xen4_user_domain(void)
> {
>         int i;
>         unsigned long deleted_pages, total_deleted_pages = 0;
>         unsigned long state_free, total_state_free = 0;
>         unsigned long xen_heap, total_xen_heap = 0;
>         unsigned long allocated, total_allocated = 0;
>         unsigned long selected_domain, total_selected_domain = 0;
>         unsigned long not_selected_domain, total_not_selected_domain = 0;
>         unsigned long not_a_page, total_not_a_page = 0;
>         unsigned long page_not_readable, total_page_not_readable = 0;
>         unsigned long unknown_page_type, total_unknown_page_type = 0;
>         unsigned long not_a_page_offset, total_not_a_page_offset = 0;
>         unsigned long broken_pages, total_broken_pages = 0;
>         unsigned long page_in_use, total_page_in_use = 0;
>         unsigned long count_info;
>         unsigned int  _domain;
>         unsigned long page_info_addr;
>         unsigned long long pfn, pfn_end;
>         unsigned long long num_pages, total_num_pages = 0;
>         unsigned long long num_pfn_done = 0, num_one_percent_pfn;
>         struct pt_load_segment *pls;
>         char page_info_local[SIZE(page_info)];
>         int readable;
>         unsigned long long paddr;
>         off_t offset;
>         const off_t failed = (off_t)-1;
> 
>         /*
>          * NOTE: the first half of bitmap is not used for Xen extraction
>          */
>         print_progress(PROGRESS_XEN_DOMAIN, 0, 1);
>         DEBUG_MSG("\nmakedumpfile: exclude_xen4_user_domain start\n");
>         DEBUG_MSG("XEN_VIRT_START       : 0x%016lx\n", XEN_VIRT_START);
>         DEBUG_MSG("XEN_VIRT_END         : 0x%016lx\n", XEN_VIRT_END);
>         DEBUG_MSG("DIRECTMAP_VIRT_START : 0x%016lx\n", DIRECTMAP_VIRT_START);
>         DEBUG_MSG("DIRECTMAP_VIRT_END   : 0x%016lx\n", DIRECTMAP_VIRT_END);
>         DEBUG_MSG("FRAMETABLE_VIRT_START: 0x%016lx\n", FRAMETABLE_VIRT_START);
>         DEBUG_MSG("FRAMETABLE_VIRT_END  : 0x%016lx\n", FRAMETABLE_VIRT_END);
>         DEBUG_MSG("FRAMETABLE_SIZE      : 0x%016lx\n", FRAMETABLE_SIZE);
>         DEBUG_MSG("frame_table_vaddr    : 0x%016lx\n", info->frame_table_vaddr);
>         DEBUG_MSG("SIZE(page_info)      : 0x%016lx\n", SIZE(page_info));
>         DEBUG_MSG("PAGESIZE()           : 0x%016lx\n", PAGESIZE());
>         DEBUG_MSG("_2MB_PAGE_MASK       : 0x%08x\n"  , _2MB_PAGE_MASK);
>         DEBUG_MSG("_PAGE_PSE            : 0x%08x\n"  , _PAGE_PSE);
>         DEBUG_MSG("ENTRY_MASK           : 0x%016llx\n", ENTRY_MASK);
>         DEBUG_MSG("PHYSICAL_PAGE_MASK   : 0x%016lx\n", PHYSICAL_PAGE_MASK);
>         DEBUG_MSG("PGC_state_inuse      : 0x%016lx\n", PGC_state_inuse);
>         DEBUG_MSG("PGC_count_mask       : 0x%016lx\n", PGC_count_mask);
>         DEBUG_MSG("PGC_state            : 0x%016lx\n", PGC_state);
>         DEBUG_MSG("PGC_state_free       : 0x%016lx\n", PGC_state_free);
>         DEBUG_MSG("PGC_allocated        : 0x%016lx\n", PGC_allocated);
>         DEBUG_MSG("PGC_broken           : 0x%016lx\n", PGC_broken);
> 
>         /* First pass: list the segments and count the pages to process. */
>         DEBUG_MSG("exclude_xen4_user_domain: %d memory LOAD sections\n",
>                 info->num_load_memory);
>         DEBUG_MSG("section phys_start   phys_end pfn_start  pfn_end  num_pfn\n");
>         for (i = 0; i < info->num_load_memory; i++) {
>                 pls = &info->pt_load_segments[i];
>                 pfn     = pls->phys_start >> PAGESHIFT();
>                 pfn_end = pls->phys_end >> PAGESHIFT();
>                 total_num_pages += pfn_end - pfn;
>                 DEBUG_MSG("%3d 0x%016llx 0x%016llx %10llu %10llu %10llu\n",
>                         i, pls->phys_start, pls->phys_end, pfn, pfn_end,
>                         pfn_end - pfn);
>         }
>         DEBUG_MSG("exclude_xen4_user_domain total_num_pages: %llu\n",
>                 total_num_pages);
>         DEBUG_MSG("exclude_xen4_user_domain total size of pages: 0x%llx\n",
>                 total_num_pages * SIZE(page_info));
> 
>         /*
>          * Progress is reported once per percent.  With fewer than 100 pages
>          * the quotient is 0, which must not be used as a modulus.
>          */
>         num_one_percent_pfn = total_num_pages / 100;
>         if (num_one_percent_pfn == 0)
>                 num_one_percent_pfn = 1;
> 
>         /* Second pass: classify every page of every segment. */
>         for (i = 0; i < info->num_load_memory; i++) {
>                 pls = &info->pt_load_segments[i];
>                 pfn     = pls->phys_start >> PAGESHIFT();
>                 pfn_end = pls->phys_end >> PAGESHIFT();
>                 num_pages    = pfn_end - pfn;
>                 deleted_pages = 0;
>                 state_free = 0;
>                 page_in_use = 0;
>                 xen_heap = 0;
>                 allocated = 0;
>                 selected_domain = 0;
>                 not_selected_domain = 0;
>                 not_a_page = 0;
>                 not_a_page_offset = 0;
>                 page_not_readable = 0;
>                 unknown_page_type = 0;
>                 broken_pages = 0;
> 
>                 DEBUG_MSG("exclude_xen4_user_domain: i: %d/%d pfn_start: 0x%llx pfn_end: 0x%llx num_pfn: %llu\n",
>                         i, info->num_load_memory, pfn, pfn_end, pfn_end - pfn);
>                 for (; pfn < pfn_end; pfn++) {
>                         num_pfn_done++;
>                         if (((message_level & ML_PRINT_DEBUG_MSG) == 0) &&
>                             ((num_pfn_done % num_one_percent_pfn) == 0)) {
>                                 print_progress(PROGRESS_XEN_DOMAIN,
>                                         num_pfn_done, total_num_pages);
>                         }
> 
>                         /* Locate and read the page_info entry of this pfn. */
>                         page_info_addr = info->frame_table_vaddr +
>                                 pfn * SIZE(page_info);
>                         readable = FALSE;
>                         do {
>                                 paddr = kvtop_xen(page_info_addr);
>                                 if (paddr == NOT_PADDR) {
>                                         ERRMSG("NOT a physical address(%llx) for pfn %llu\n", paddr, pfn);
>                                         not_a_page++;
>                                         break;
>                                 }
>                                 if (!(offset = paddr_to_offset(paddr))) {
>                                         ERRMSG("Can't convert a physical address(%llx) to offset.\n", paddr);
>                                         not_a_page_offset++;
>                                         break;
>                                 }
>                                 if (lseek(info->fd_memory, offset, SEEK_SET) == failed) {
>                                         ERRMSG("Can't seek the dump memory(%s). %s\n", info->name_memory, strerror(errno));
>                                         page_not_readable++;
>                                         break;
>                                 }
>                                 if (read(info->fd_memory, page_info_local,
>                                     SIZE(page_info)) != SIZE(page_info)) {
>                                         ERRMSG("Can't read the dump memory(%s). %s\n", info->name_memory, strerror(errno));
>                                         page_not_readable++;
>                                         break;
>                                 }
>                                 readable = TRUE;
>                         } while (0);
>                         if (readable == FALSE) {
>                                 /* Nothing is known about the page: drop it. */
>                                 ERRMSG("retval == False\n");
>                                 deleted_pages++;
>                                 clear_bit_on_2nd_bitmap(pfn);
>                                 continue;
>                         }
> 
>                         count_info = *((unsigned long *)(page_info_local +
>                                 OFFSET(page_info.count_info)));
>                         _domain = *((unsigned int *)(page_info_local +
>                                 OFFSET(page_info._domain)));
> 
>                         if (count_info & PGC_xen_heap) {
>                                 xen_heap++;
>                                 continue;
>                         }
>                         if (count_info & PGC_allocated) {
>                                 allocated++;
>                                 if (_domain == 0)
>                                         continue;
>                                 if (is_select_domain(_domain)) {
>                                         selected_domain++;
>                                         continue;
>                                 }
>                                 not_selected_domain++;
>                                 clear_bit_on_2nd_bitmap(pfn);
>                                 deleted_pages++;
>                                 continue;
>                         }
>                         if ((count_info & PGC_state) == PGC_state_inuse) {
>                                 page_in_use++;
>                                 continue;
>                         }
>                         if ((count_info & PGC_state) == PGC_state_free) {
>                                 state_free++;
>                                 clear_bit_on_2nd_bitmap(pfn);
>                                 deleted_pages++;
>                                 continue;
>                         }
>                         if (count_info & PGC_broken) {
>                                 clear_bit_on_2nd_bitmap(pfn);
>                                 broken_pages++;
>                                 deleted_pages++;
>                                 continue;
>                         }
>                         /* Unclassified pages are kept in the dump. */
>                         unknown_page_type++;
>                 }
>                 total_deleted_pages += deleted_pages;
>                 total_not_a_page += not_a_page;
>                 total_not_a_page_offset += not_a_page_offset;
>                 total_page_not_readable += page_not_readable;
>                 total_state_free += state_free;
>                 total_page_in_use += page_in_use;
>                 total_xen_heap += xen_heap;
>                 total_allocated += allocated;
>                 total_selected_domain += selected_domain;
>                 total_not_selected_domain += not_selected_domain;
>                 total_unknown_page_type += unknown_page_type;
>                 total_broken_pages += broken_pages;
>                 DEBUG_MSG("deleted pages               : %10lu of %10llu %3llu%%\n",
>                         deleted_pages, num_pages,
>                         xen4_percent(deleted_pages, num_pages));
>                 DEBUG_MSG("       unused page          : %10lu\n", state_free);
>                 DEBUG_MSG("       not dom0 domain page : %10lu\n", not_selected_domain);
>                 DEBUG_MSG("       page address invalid : %10lu\n", not_a_page);
>                 DEBUG_MSG("       not a page offset    : %10lu\n", not_a_page_offset);
>                 DEBUG_MSG("       page not readable    : %10lu\n", page_not_readable);
>                 DEBUG_MSG("       broken page          : %10lu\n", broken_pages);
>                 DEBUG_MSG("saved pages                 : %10llu of %10llu %3llu%%\n",
>                         num_pages - deleted_pages, num_pages,
>                         xen4_percent(num_pages - deleted_pages, num_pages));
>                 DEBUG_MSG("       page in use          : %10lu\n", page_in_use);
>                 DEBUG_MSG("       xen heap page        : %10lu\n", xen_heap);
>                 DEBUG_MSG("       dom0 page            : %10lu\n", selected_domain);
>                 DEBUG_MSG("       unknown type page    : %10lu\n", unknown_page_type);
>         }
>         /*
>          * print [100 %]
>          */
>         print_progress(PROGRESS_XEN_DOMAIN, 1, 1);
>         DEBUG_MSG("\n");
>         DEBUG_MSG("total deleted pages               : %10lu of %10llu %3llu%%\n",
>                 total_deleted_pages, total_num_pages,
>                 xen4_percent(total_deleted_pages, total_num_pages));
>         DEBUG_MSG("       total unused page          : %10lu\n", total_state_free);
>         DEBUG_MSG("       total not dom0 domain page : %10lu\n", total_not_selected_domain);
>         DEBUG_MSG("       total page address invalid : %10lu\n", total_not_a_page);
>         DEBUG_MSG("       total not a page offset    : %10lu\n", total_not_a_page_offset);
>         DEBUG_MSG("       total page not readable    : %10lu\n", total_page_not_readable);
>         DEBUG_MSG("       total broken page          : %10lu\n", total_broken_pages);
>         DEBUG_MSG("total saved pages                 : %10llu of %10llu %3llu%%\n",
>                 total_num_pages - total_deleted_pages, total_num_pages,
>                 xen4_percent(total_num_pages - total_deleted_pages, total_num_pages));
>         DEBUG_MSG("       total page in use          : %10lu\n", total_page_in_use);
>         DEBUG_MSG("       total xen heap page        : %10lu\n", total_xen_heap);
>         DEBUG_MSG("       total dom0 page            : %10lu\n", total_selected_domain);
>         DEBUG_MSG("       total unknown type page    : %10lu\n", total_unknown_page_type);
>         return TRUE;
> }
> 
> /*
>  * for Xen extraction
>  */
> 
> int pml4_page_read = 0;                 /* set to 1 by kvtop_xen_x86_64 once pml4_page is filled */
> char pml4_page[4096];                   /* table page read from the address of pgd_l4 */
> char pgd_page[4096];                    /* table page the last used pml4 entry pointed to */
> unsigned long long last_pgd_read = 0;   /* address pgd_page was read from; 0 before the first read */
> char pmd_page[4096];                    /* table page the last used pgd entry pointed to */
> unsigned long long last_pmd_read = 0;   /* address pmd_page was read from; 0 before the first read */
> char pte_page[4096];                    /* table page the last used pmd entry pointed to */
> unsigned long long last_pte_read = 0;   /* compared with the next pte page address before re-reading */
> 
> /*
>  * Translate a Xen virtual address into a machine address (x86_64).
>  *
>  * Addresses accepted by is_xen_text() or is_direct() are converted by
>  * plain arithmetic, using the Xen 3 or the Xen 4 base constant depending
>  * on info->xen_major_version.  Every other Xen address is resolved by
>  * walking the page tables rooted at pgd_l4; an entry with _PAGE_PSE set
>  * ends the walk early (1GB page at the pgd level, 2MB page at the pmd
>  * level).
>  *
>  * The table page read at each level stays in the file-scope buffers
>  * pml4_page, pgd_page, pmd_page and pte_page and is only read again when
>  * a lookup needs a different page, so consecutive lookups of neighbouring
>  * addresses avoid most dump reads.
>  *
>  * Returns the translated address, or NOT_PADDR when kvaddr is not a Xen
>  * address, a table page cannot be read or an entry is not present; the
>  * numeric reason is reported through DEBUG_MSG.
>  */
> unsigned long long
> kvtop_xen_x86_64(unsigned long kvaddr)
> {
>         unsigned long long entry = 0;
>         unsigned long long pml4_entry, pml4_dirp;
>         unsigned long long pgd_entry, pgd_dirp;
>         unsigned long long pmd_entry, pmd_dirp;
>         unsigned long long root;
>         unsigned long long pgd_idx = 0;
>         unsigned long pml4_idx = 0;
>         unsigned long pmd_idx = 0;
>         int reason;
> 
>         if (!is_xen_vaddr(kvaddr)) {
>                 reason = 1;
>                 goto not_paddr;
>         }
>         if (is_xen_text(kvaddr)) {
>                 if (info->xen_major_version < 4)
>                         return kvaddr - XEN_VIRT_START_XEN3 + info->xen_phys_start;
>                 return kvaddr - XEN_VIRT_START + info->xen_phys_start;
>         }
>         if (is_direct(kvaddr)) {
>                 if (info->xen_major_version < 4)
>                         return kvaddr - DIRECTMAP_VIRT_START_XEN3;
>                 return kvaddr - DIRECTMAP_VIRT_START;
>         }
> 
>         /* Level 4: the root table is read once and kept for all later calls. */
>         pml4_idx = pml4_index(kvaddr);
>         if (pml4_page_read == 0) {
>                 root = kvtop_xen_x86_64(SYMBOL(pgd_l4));
>                 if (root == NOT_PADDR ||
>                     !readmem(MADDR_XEN, root, pml4_page, PAGESIZE())) {
>                         reason = 2;
>                         goto not_paddr;
>                 }
>                 pml4_page_read = 1;
>         }
>         pml4_entry = *(unsigned long long *)(pml4_page +
>                 pml4_idx * sizeof(unsigned long long));
>         if (!(pml4_entry & _PAGE_PRESENT)) {
>                 reason = 3;
>                 goto not_paddr;
>         }
> 
>         /* Level 3 */
>         pml4_dirp = pml4_entry & ENTRY_MASK;
>         if (pml4_dirp != last_pgd_read) {
>                 if (!readmem(MADDR_XEN, pml4_dirp, pgd_page, PAGESIZE())) {
>                         reason = 4;
>                         goto not_paddr;
>                 }
>                 last_pgd_read = pml4_dirp;
>         }
>         pgd_idx = pgd_index(kvaddr);
>         pgd_entry = *(unsigned long long *)(pgd_page +
>                 pgd_idx * sizeof(unsigned long long));
>         if (!(pgd_entry & _PAGE_PRESENT)) {
>                 reason = 5;
>                 goto not_paddr;
>         }
>         if (pgd_entry & _PAGE_PSE) { // 1GB page
>                 return (pgd_entry & ENTRY_MASK) +
>                         (kvaddr & ((1UL << PGDIR_SHIFT) - 1));
>         }
> 
>         /* Level 2 */
>         pgd_dirp = pgd_entry & ENTRY_MASK;
>         if (pgd_dirp != last_pmd_read) {
>                 if (!readmem(MADDR_XEN, pgd_dirp, pmd_page, PAGESIZE())) {
>                         reason = 6;
>                         goto not_paddr;
>                 }
>                 last_pmd_read = pgd_dirp;
>         }
>         pmd_idx = pmd_index(kvaddr);
>         pmd_entry = *(unsigned long long *)(pmd_page +
>                 pmd_idx * sizeof(unsigned long long));
>         if (!(pmd_entry & _PAGE_PRESENT)) {
>                 reason = 7;
>                 goto not_paddr;
>         }
>         if (pmd_entry & _PAGE_PSE) { // 2MB page
>                 return (PAGEBASE(pmd_entry) & ENTRY_MASK) +
>                         (kvaddr & ~_2MB_PAGE_MASK);
>         }
> 
>         /* Level 1 */
>         pmd_dirp = pmd_entry & ENTRY_MASK;
>         if (pmd_dirp != last_pte_read) {
>                 if (!readmem(MADDR_XEN, pmd_dirp, pte_page, PAGESIZE())) {
>                         reason = 8;
>                         goto not_paddr;
>                 }
>                 /*
>                  * Remember which page pte_page holds; without this the
>                  * comparison above never matches and the page is read from
>                  * the dump again on every lookup.
>                  */
>                 last_pte_read = pmd_dirp;
>         }
>         entry = *(unsigned long long *)(pte_page +
>                 pte_index(kvaddr) * sizeof(unsigned long long));
>         if (!(entry & _PAGE_PRESENT)) {
>                 reason = 9;
>                 goto not_paddr;
>         }
> 
>         return (entry & ENTRY_MASK) + (kvaddr & ((1UL << PTE_SHIFT) - 1));
> not_paddr:
>         DEBUG_MSG("kvtop_xen: NOT_PADDR page 0x%llx from kavaddr: 0x%lx reason: %d\n",
>                 entry, kvaddr, reason);
>         return NOT_PADDR;
> }
> 
> With kind regards
> 
> Norbert Trapp
> PDG ES&S SWE OS 6
> 
> FUJITSU
> Fujitsu Technology Solutions GmbH
> Domagkstraße 28, D-80807 München, Germany
> Tel.: ...
> E-mail: [email protected]
> Web: ts.fujitsu.com
> Company details: ts.fujitsu.com/imprint
> This communication contains information that is confidential, proprietary in 
> nature and/or privileged.  It is for the exclusive use of the intended 
> recipient(s). If you are not the intended recipient(s) or the person 
> responsible for delivering it to the intended recipient(s), please note that 
> any form of dissemination, distribution or copying of this communication is 
> strictly prohibited and may be unlawful. If you have received this 
> communication in error, please immediately notify the sender and delete the 
> original communication. Thank you for your cooperation.
> Please be advised that neither Fujitsu, its affiliates, its employees or 
> agents accept liability for any errors, omissions or damages caused by delays 
> of receipt or by any virus infection in this message or its attachments, or 
> which may otherwise arise as a result of this e-mail transmission.
> 

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to