Hello Norbert,
On Mon, 14 May 2012 16:28:49 +0200
"Trapp, Norbert" <[email protected]> wrote:
> Dear Kumagai-San,
>
> the relevant functions for xen4 in makedumpfile
> for saving just the xen and dom0 pages are
> exclude_xen4_user_domain and kvtop_xen_x86_64.
> Our current version of kvtop_xen_x86_64 also
> checks for 1GB pages and reads the page list
> faster, in the way the crash utility does.
> I am sending you our current versions.
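> As a minimal illustration of the 1GB-page handling (the complete logic
> is in kvtop_xen_x86_64 below), the page-table walk stops at the L3 entry
> when its _PAGE_PSE bit is set and adds the 1GB page offset directly
> instead of descending into the PMD/PTE levels:
>
>     /* Sketch only: pgd_entry is the L3 entry for kvaddr. */
>     if (pgd_entry & _PAGE_PSE)      /* 1GB page: no PMD/PTE walk */
>             return (pgd_entry & ENTRY_MASK)
>                     + (kvaddr & ((1UL << PGDIR_SHIFT) - 1));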
>
> As was suggested I sent the implementation to the
> xen mailing list last year but didn't get any answer.
I'm sorry for the late reply.
> We check the core file data to find out whether
> it is a xen3 or a xen4 dump, and then the xen3
> or xen4 functions are called accordingly (a sketch
> of that dispatch follows below). Alternatively
> there could be #ifdefs, but I do not know how
> the makedumpfile rpms would then be built.
> The xen3 code should probably not be removed yet.
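> A minimal sketch of that dispatch (the wrapper name and the xen3
> routine name are only illustrative, not a final interface):
>
>     int
>     exclude_xen_user_domain(void)
>     {
>             /* info->xen_major_version is filled in earlier, when the
>              * Xen crash information is read from the dump. */
>             if (info->xen_major_version >= 4)
>                     return exclude_xen4_user_domain();
>
>             return exclude_xen3_user_domain();  /* existing xen3 code */
>     }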
>
> Also, we only implemented the xen4 support for x86_64.
> There should probably be an error message for the
> other architectures until somebody contributes an
> implementation; a sketch of such a guard follows below.
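> A minimal sketch of such a guard at the entry of the xen4 path (the
> wording of the message is illustrative):
>
>     #ifndef __x86_64__
>             ERRMSG("Xen4 dump filtering is only implemented for x86_64.\n");
>             return FALSE;
>     #endif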
Thank you for your explanation.
I think your idea is good, but I can't review the details of your code in this form.
So, could you repost the whole of your current version as a series of
patches to kexec-ML, so that you can get advice there?
kexec-ML:
http://lists.infradead.org/pipermail/kexec/
Thanks
Atsushi Kumagai
> Greetings
> Norbert
>
> #if defined(__i386__)
> #define BITS_PER_LONG 32
> #define BYTES_PER_LONG 4
> #define LONG_BYTEORDER 2
> #elif defined(__x86_64__)
> #define BITS_PER_LONG 64
> #define BYTES_PER_LONG 8
> #define LONG_BYTEORDER 3
> #endif
> #define PG_shift(idx) (BITS_PER_LONG - (idx))
> #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
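> /*
>  * On x86_64 (BITS_PER_LONG == 64) PG_shift(2) == 62, so for example
>  * PGC_xen_heap below is bit 62 (1UL << 62); PGC_count_width is 55 and
>  * PGC_count_mask covers the low 55 bits of count_info.
>  */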
> #define PGC_xen_heap PG_mask(1, 2)
> #define PGC_allocated PG_mask(1, 1)
> #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
> #define PGC_count_width PG_shift(9)
> #define PGC_count_mask ((1UL<<PGC_count_width)-1)
> #define PGC_state_free PG_mask(3, 9)
> #define PGC_state PG_mask(3, 9)
> #define PGC_state_inuse PG_mask(0, 9)
> #define PGC_state_offlining PG_mask(1, 9)
> #define PGC_state_offlined PG_mask(2, 9)
> #define PGC_page_table PG_mask(1, 3)
> #define PGC_broken PG_mask(1, 7)
>
> #define PGT_none PG_mask(0, 4) /* no special uses of this page */
> #define PGT_l1_page_table PG_mask(1, 4) /* using as an L1 page table? */
> #define PGT_l2_page_table PG_mask(2, 4) /* using as an L2 page table? */
> #define PGT_l3_page_table PG_mask(3, 4) /* using as an L3 page table? */
> #define PGT_l4_page_table PG_mask(4, 4) /* using as an L4 page table? */
> #define PGT_seg_desc_page PG_mask(5, 4) /* using this page in a GDT/LDT? */
> #define PGT_writable_page PG_mask(7, 4) /* has writable mappings? */
> #define PGT_shared_page PG_mask(8, 4) /* CoW sharable page */
> #define PGT_type_mask PG_mask(15, 4) /* Bits 28-31 or 60-63. */
> #define PGT_pinned PG_mask(1, 5)
> #define PGT_validated PG_mask(1, 6)
> #define PGT_pae_xen_l2 PG_mask(1, 7)
> #define PGT_partial PG_mask(1, 8)
> #define PGT_locked PG_mask(1, 9)
> #define PGT_count_width PG_shift(9)
> #define PGT_count_mask ((1UL<<PGT_count_width)-1)
>
> int
> exclude_xen4_user_domain(void)
> {
> int i;
> unsigned long deleted_pages, total_deleted_pages = 0;
> unsigned long state_free, total_state_free = 0;
> unsigned long xen_heap, total_xen_heap = 0;
> unsigned long allocated, total_allocated = 0;
> unsigned long selected_domain, total_selected_domain = 0;
> unsigned long not_selected_domain, total_not_selected_domain = 0;
> unsigned long not_a_page, total_not_a_page = 0;
> unsigned long page_not_readable, total_page_not_readable = 0;
> unsigned long unknown_page_type, total_unknown_page_type = 0;
> unsigned long not_a_page_offset, total_not_a_page_offset = 0;
> unsigned long broken_pages, total_broken_pages = 0;
> unsigned long page_in_use, total_page_in_use = 0;
> unsigned long count_info;
> unsigned int _domain;
> unsigned long page_info_addr, first_page_info_addr;
> unsigned long long pfn, prev_pfn, pfn_end;
> unsigned long long first_pfn;
> unsigned long long num_pages, total_num_pages, num_pfn_done,
> num_one_percent_pfn;
> unsigned long type_info;
> struct pt_load_segment *pls;
> int idx;
> char page_info_local[SIZE(page_info)];
> char *page_info_mem;
> int page_info_cntr = 0;
> int retval;
> unsigned long long paddr;
> off_t offset = 0;
> const off_t failed = (off_t)-1;
>
> /*
> * NOTE: the first half of bitmap is not used for Xen extraction
> */
> first_pfn = 0;
> idx = 0;
>
> if ((page_info_mem = (char *)malloc(SIZE(page_info) * 128)) == NULL) {
> ERRMSG("Can't allocate memory for the page_info memory.
> %s\n", strerror(errno));
> return FALSE;
> }
> print_progress(PROGRESS_XEN_DOMAIN, 0, 1);
> DEBUG_MSG("\nmakedumpfile: exclude_xen4_user_domain start\n");
> DEBUG_MSG("XEN_VIRT_START : 0x%016lx\n", XEN_VIRT_START);
> DEBUG_MSG("XEN_VIRT_END : 0x%016lx\n", XEN_VIRT_END);
> DEBUG_MSG("DIRECTMAP_VIRT_START : 0x%016lx\n", DIRECTMAP_VIRT_START);
> DEBUG_MSG("DIRECTMAP_VIRT_END : 0x%016lx\n", DIRECTMAP_VIRT_END);
> DEBUG_MSG("FRAMETABLE_VIRT_START: 0x%016lx\n", FRAMETABLE_VIRT_START);
> DEBUG_MSG("FRAMETABLE_VIRT_END : 0x%016lx\n", FRAMETABLE_VIRT_END);
> DEBUG_MSG("FRAMETABLE_SIZE : 0x%016lx\n", FRAMETABLE_SIZE);
> DEBUG_MSG("frame_table_vaddr : 0x%016lx\n",
> info->frame_table_vaddr);
> DEBUG_MSG("SIZE(page_info) : 0x%016lx\n", SIZE(page_info));
> DEBUG_MSG("PAGESIZE() : 0x%016lx\n", PAGESIZE());
> DEBUG_MSG("_2MB_PAGE_MASK : 0x%08x\n" , _2MB_PAGE_MASK);
> DEBUG_MSG("_PAGE_PSE : 0x%08x\n" , _PAGE_PSE);
> DEBUG_MSG("ENTRY_MASK : 0x%016llx\n", ENTRY_MASK);
> DEBUG_MSG("PHYSICAL_PAGE_MASK : 0x%016lx\n", PHYSICAL_PAGE_MASK);
> DEBUG_MSG("PGC_state_inuse : 0x%016lx\n", PGC_state_inuse);
> DEBUG_MSG("PGC_count_mask : 0x%016lx\n", PGC_count_mask);
> DEBUG_MSG("PGC_state : 0x%016lx\n", PGC_state);
> DEBUG_MSG("PGC_state_free : 0x%016lx\n", PGC_state_free);
> DEBUG_MSG("PGC_allocated : 0x%016lx\n", PGC_allocated);
> DEBUG_MSG("PGC_broken : 0x%016lx\n", PGC_broken);
> num_pfn_done = 0;
> total_num_pages = 0;
>
> DEBUG_MSG("exclude_xen4_user_domain: %d memory LOAD sections\n",
> info->num_load_memory);
> DEBUG_MSG("section phys_start phys_end pfn_start pfn_end
> num_pfn\n");
> for (i = 0; i < info->num_load_memory; i++) {
> pls = &info->pt_load_segments[i];
> pfn = pls->phys_start >> PAGESHIFT();
> pfn_end = pls->phys_end >> PAGESHIFT();
> total_num_pages += pfn_end - pfn;
> DEBUG_MSG("%3d 0x%016llx 0x%016llx %10llu %10llu %10llu\n",
> i, pls->phys_start, pls->phys_end, pfn, pfn_end,
> pfn_end - pfn);
> }
> DEBUG_MSG("exclude_xen4_user_domain total_num_pages: %llu\n",
> total_num_pages);
> DEBUG_MSG("exclude_xen4_user_domain total size of pages: 0x%llx\n",
> total_num_pages * SIZE(page_info));
> num_one_percent_pfn = total_num_pages / 100;
> paddr = 0;
> for (i = 0; i < info->num_load_memory; i++) {
> pls = &info->pt_load_segments[i];
> pfn = pls->phys_start >> PAGESHIFT();
> pfn_end = pls->phys_end >> PAGESHIFT();
> num_pages = pfn_end - pfn;
> page_info_cntr = 0;
> first_page_info_addr = info->frame_table_vaddr + pfn *
> SIZE(page_info);
> deleted_pages = 0;
> state_free = 0;
> page_in_use = 0;
> xen_heap = 0;
> allocated = 0;
> selected_domain = 0;
> not_selected_domain = 0;
> not_a_page = 0;
> not_a_page_offset = 0;
> page_not_readable = 0;
> unknown_page_type = 0;
> broken_pages = 0;
>
> DEBUG_MSG("exclude_xen4_user_domain: i: %d/%d pfn_start:
> 0x%llx pfn_end: 0x%llx num_pfn: %llu\n",
> i, info->num_load_memory, pfn, pfn_end, pfn_end -
> pfn);
> while (pfn < pfn_end) {
> num_pfn_done++;
> if (((message_level & ML_PRINT_DEBUG_MSG) == 0) &&
> ((num_pfn_done % num_one_percent_pfn) == 0)) {
> print_progress(PROGRESS_XEN_DOMAIN,
> num_pfn_done, total_num_pages);
> }
> page_info_addr = info->frame_table_vaddr + pfn *
> SIZE(page_info);
> retval = TRUE;
> while (1) {
> paddr = kvtop_xen(page_info_addr);
> if (paddr == NOT_PADDR) {
> ERRMSG("NOT a physical address(%llx)
> for pfn %llu\n", paddr, pfn);
> not_a_page++;
> retval = FALSE;
> break;
> }
> if (!(offset = paddr_to_offset(paddr))) {
> ERRMSG("Can't convert a physical
> address(%llx) to offset.\n", paddr);
> not_a_page_offset++;
> retval = FALSE;
> break;
> }
> if (lseek(info->fd_memory, offset, SEEK_SET) == failed) {
> ERRMSG("Can't seek the dump memory(%s). %s\n", info->name_memory, strerror(errno));
> page_not_readable++;
> retval = FALSE;
> break;
> }
> if (read(info->fd_memory, page_info_local,
> SIZE(page_info)) != SIZE(page_info)) {
> ERRMSG("Can't read the dump
> memory(%s). %s\n", info->name_memory, strerror(errno));
> page_not_readable++;
> retval = FALSE;
> break;
> }
> retval = TRUE;
> break;
> }
> if (retval == FALSE) {
> ERRMSG("retval == False\n");
> deleted_pages++;
> clear_bit_on_2nd_bitmap(pfn);
> pfn++;
> continue;
> }
> count_info = *((unsigned long *)(page_info_local +
> OFFSET(page_info.count_info)));
> _domain = *((unsigned int *)(page_info_local +
> OFFSET(page_info._domain)));
> type_info = *((unsigned long *)(page_info_local + 0x10)); /* page_info.type_info; offset hard-coded instead of using OFFSET() */
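> /*
>  * Classify the page from count_info/_domain: xen heap pages, dom0 pages,
>  * pages of selected domains and pages in use are kept; pages of other
>  * domains, free pages and broken pages are excluded from the dump.
>  */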
> if (count_info & PGC_xen_heap) {
> xen_heap++;
> pfn++;
> continue;
> }
> if (count_info & PGC_allocated) {
> allocated++;
> if (_domain == 0) {
> pfn++;
> continue;
> }
> if (is_select_domain(_domain)) {
> selected_domain++;
> pfn++;
> continue;
> } else {
> not_selected_domain++;
> //DEBUG_MSG("domain not selected:
> %u\n", _domain);
> prev_pfn = pfn;
> clear_bit_on_2nd_bitmap(pfn);
> pfn++;
> deleted_pages++;
> continue;
> }
> }
> if ((count_info & PGC_state) == PGC_state_inuse) {
> page_in_use++;
> pfn++;
> continue;
> }
> if ((count_info & PGC_state) == PGC_state_free) {
> //DEBUG_MSG("PSE_PAGE PGC_state_free (0x%llx)
> count_info: (0x%lx) for pfn %llu\n",
> // PGC_state_free, count_info, pfn);
> //DEBUG_MSG("PSE_PAGE PGC_state (0x%llx)\n",
> (count_info & PGC_state));
> state_free++;
> clear_bit_on_2nd_bitmap(pfn);
> pfn++;
> deleted_pages++;
> continue;
> }
> if (count_info & PGC_broken) {
> clear_bit_on_2nd_bitmap(pfn);
> pfn++;
> broken_pages++;
> deleted_pages++;
> continue;
> }
> unknown_page_type++;
> //clear_bit_on_2nd_bitmap(pfn);
> pfn++;
> }
> total_deleted_pages += deleted_pages;
> total_not_a_page += not_a_page;
> total_not_a_page_offset += not_a_page_offset;
> total_state_free += state_free;
> total_page_in_use += page_in_use;
> total_xen_heap += xen_heap;
> total_allocated += allocated;
> total_selected_domain += selected_domain;
> total_not_selected_domain += not_selected_domain;
> total_unknown_page_type += unknown_page_type;
> total_broken_pages += broken_pages;
> DEBUG_MSG("deleted pages : %10lu of %10llu
> %3llu%%\n",
> deleted_pages, num_pages, deleted_pages * 100 /
> num_pages);
> DEBUG_MSG(" unused page : %10lu\n",
> state_free);
> DEBUG_MSG(" not dom0 domain page : %10lu\n",
> not_selected_domain);
> DEBUG_MSG(" page address invalid : %10lu\n",
> not_a_page);
> DEBUG_MSG(" not a page offset : %10lu\n",
> not_a_page_offset);
> DEBUG_MSG(" page not readable : %10lu\n",
> page_not_readable);
> DEBUG_MSG(" broken page : %10lu\n",
> broken_pages);
> DEBUG_MSG("saved pages : %10llu of %10llu
> %3llu%%\n",
> num_pages - deleted_pages, num_pages, (num_pages -
> deleted_pages) * 100 / num_pages);
> DEBUG_MSG(" page in use : %10lu\n",
> page_in_use);
> DEBUG_MSG(" xen heap page : %10lu\n", xen_heap);
> DEBUG_MSG(" dom0 page : %10lu\n",
> selected_domain);
> DEBUG_MSG(" unknown type page : %10lu\n",
> unknown_page_type);
> }
> /*
> * print [100 %]
> */
> print_progress(PROGRESS_XEN_DOMAIN, 1, 1);
> DEBUG_MSG("\n");
> DEBUG_MSG("total deleted pages : %10lu of %10llu
> %3llu%%\n",
> total_deleted_pages, total_num_pages, total_deleted_pages *
> 100 / total_num_pages);
> DEBUG_MSG(" total unused page : %10lu\n",
> total_state_free);
> DEBUG_MSG(" total not dom0 domain page : %10lu\n",
> total_not_selected_domain);
> DEBUG_MSG(" total page address invalid : %10lu\n",
> total_not_a_page);
> DEBUG_MSG(" total not a page offset : %10lu\n",
> total_not_a_page_offset);
> DEBUG_MSG(" total page not readable : %10lu\n",
> total_page_not_readable);
> DEBUG_MSG(" total broken page : %10lu\n",
> total_broken_pages);
> DEBUG_MSG("total saved pages : %10llu of %10llu
> %3llu%%\n",
> total_num_pages - total_deleted_pages, total_num_pages,
> (total_num_pages - total_deleted_pages) * 100 / total_num_pages);
> DEBUG_MSG(" total page in use : %10lu\n",
> total_page_in_use);
> DEBUG_MSG(" total xen heap page : %10lu\n",
> total_xen_heap);
> DEBUG_MSG(" total dom0 page : %10lu\n",
> total_selected_domain);
> DEBUG_MSG(" total unknown type page : %10lu\n",
> total_unknown_page_type);
> return TRUE;
> }
>
> /*
> * for Xen extraction
> */
>
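> /*
>  * Caches for the most recently read page-table pages: kvtop_xen_x86_64()
>  * re-reads a PML4/PGD/PMD/PTE page from the dump only when the page it
>  * needs differs from the one read last time.
>  */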
> int pml4_page_read = 0;
> char pml4_page[4096];
> char pgd_page[4096];
> unsigned long long last_pgd_read = 0;
> char pmd_page[4096];
> unsigned long long last_pmd_read = 0;
> char pte_page[4096];
> unsigned long long last_pte_read = 0;
>
> unsigned long long
> kvtop_xen_x86_64(unsigned long kvaddr)
> {
> unsigned long long entry = 0;
> unsigned long long pml4_entry, pml4_dirp;
> unsigned long long pgd_entry, pgd_dirp;
> unsigned long long pmd_entry, pmd_dirp;
> unsigned long long pgd_idx = 0;
> unsigned long pml4_idx = 0;
> unsigned long pmd_idx = 0;
> int reason;
>
> if (!is_xen_vaddr(kvaddr)) {
> reason = 1;
> goto not_paddr;
> }
> if (is_xen_text(kvaddr)) {
> if (info->xen_major_version < 4)
> return (unsigned long)kvaddr - XEN_VIRT_START_XEN3 +
> info->xen_phys_start;
> else {
> entry = (unsigned long)kvaddr - XEN_VIRT_START +
> info->xen_phys_start;
> return entry;
> }
> }
> if (is_direct(kvaddr)) {
> if (info->xen_major_version < 4)
> return (unsigned long)kvaddr -
> DIRECTMAP_VIRT_START_XEN3;
> else {
> entry = (unsigned long)kvaddr - DIRECTMAP_VIRT_START;
> return entry;
> }
> }
> pml4_idx = pml4_index(kvaddr);
> if (pml4_page_read == 0) {
> if (!readmem(MADDR_XEN, kvtop_xen_x86_64(SYMBOL(pgd_l4)),
> pml4_page, PAGESIZE())) {
> reason = 2;
> goto not_paddr;
> }
> pml4_page_read = 1;
> }
> pml4_entry = *(unsigned long long *)(pml4_page + pml4_idx *
> sizeof(unsigned long long));
>
> if (!(pml4_entry & _PAGE_PRESENT)) {
> reason = 3;
> goto not_paddr;
> }
> pml4_dirp = pml4_entry & ENTRY_MASK;
> if (pml4_dirp != last_pgd_read) {
> if (!readmem(MADDR_XEN, pml4_dirp, pgd_page, PAGESIZE())) {
> reason = 4;
> goto not_paddr;
> }
> last_pgd_read = pml4_dirp;
> }
> pgd_idx = pgd_index(kvaddr);
> pgd_entry = *(unsigned long long *)(pgd_page + pgd_idx *
> sizeof(unsigned long long));
> if (!(pgd_entry & _PAGE_PRESENT)) {
> reason = 5;
> goto not_paddr;
> }
> if (pgd_entry & _PAGE_PSE) { // 1GB page
> pgd_entry = (pgd_entry & ENTRY_MASK) + (kvaddr & ((1UL <<
> PGDIR_SHIFT) - 1));
> return pgd_entry;
> }
> pgd_dirp = pgd_entry & ENTRY_MASK;
>
> if (pgd_dirp != last_pmd_read) {
> pmd_dirp = pgd_dirp;
> if (!readmem(MADDR_XEN, pgd_dirp, pmd_page, PAGESIZE())) {
> reason = 6;
> goto not_paddr;
> }
> last_pmd_read = pgd_dirp;
> }
> pmd_idx = pmd_index(kvaddr);
> pmd_entry = *(unsigned long long *)(pmd_page + pmd_idx *
> sizeof(unsigned long long));
> if (!(pmd_entry & _PAGE_PRESENT)) {
> reason = 7;
> goto not_paddr;
> }
>
> if (pmd_entry & _PAGE_PSE) { // 2MB page
> return (PAGEBASE(pmd_entry) & ENTRY_MASK) + (kvaddr &
> ~_2MB_PAGE_MASK);
> }
> pmd_dirp = pmd_entry & ENTRY_MASK;
> if (pmd_dirp != last_pte_read) {
> if (!readmem(MADDR_XEN, pmd_dirp, pte_page, PAGESIZE())) {
> reason = 8;
> goto not_paddr;
> }
> last_pte_read = pmd_dirp; /* keep the PTE-page cache consistent, as for last_pgd_read/last_pmd_read */
> }
> entry = *(unsigned long long *)(pte_page + pte_index(kvaddr) *
> sizeof(unsigned long long));
>
> if (!(entry & _PAGE_PRESENT)) {
> reason = 9;
> goto not_paddr;
> }
>
> entry = (entry & ENTRY_MASK) + (kvaddr & ((1UL << PTE_SHIFT) - 1));
> return entry;
> not_paddr:
> DEBUG_MSG("kvtop_xen: NOT_PADDR page 0x%llx from kavaddr: 0x%lx
> reason: %d\n",
> entry, kvaddr, reason);
> return NOT_PADDR;
> }
>
> With kind regards
>
> Norbert Trapp
> PDG ES&S SWE OS 6
>
> FUJITSU
> Fujitsu Technology Solutions GmbH
> Domagkstraße 28, D-80807 München, Germany
> Tel.: ...
> E-mail: [email protected]
> Web: ts.fujitsu.com
> Company details: ts.fujitsu.com/imprint
>
_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec