[PATCH] Mini-OS: add some macros for asm statements

2024-04-16 Thread Juergen Gross
Instead of having #ifdefs sprinkled around in x86 code, add some
macros defining constants for asm statements to address differences
between 32- and 64-bit mode.

Modify existing code to use those macros.

Signed-off-by: Juergen Gross 
---
 arch/x86/sched.c | 32 +---
 include/x86/os.h | 19 +++
 2 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/arch/x86/sched.c b/arch/x86/sched.c
index dabe6fd6..460dea2e 100644
--- a/arch/x86/sched.c
+++ b/arch/x86/sched.c
@@ -60,16 +60,10 @@ void dump_stack(struct thread *thread)
 unsigned long *bottom = (unsigned long *)(thread->stack + STACK_SIZE); 
 unsigned long *pointer = (unsigned long *)thread->sp;
 int count;
-if(thread == current)
-{
-#ifdef __i386__
-asm("movl %%esp,%0"
-: "=r"(pointer));
-#else
-asm("movq %%rsp,%0"
-: "=r"(pointer));
-#endif
-}
+
+if ( thread == current )
+asm(ASM_MOV" "ASM_SP",%0" : "=r"(pointer));
+
 printk("The stack for \"%s\"\n", thread->name);
 for(count = 0; count < 25 && pointer < bottom; count ++)
 {
@@ -119,20 +113,12 @@ struct thread* arch_create_thread(char *name, void 
(*function)(void *),
 
 void run_idle_thread(void)
 {
-/* Switch stacks and run the thread */ 
-#if defined(__i386__)
-__asm__ __volatile__("mov %0,%%esp\n\t"
- "push %1\n\t" 
- "ret"
+/* Switch stacks and run the thread */
+__asm__ __volatile__("mov %0,"ASM_SP"\n\t"
+ "push %1\n\t"
+ "ret"
  :"=m" (idle_thread->sp)
- :"m" (idle_thread->ip));  
-#elif defined(__x86_64__)
-__asm__ __volatile__("mov %0,%%rsp\n\t"
- "push %1\n\t" 
- "ret"
- :"=m" (idle_thread->sp)
- :"m" (idle_thread->ip));  
  
-#endif
+ :"m" (idle_thread->ip));
 }
 
 unsigned long __local_irq_save(void)
diff --git a/include/x86/os.h b/include/x86/os.h
index ee34d784..485d90b8 100644
--- a/include/x86/os.h
+++ b/include/x86/os.h
@@ -77,6 +77,17 @@ int  arch_suspend(void);
 void arch_post_suspend(int canceled);
 void arch_fini(void);
 
+#if defined(__i386__)
+#define __SZ"l"
+#define __REG   "e"
+#else
+#define __SZ"q"
+#define __REG   "r"
+#endif
+
+#define ASM_SP  "%%"__REG"sp"
+#define ASM_MOV "mov"__SZ
+
 #ifdef CONFIG_PARAVIRT
 
 /* 
@@ -141,14 +152,6 @@ do {   
\
 
 #else
 
-#if defined(__i386__)
-#define __SZ "l"
-#define __REG "e"
-#else
-#define __SZ "q"
-#define __REG "r"
-#endif
-
 #define __cli() asm volatile ( "cli" : : : "memory" )
 #define __sti() asm volatile ( "sti" : : : "memory" )
 
-- 
2.35.3




[PATCH] x86/pat: fix W^X violation false-positives when running as Xen PV guest

2024-04-09 Thread Juergen Gross
When running as Xen PV guest in some cases W^X violation WARN()s have
been observed. Those WARN()s are produced by verify_rwx(), which looks
into the PTE to verify that writable kernel pages have the NX bit set
in order to avoid code modifications of the kernel by rogue code.

As the NX bits of all levels of translation entries are or-ed and the
RW bits of all levels are and-ed, looking just into the PTE isn't enough
for the decision that a writable page is executable, too. When running
as a Xen PV guest, kernel initialization will set the NX bit in PMD
entries of the initial page tables covering the .data segment.

When finding the PTE to have set the RW bit but no NX bit, higher level
entries must be looked at. Only when all levels have the RW bit set and
no NX bit set, the W^X violation should be flagged.

Additionally show_fault_oops() has a similar problem: it will issue the
"kernel tried to execute NX-protected page" message only if it finds
the NX bit set in the leaf translation entry, while any NX bit in
non-leaf entries is being ignored for issuing the message.

Modify lookup_address_in_pgd() to return the effective NX and RW bit
values of the non-leaf translation entries and evaluate those as well
in verify_rwx() and show_fault_oops().

Fixes: 652c5bf380ad ("x86/mm: Refuse W^X violations")
Reported-by: Jason Andryuk 
Signed-off-by: Juergen Gross 
---
 arch/x86/include/asm/pgtable_types.h |  2 +-
 arch/x86/kernel/sev.c|  3 +-
 arch/x86/mm/fault.c  |  7 ++--
 arch/x86/mm/pat/set_memory.c | 56 +---
 arch/x86/virt/svm/sev.c  |  3 +-
 5 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d..91ab538d3872 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -565,7 +565,7 @@ static inline void update_page_count(int level, unsigned 
long pages) { }
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
-   unsigned int *level);
+   unsigned int *level, bool *nx, bool *rw);
 extern pmd_t *lookup_pmd_address(unsigned long address);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 38ad066179d8..adba581e999d 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -516,12 +516,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb 
*ghcb, struct es_em_ctxt
unsigned long va = (unsigned long)vaddr;
unsigned int level;
phys_addr_t pa;
+   bool nx, rw;
pgd_t *pgd;
pte_t *pte;
 
pgd = __va(read_cr3_pa());
pgd = [pgd_index(va)];
-   pte = lookup_address_in_pgd(pgd, va, );
+   pte = lookup_address_in_pgd(pgd, va, , , );
if (!pte) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.cr2= vaddr;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 622d12ec7f08..eb8e897a5653 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -514,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long 
error_code, unsigned long ad
 
if (error_code & X86_PF_INSTR) {
unsigned int level;
+   bool nx, rw;
pgd_t *pgd;
pte_t *pte;
 
pgd = __va(read_cr3_pa());
pgd += pgd_index(address);
 
-   pte = lookup_address_in_pgd(pgd, address, );
+   pte = lookup_address_in_pgd(pgd, address, , , );
 
-   if (pte && pte_present(*pte) && !pte_exec(*pte))
+   if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx))
pr_crit("kernel tried to execute NX-protected page - 
exploit attempt? (uid: %d)\n",
from_kuid(_user_ns, current_uid()));
-   if (pte && pte_present(*pte) && pte_exec(*pte) &&
+   if (pte && pte_present(*pte) && pte_exec(*pte) && !nx &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(__read_cr4() & X86_CR4_SMEP))
pr_crit("unable to execute userspace code (SMEP?) (uid: 
%d)\n",
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037ffadf..baa4dc4748e9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, 
unsigned long start,
  * Validate strict W^X semantics.
  */
 

Re: Linux Xen PV CPA W^X violation false-positives

2024-04-09 Thread Juergen Gross

On 08.04.24 12:22, Anthony PERARD wrote:

On Thu, Mar 28, 2024 at 02:00:14PM +0100, Jürgen Groß wrote:

Hi Jason,

On 28.03.24 02:24, Jason Andryuk wrote:

On Wed, Mar 27, 2024 at 7:46 AM Jürgen Groß  wrote:


On 24.01.24 17:54, Jason Andryuk wrote:

+
+ return new;
+ }
+ }
+
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx 
- 0x%016lx PFN %lx\n",
  (unsigned long long)pgprot_val(old),


Jason, do you want to send a V2 with your Signed-off, or would you like me to
try upstreaming the patch?


Hi Jürgen,

Yes, please upstream your approach.  I wasn't sure how to deal with
it, so it was more of a bug report.


The final solution was a bit more complicated, as there are some
corner cases to be considered. OTOH it is now complete by looking
at all used translation entries.

Are you able to test the attached patch? I don't see the original
issue and can only verify the patch doesn't cause any regression.


Juergen


Hi Jürgen,

I gave the patch in this email a try with osstest, and I can't find a
single "CPA detected W^X violation" log entry, whereas there seem to be
many in osstest in general; they seem to come from dom0, as they usually
show up on the host serial console.

http://logs.test-lab.xenproject.org/osstest/logs/185252/

If you look in several "serial-$host.log*" files, there will be the
"CPA detected" message, but those happened on a previous test run.

I did another smaller run before this one, and same thing:
http://logs.test-lab.xenproject.org/osstest/logs/185186/

And this other run as well, which I failed to set up properly, with lots
of broken jobs, but no failure due to the patch, and I can't find any "CPA
detected" messages.
http://logs.test-lab.xenproject.org/osstest/logs/185248/

I hope that helps?


Yes, it does. Thanks for testing.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH 0/2] x86: Two fixes related to Xen PV guest mode

2024-04-05 Thread Juergen Gross
These are 2 fixes for issues introduced by topology related changes
added in the 6.9 merge window.

Juergen Gross (2):
  x86/cpu: fix BSP detection when running as Xen PV guest
  x86/xen: return a sane initial apic id when running as PV guest

 arch/x86/kernel/cpu/topology.c |  2 +-
 arch/x86/xen/enlighten_pv.c| 10 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

-- 
2.35.3




[PATCH 2/2] x86/xen: return a sane initial apic id when running as PV guest

2024-04-05 Thread Juergen Gross
With recent sanity checks for topology information added, there are now
warnings issued for APs when running as a Xen PV guest:

  [Firmware Bug]: CPU   1: APIC ID mismatch. CPUID: 0x0000 APIC: 0x0001

This is because the initial APIC ID obtained via CPUID for PV guests is
always 0.

Avoid the warnings by synthesizing the CPUID data to contain the same
initial APIC ID as xen_pv_smp_config() is using for registering the
APIC IDs of all CPUs.

Fixes: 52128a7a21f7 ("x86/cpu/topology: Make the APIC mismatch warnings 
complete")
Signed-off-by: Juergen Gross 
---
 arch/x86/xen/enlighten_pv.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ace2eb054053..965e4ca36024 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -219,13 +219,20 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val;
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
  unsigned int *cx, unsigned int *dx)
 {
-   unsigned maskebx = ~0;
+   unsigned int maskebx = ~0;
+   unsigned int or_ebx = 0;
 
/*
 * Mask out inconvenient features, to try and disable as many
 * unsupported kernel subsystems as possible.
 */
switch (*ax) {
+   case 0x1:
+   /* Replace initial APIC ID in bits 24-31 of EBX. */
+   maskebx = 0x00ff;
+   or_ebx = smp_processor_id() << 24;
+   break;
+
case CPUID_MWAIT_LEAF:
/* Synthesize the values.. */
*ax = 0;
@@ -248,6 +255,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
: "0" (*ax), "2" (*cx));
 
*bx &= maskebx;
+   *bx |= or_ebx;
 }
 
 static bool __init xen_check_mwait(void)
-- 
2.35.3




Re: [PATCH v6 1/8] xen/spinlock: add explicit non-recursive locking functions

2024-04-04 Thread Juergen Gross

On 27.03.24 16:22, Juergen Gross wrote:

In order to prepare a type-safe recursive spinlock structure, add
explicitly non-recursive locking functions to be used for non-recursive
locking of spinlocks, which are used recursively, too.

Signed-off-by: Juergen Gross 
Acked-by: Jan Beulich 


Could any of the Arm maintainers please have a look at this patch?


Juergen


---
V2:
- rename functions (Jan Beulich)
- get rid of !! in pcidevs_locked() (Jan Beulich)
V5:
- remove spurious change (Julien Grall)
- add nrspin_lock() description (Julien Grall)
---
  xen/arch/arm/mm.c |  4 ++--
  xen/arch/x86/domain.c | 12 ++--
  xen/arch/x86/mm.c | 12 ++--
  xen/arch/x86/mm/mem_sharing.c |  8 
  xen/arch/x86/mm/p2m-pod.c |  4 ++--
  xen/arch/x86/mm/p2m.c |  4 ++--
  xen/arch/x86/tboot.c  |  4 ++--
  xen/common/domctl.c   |  4 ++--
  xen/common/grant_table.c  | 10 +-
  xen/common/memory.c   |  4 ++--
  xen/common/numa.c |  4 ++--
  xen/common/page_alloc.c   | 16 
  xen/drivers/char/console.c| 16 
  xen/include/xen/spinlock.h| 29 +++--
  14 files changed, 74 insertions(+), 57 deletions(-)

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index b15a18a494..def939172c 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -105,7 +105,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
  if ( page_get_owner(page) == d )
  return;
  
-spin_lock(>page_alloc_lock);

+nrspin_lock(>page_alloc_lock);
  
  /*

   * The incremented type count pins as writable or read-only.
@@ -136,7 +136,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
  page_list_add_tail(page, >xenpage_list);
  }
  
-spin_unlock(>page_alloc_lock);

+nrspin_unlock(>page_alloc_lock);
  }
  
  int xenmem_add_to_physmap_one(

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a11c55f921..33a2830d9d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -212,7 +212,7 @@ void dump_pageframe_info(struct domain *d)
  {
  unsigned long total[MASK_EXTR(PGT_type_mask, PGT_type_mask) + 1] = {};
  
-spin_lock(>page_alloc_lock);

+nrspin_lock(>page_alloc_lock);
  page_list_for_each ( page, >page_list )
  {
  unsigned int index = MASK_EXTR(page->u.inuse.type_info,
@@ -231,13 +231,13 @@ void dump_pageframe_info(struct domain *d)
 _p(mfn_x(page_to_mfn(page))),
 page->count_info, page->u.inuse.type_info);
  }
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
  }
  
  if ( is_hvm_domain(d) )

  p2m_pod_dump_data(d);
  
-spin_lock(>page_alloc_lock);

+nrspin_lock(>page_alloc_lock);
  
  page_list_for_each ( page, >xenpage_list )

  {
@@ -253,7 +253,7 @@ void dump_pageframe_info(struct domain *d)
 page->count_info, page->u.inuse.type_info);
  }
  
-spin_unlock(>page_alloc_lock);

+nrspin_unlock(>page_alloc_lock);
  }
  
  void update_guest_memory_policy(struct vcpu *v,

@@ -2448,10 +2448,10 @@ int domain_relinquish_resources(struct domain *d)
  d->arch.auto_unmask = 0;
  }
  
-spin_lock(>page_alloc_lock);

+nrspin_lock(>page_alloc_lock);
  page_list_splice(>arch.relmem_list, >page_list);
  INIT_PAGE_LIST_HEAD(>arch.relmem_list);
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
  
  PROGRESS(xen):
  
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c

index 62f5b811bb..b4d125db39 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -482,7 +482,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
  
  set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
  
-spin_lock(>page_alloc_lock);

+nrspin_lock(>page_alloc_lock);
  
  /* The incremented type count pins as writable or read-only. */

  page->u.inuse.type_info =
@@ -502,7 +502,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
  page_list_add_tail(page, >xenpage_list);
  }
  
-spin_unlock(>page_alloc_lock);

+nrspin_unlock(>page_alloc_lock);
  }
  
  void make_cr3(struct vcpu *v, mfn_t mfn)

@@ -3597,11 +3597,11 @@ long do_mmuext_op(
  {
  bool drop_ref;
  
-spin_lock(_owner->page_alloc_lock);

+nrspin_lock(_owner->page_alloc_lock);
  drop_ref = (pg_owner->is_dying &&
  test_and_clear_bit(_PGT_pinned,
 >u.inuse.type_info));
-spin

[PATCH] xen/include: move definition of ASM_INT() to xen/linkage.h

2024-04-03 Thread Juergen Gross
ASM_INT() is defined in arch/[arm|x86]/include/asm/asm_defns.h in
exactly the same way. Instead of replicating this definition for riscv
and ppc, move it to include/xen/linkage.h, where other arch agnostic
definitions for assembler code are living already.

Adapt the generation of assembler sources via tools/binfile to include
the new home of ASM_INT().

Signed-off-by: Juergen Gross 
---
 xen/arch/arm/include/asm/asm_defns.h | 3 ---
 xen/arch/x86/include/asm/asm_defns.h | 3 ---
 xen/include/xen/linkage.h| 2 ++
 xen/tools/binfile| 2 +-
 4 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/include/asm/asm_defns.h 
b/xen/arch/arm/include/asm/asm_defns.h
index c489547d29..47efdf5234 100644
--- a/xen/arch/arm/include/asm/asm_defns.h
+++ b/xen/arch/arm/include/asm/asm_defns.h
@@ -28,9 +28,6 @@
 label:  .asciz msg; \
 .popsection
 
-#define ASM_INT(label, val) \
-DATA(label, 4) .long (val); END(label)
-
 #endif /* __ARM_ASM_DEFNS_H__ */
 /*
  * Local variables:
diff --git a/xen/arch/x86/include/asm/asm_defns.h 
b/xen/arch/x86/include/asm/asm_defns.h
index a69fae78b1..0a3ff70566 100644
--- a/xen/arch/x86/include/asm/asm_defns.h
+++ b/xen/arch/x86/include/asm/asm_defns.h
@@ -351,9 +351,6 @@ static always_inline void stac(void)
 4:  .p2align 2; \
 .popsection
 
-#define ASM_INT(label, val) \
-DATA(label, 4) .long (val); END(label)
-
 #define ASM_CONSTANT(name, value)\
 asm ( ".equ " #name ", %P0; .global " #name  \
   :: "i" ((value)) );
diff --git a/xen/include/xen/linkage.h b/xen/include/xen/linkage.h
index 478b1d7287..3d401b88c1 100644
--- a/xen/include/xen/linkage.h
+++ b/xen/include/xen/linkage.h
@@ -60,6 +60,8 @@
 #define DATA_LOCAL(name, align...) \
 SYM(name, DATA, LOCAL, LASTARG(DATA_ALIGN, ## align), DATA_FILL)
 
+#define ASM_INT(label, val)DATA(label, 4) .long (val); END(label)
+
 #endif /*  __ASSEMBLY__ */
 
 #endif /* __LINKAGE_H__ */
diff --git a/xen/tools/binfile b/xen/tools/binfile
index 099d7eda9a..0299326ccc 100755
--- a/xen/tools/binfile
+++ b/xen/tools/binfile
@@ -25,7 +25,7 @@ binsource=$2
 varname=$3
 
 cat <$target
-#include 
+#include 
 
 .section $section.rodata, "a", %progbits
 
-- 
2.35.3




Re: [PATCH v7 02/19] xen/riscv: disable unnecessary configs

2024-04-03 Thread Juergen Gross

On 03.04.24 13:47, Jan Beulich wrote:

On 03.04.2024 13:18, Juergen Gross wrote:

On 03.04.24 12:54, Oleksii wrote:

On Wed, 2024-04-03 at 12:28 +0200, Jan Beulich wrote:

On 03.04.2024 12:19, Oleksii Kurochko wrote:

This patch disables unnecessary configs for two cases:
1. By utilizing EXTRA_FIXED_RANDCONFIG for randconfig builds
(GitLab CI jobs).
2. By using tiny64_defconfig for non-randconfig builds.

Only configs which lead to compilation issues were disabled.

Signed-off-by: Oleksii Kurochko 
---
Changes in V7:
   - Disable only configs which cause compilation issues.


Since the description doesn't go into details: While I can see that
PERF_COUNTERS and LIVEPATCH may require (a little / some more) extra
work, are HYPFS, ARGO, and XSM really causing issues?

For Argo, I received the following compilation errors:
 common/argo.c:1416:5: error: unknown type name 'p2m_type_t'; did you
 mean 'hvmmem_type_t'?
  1416 | p2m_type_t p2mt;
   | ^~
   | hvmmem_type_t
 common/argo.c:1419:11: error: implicit declaration of function
 'check_get_page_from_gfn' [-Werror=implicit-function-declaration]
  1419 | ret = check_get_page_from_gfn(d, gfn, false, , );
   |   ^~~
 common/argo.c:1427:10: error: 'p2m_ram_rw' undeclared (first use in
 this function)
  1427 | case p2m_ram_rw:
 
It seems that xen/p2m-common.h and asm/p2m.h should be included in
common/argo.c.

For CONFIG_HYPFS_CONFIG ( there is no issue with CONFIG_HYPFS,
overlooked that ):
 common/config_data.S:1:10: fatal error: asm/asm_defns.h: No such file
 or directory
 1 | #include 


Hmm, this seems to be needed for ASM_INT(), which is currently defined the same
way for arm and x86. Maybe we should move that macro to xen/linkage.h and
include that one instead of asm_defns.h?


Indeed while doing the entry annotation work (also touching the build logic
here iirc) I was thinking of doing so.


Okay, I'm preparing a patch.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH v7 02/19] xen/riscv: disable unnecessary configs

2024-04-03 Thread Juergen Gross

On 03.04.24 12:54, Oleksii wrote:

On Wed, 2024-04-03 at 12:28 +0200, Jan Beulich wrote:

On 03.04.2024 12:19, Oleksii Kurochko wrote:

This patch disables unnecessary configs for two cases:
1. By utilizing EXTRA_FIXED_RANDCONFIG for randconfig builds
(GitLab CI jobs).
2. By using tiny64_defconfig for non-randconfig builds.

Only configs which lead to compilation issues were disabled.

Signed-off-by: Oleksii Kurochko 
---
Changes in V7:
  - Disable only configs which cause compilation issues.


Since the description doesn't go into details: While I can see that
PERF_COUNTERS and LIVEPATCH may require (a little / some more) extra
work, are HYPFS, ARGO, and XSM really causing issues?

For Argo, I received the following compilation errors:
common/argo.c:1416:5: error: unknown type name 'p2m_type_t'; did you
mean 'hvmmem_type_t'?
 1416 | p2m_type_t p2mt;
  | ^~
  | hvmmem_type_t
common/argo.c:1419:11: error: implicit declaration of function
'check_get_page_from_gfn' [-Werror=implicit-function-declaration]
 1419 | ret = check_get_page_from_gfn(d, gfn, false, , );
  |   ^~~
common/argo.c:1427:10: error: 'p2m_ram_rw' undeclared (first use in
this function)
 1427 | case p2m_ram_rw:

It seems that xen/p2m-common.h and asm/p2m.h should be included in
common/argo.c.

For CONFIG_HYPFS_CONFIG ( there is no issue with CONFIG_HYPFS,
overlooked that ):
common/config_data.S:1:10: fatal error: asm/asm_defns.h: No such file
or directory
1 | #include 


Hmm, this seems to be needed for ASM_INT(), which is currently defined the same
way for arm and x86. Maybe we should move that macro to xen/linkage.h and
include that one instead of asm_defns.h?


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH v6 8/8] xen: allow up to 16383 cpus

2024-03-27 Thread Juergen Gross
With lock handling now allowing up to 16384 cpus (spinlocks can handle
65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for
the number of cpus to be configured to 16383.

The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and
QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS.

Signed-off-by: Juergen Gross 
---
V5:
- new patch (Jan Beulich)
---
 xen/arch/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/arch/Kconfig b/xen/arch/Kconfig
index 67ba38f32f..308ce129a8 100644
--- a/xen/arch/Kconfig
+++ b/xen/arch/Kconfig
@@ -6,7 +6,7 @@ config PHYS_ADDR_T_32
 
 config NR_CPUS
int "Maximum number of CPUs"
-   range 1 4095
+   range 1 16383
default "256" if X86
default "8" if ARM && RCAR3
default "4" if ARM && QEMU
-- 
2.35.3




[PATCH v6 6/8] xen/spinlock: support higher number of cpus

2024-03-27 Thread Juergen Gross
Allow 16 bits per cpu number, which is the limit imposed by
spinlock_tickets_t.

This will allow up to 65535 cpus, while increasing only the size of
recursive spinlocks in debug builds from 8 to 12 bytes.

The current Xen limit of 4095 cpus is imposed by SPINLOCK_CPU_BITS
being 12. There are machines available with more cpus than the current
Xen limit, so it makes sense to have the possibility to use more cpus.

Signed-off-by: Juergen Gross 
---
V5:
- keep previous recursion limit (Julien Grall)
V6:
- use unsigned int instead of uint32_t (Jan Beulich)
---
 xen/common/spinlock.c  |  2 ++
 xen/include/xen/spinlock.h | 20 ++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 7ccb725171..5aa9ba6188 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -485,7 +485,9 @@ bool _rspin_trylock(rspinlock_t *lock)
 
 /* Don't allow overflow of recurse_cpu field. */
 BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
+BUILD_BUG_ON(SPINLOCK_CPU_BITS > sizeof(lock->recurse_cpu) * 8);
 BUILD_BUG_ON(SPINLOCK_RECURSE_BITS < 3);
+BUILD_BUG_ON(SPINLOCK_MAX_RECURSE > ((1u << SPINLOCK_RECURSE_BITS) - 1));
 
 check_lock(>debug, true);
 
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 3a4092626c..db00a24646 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -8,16 +8,16 @@
 #include 
 #include 
 
-#define SPINLOCK_CPU_BITS  12
+#define SPINLOCK_CPU_BITS  16
 
 #ifdef CONFIG_DEBUG_LOCKS
 union lock_debug {
-uint16_t val;
-#define LOCK_DEBUG_INITVAL 0x
+uint32_t val;
+#define LOCK_DEBUG_INITVAL 0x
 struct {
-uint16_t cpu:SPINLOCK_CPU_BITS;
-#define LOCK_DEBUG_PAD_BITS (14 - SPINLOCK_CPU_BITS)
-uint16_t :LOCK_DEBUG_PAD_BITS;
+unsigned int cpu:SPINLOCK_CPU_BITS;
+#define LOCK_DEBUG_PAD_BITS (30 - SPINLOCK_CPU_BITS)
+unsigned int :LOCK_DEBUG_PAD_BITS;
 bool irq_safe:1;
 bool unseen:1;
 };
@@ -211,11 +211,11 @@ typedef struct spinlock {
 
 typedef struct rspinlock {
 spinlock_tickets_t tickets;
-uint16_t recurse_cpu:SPINLOCK_CPU_BITS;
+uint16_t recurse_cpu;
 #define SPINLOCK_NO_CPU((1u << SPINLOCK_CPU_BITS) - 1)
-#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
-uint16_t recurse_cnt:SPINLOCK_RECURSE_BITS;
-#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)
+#define SPINLOCK_RECURSE_BITS  8
+uint8_t recurse_cnt;
+#define SPINLOCK_MAX_RECURSE   15
 union lock_debug debug;
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 struct lock_profile *profile;
-- 
2.35.3




[PATCH v6 7/8] xen/rwlock: raise the number of possible cpus

2024-03-27 Thread Juergen Gross
The rwlock handling is limiting the number of cpus to 4095 today. The
main reason is the use of the atomic_t data type for the main lock
handling, which needs 2 bits for the locking state (writer waiting or
write locked), 12 bits for the id of a possible writer, and a 12 bit
counter for readers. The limit isn't 4096 due to an off by one sanity
check.

The atomic_t data type is 32 bits wide, so in theory 15 bits for the
writer's cpu id and 15 bits for the reader count seem to be fine, but
via read_trylock() more readers than cpus are possible.

This means that it is possible to raise the number of cpus to 16384
without changing the rwlock_t data structure. In order to avoid the
reader count wrapping to zero, don't let read_trylock() succeed in case
the highest bit of the reader's count is set already. This leaves enough
headroom for non-recursive readers to enter without risking a wrap.

While at it calculate _QW_CPUMASK and _QR_SHIFT from _QW_SHIFT and
add a sanity check for not overflowing the atomic_t data type.

Signed-off-by: Juergen Gross 
---
V5:
- new patch
V6:
- add comment to _can_read_lock() (Jan Beulich)
---
 xen/include/xen/rwlock.h | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
index 65d88b0ef4..232782801d 100644
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -23,12 +23,12 @@ typedef struct {
 #define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)
 
 /* Writer states & reader shift and bias. */
-#define_QW_CPUMASK  0xfffU /* Writer CPU mask */
-#define_QW_SHIFT12 /* Writer flags shift */
-#define_QW_WAITING  (1U << _QW_SHIFT)  /* A writer is waiting */
-#define_QW_LOCKED   (3U << _QW_SHIFT)  /* A writer holds the lock */
-#define_QW_WMASK(3U << _QW_SHIFT)  /* Writer mask */
-#define_QR_SHIFT14 /* Reader count shift */
+#define_QW_SHIFT14  /* Writer flags shift */
+#define_QW_CPUMASK  ((1U << _QW_SHIFT) - 1) /* Writer CPU mask */
+#define_QW_WAITING  (1U << _QW_SHIFT)   /* A writer is waiting */
+#define_QW_LOCKED   (3U << _QW_SHIFT)   /* A writer holds the lock */
+#define_QW_WMASK(3U << _QW_SHIFT)   /* Writer mask */
+#define_QR_SHIFT(_QW_SHIFT + 2) /* Reader count shift */
 #define_QR_BIAS (1U << _QR_SHIFT)
 
 void queue_read_lock_slowpath(rwlock_t *lock);
@@ -36,14 +36,21 @@ void queue_write_lock_slowpath(rwlock_t *lock);
 
 static inline bool _is_write_locked_by_me(unsigned int cnts)
 {
-BUILD_BUG_ON(_QW_CPUMASK < NR_CPUS);
+BUILD_BUG_ON((_QW_CPUMASK + 1) < NR_CPUS);
+BUILD_BUG_ON(NR_CPUS * _QR_BIAS > INT_MAX);
 return (cnts & _QW_WMASK) == _QW_LOCKED &&
(cnts & _QW_CPUMASK) == smp_processor_id();
 }
 
 static inline bool _can_read_lock(unsigned int cnts)
 {
-return !(cnts & _QW_WMASK) || _is_write_locked_by_me(cnts);
+/*
+ * If write locked by the caller, no other readers are possible.
+ * Not allowing the lock holder to read_lock() another 32768 times ought
+ * to be fine.
+ */
+return cnts <= INT_MAX &&
+   (!(cnts & _QW_WMASK) || _is_write_locked_by_me(cnts));
 }
 
 /*
-- 
2.35.3




[PATCH v6 4/8] xen/spinlock: split recursive spinlocks from normal ones

2024-03-27 Thread Juergen Gross
Recursive and normal spinlocks are sharing the same data structure for
representation of the lock. This has two major disadvantages:

- it is not clear from the definition of a lock, whether it is intended
  to be used recursive or not, while a mixture of both usage variants
  needs to be

- in production builds (builds without CONFIG_DEBUG_LOCKS) the needed
  data size of an ordinary spinlock is 8 bytes instead of 4, due to the
  additional recursion data needed (associated with that the rwlock
  data is using 12 instead of only 8 bytes)

Fix that by introducing a struct spinlock_recursive for recursive
spinlocks only, and switch recursive spinlock functions to require
pointers to this new struct.

This allows checking the correct usage at build time.

Signed-off-by: Juergen Gross 
Reviewed-by: Jan Beulich 
---
V2:
- use shorter names (Jan Beulich)
- don't embed spinlock_t in rspinlock_t (Jan Beulich)
V5:
- some style fixes (Jan Beulich)
- bool instead of int (Jan Beulich)
---
 xen/common/spinlock.c  | 50 ++
 xen/include/xen/spinlock.h | 72 +-
 2 files changed, 105 insertions(+), 17 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 6572c76114..5aaca49a61 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -545,6 +545,56 @@ void _rspin_unlock_irqrestore(rspinlock_t *lock, unsigned 
long flags)
 local_irq_restore(flags);
 }
 
+bool _nrspin_trylock(rspinlock_t *lock)
+{
+check_lock(>debug, true);
+
+if ( unlikely(lock->recurse_cpu != SPINLOCK_NO_CPU) )
+return false;
+
+return spin_trylock_common(>tickets, >debug, LOCK_PROFILE_PAR);
+}
+
+void _nrspin_lock(rspinlock_t *lock)
+{
+spin_lock_common(>tickets, >debug, LOCK_PROFILE_PAR, NULL,
+ NULL);
+}
+
+void _nrspin_unlock(rspinlock_t *lock)
+{
+spin_unlock_common(>tickets, >debug, LOCK_PROFILE_PAR);
+}
+
+void _nrspin_lock_irq(rspinlock_t *lock)
+{
+ASSERT(local_irq_is_enabled());
+local_irq_disable();
+_nrspin_lock(lock);
+}
+
+void _nrspin_unlock_irq(rspinlock_t *lock)
+{
+_nrspin_unlock(lock);
+local_irq_enable();
+}
+
+unsigned long _nrspin_lock_irqsave(rspinlock_t *lock)
+{
+unsigned long flags;
+
+local_irq_save(flags);
+_nrspin_lock(lock);
+
+return flags;
+}
+
+void _nrspin_unlock_irqrestore(rspinlock_t *lock, unsigned long flags)
+{
+_nrspin_unlock(lock);
+local_irq_restore(flags);
+}
+
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
 struct lock_profile_anc {
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 148be1e116..f49ba928f0 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -77,8 +77,6 @@ union lock_debug { };
 */
 
 struct spinlock;
-/* Temporary hack until a dedicated struct rspinlock is existing. */
-#define rspinlock spinlock
 
 struct lock_profile {
 struct lock_profile *next;   /* forward link */
@@ -110,6 +108,10 @@ struct lock_profile_qhead {
 __used_section(".lockprofile.data") = \
 _profile_data__##name
 #define SPIN_LOCK_UNLOCKED_(x) {  \
+.debug = LOCK_DEBUG_, \
+.profile = x, \
+}
+#define RSPIN_LOCK_UNLOCKED_(x) { \
 .recurse_cpu = SPINLOCK_NO_CPU,   \
 .debug = LOCK_DEBUG_, \
 .profile = x, \
@@ -119,8 +121,9 @@ struct lock_profile_qhead {
 spinlock_t l = SPIN_LOCK_UNLOCKED_(NULL); \
 static struct lock_profile lock_profile_data__##l = LOCK_PROFILE_(l); \
 LOCK_PROFILE_PTR_(l)
+#define RSPIN_LOCK_UNLOCKED RSPIN_LOCK_UNLOCKED_(NULL)
 #define DEFINE_RSPINLOCK(l)   \
-rspinlock_t l = SPIN_LOCK_UNLOCKED_(NULL);\
+rspinlock_t l = RSPIN_LOCK_UNLOCKED_(NULL);   \
 static struct lock_profile lock_profile_data__##l = RLOCK_PROFILE_(l);\
 LOCK_PROFILE_PTR_(l)
 
@@ -145,8 +148,11 @@ struct lock_profile_qhead {
 
 #define spin_lock_init_prof(s, l) \
 spin_lock_init_prof__(s, l, lock, spinlock_t, false)
-#define rspin_lock_init_prof(s, l)\
-spin_lock_init_prof__(s, l, rlock, rspinlock_t, true)
+#define rspin_lock_init_prof(s, l) do {   \
+spin_lock_init_prof__(s, l, rlock, rspinlock_t, true);\
+(s)->l.recurse_cpu = SPINLOCK_NO_CPU; 

[PATCH v6 2/8] xen/spinlock: add another function level

2024-03-27 Thread Juergen Gross
Add another function level in spinlock.c hiding the spinlock_t layout
from the low level locking code.

This is done in preparation of introducing rspinlock_t for recursive
locks without having to duplicate all of the locking code.

Signed-off-by: Juergen Gross 
Reviewed-by: Jan Beulich 
---
V2:
- new patch
V5:
- don't regress spin_is_locked() for rspin-lock (Jan Beulich)
- use bool as return type of spin_is_locked_common() and
  spin_trylock_common() (Jan Beulich)
---
 xen/common/spinlock.c  | 103 -
 xen/include/xen/spinlock.h |   1 +
 2 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 874ed762b4..648393d95f 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -261,29 +261,31 @@ void spin_debug_disable(void)
 
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
+#define LOCK_PROFILE_PAR lock->profile
 #define LOCK_PROFILE_REL \
-if ( lock->profile ) \
+if ( profile )   \
 {\
-lock->profile->time_hold += NOW() - lock->profile->time_locked;  \
-lock->profile->lock_cnt++;   \
+profile->time_hold += NOW() - profile->time_locked;  \
+profile->lock_cnt++; \
 }
 #define LOCK_PROFILE_VAR(var, val)s_time_t var = (val)
 #define LOCK_PROFILE_BLOCK(var)   var = var ? : NOW()
 #define LOCK_PROFILE_BLKACC(tst, val)\
 if ( tst )   \
 {\
-lock->profile->time_block += lock->profile->time_locked - (val); \
-lock->profile->block_cnt++;  \
+profile->time_block += profile->time_locked - (val); \
+profile->block_cnt++;\
 }
 #define LOCK_PROFILE_GOT(val)\
-if ( lock->profile ) \
+if ( profile )   \
 {\
-lock->profile->time_locked = NOW();  \
+profile->time_locked = NOW();\
 LOCK_PROFILE_BLKACC(val, val);   \
 }
 
 #else
 
+#define LOCK_PROFILE_PAR NULL
 #define LOCK_PROFILE_REL
 #define LOCK_PROFILE_VAR(var, val)
 #define LOCK_PROFILE_BLOCK(var)
@@ -307,17 +309,18 @@ static always_inline uint16_t observe_head(const 
spinlock_tickets_t *t)
 return read_atomic(&t->head);
 }
 
-static void always_inline spin_lock_common(spinlock_t *lock,
+static void always_inline spin_lock_common(spinlock_tickets_t *t,
+   union lock_debug *debug,
+   struct lock_profile *profile,
void (*cb)(void *data), void *data)
 {
 spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
 LOCK_PROFILE_VAR(block, 0);
 
-check_lock(&lock->debug, false);
+check_lock(debug, false);
 preempt_disable();
-tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
-   tickets.head_tail);
-while ( tickets.tail != observe_head(&lock->tickets) )
+tickets.head_tail = arch_fetch_and_add(&t->head_tail, tickets.head_tail);
+while ( tickets.tail != observe_head(t) )
 {
 LOCK_PROFILE_BLOCK(block);
 if ( cb )
@@ -325,18 +328,19 @@ static void always_inline spin_lock_common(spinlock_t 
*lock,
 arch_lock_relax();
 }
 arch_lock_acquire_barrier();
-got_lock(&lock->debug);
+got_lock(debug);
 LOCK_PROFILE_GOT(block);
 }
 
 void _spin_lock(spinlock_t *lock)
 {
-spin_lock_common(lock, NULL, NULL);
+spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, NULL,
+ NULL);
 }
 
 void _spin_lock_cb(spinlock_t *lock, void (*cb)(void *data), void *data)
 {
-spin_lock_common(lock, cb, data);
+spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, cb, data);
 }
 
 void _spin_lock_irq(spinlock_t *lock)
@@ -355,16 +359,23 @@ unsigned long _spin_lock_irqsave(spinlock_t *lock)
 return flags;
 }
 
-void _spin_unlock(spinlock_t *lock)
+static void always_inline spin_unlock_common(spinlock_tickets_t *t,
+ union lock_debug *debug

[PATCH v6 5/8] xen/spinlock: let all is_locked and trylock variants return bool

2024-03-27 Thread Juergen Gross
Switch the remaining trylock and is_locked variants to return bool.

Signed-off-by: Juergen Gross 
Reviewed-by: Jan Beulich 
---
V5:
- new patch (Jan Beulich)
---
 xen/common/spinlock.c  | 4 ++--
 xen/include/xen/spinlock.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 5aaca49a61..7ccb725171 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -393,7 +393,7 @@ static bool always_inline spin_is_locked_common(const 
spinlock_tickets_t *t)
 return t->head != t->tail;
 }
 
-int _spin_is_locked(const spinlock_t *lock)
+bool _spin_is_locked(const spinlock_t *lock)
 {
 /*
  * This function is suitable only for use in ASSERT()s and alike, as it
@@ -433,7 +433,7 @@ static bool always_inline 
spin_trylock_common(spinlock_tickets_t *t,
 return true;
 }
 
-int _spin_trylock(spinlock_t *lock)
+bool _spin_trylock(spinlock_t *lock)
 {
 return spin_trylock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
 }
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index f49ba928f0..3a4092626c 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -234,8 +234,8 @@ void _spin_unlock(spinlock_t *lock);
 void _spin_unlock_irq(spinlock_t *lock);
 void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags);
 
-int _spin_is_locked(const spinlock_t *lock);
-int _spin_trylock(spinlock_t *lock);
+bool _spin_is_locked(const spinlock_t *lock);
+bool _spin_trylock(spinlock_t *lock);
 void _spin_barrier(spinlock_t *lock);
 
 static always_inline void spin_lock(spinlock_t *l)
-- 
2.35.3




[PATCH v6 0/8] xen/spinlock: make recursive spinlocks a dedicated type

2024-03-27 Thread Juergen Gross
Instead of being able to use normal spinlocks as recursive ones, too,
make recursive spinlocks a special lock type.

This will make the spinlock structure smaller in production builds and
add type-safety.

This allows increasing the maximum number of physical cpus from 4095
to 65535 without increasing the size of the lock structure in production
builds (the size of recursive spinlocks in debug builds will grow to
12 bytes due to that change).

Note that rwlock handling is still limiting the number of cpus to 4095,
this is being taken care of in patch 12, which raises the rwlock limit
to 16384 cpus.

Iommu code imposes a limit of 16383 cpus.

Changes in V2:
- addressed comments by Jan Beulich
- lots of additional cleanups
- reorganized complete series

Changes in V3:
- addressed comments by Jan Beulich

Changes in V4:
- former patch 1 has already been applied
- fixed a coding style issue in patch 1

Changes in V5:
- new patches 1 + 10 + 12 + 13
- due to the recent Ghost-race patches the macro layer for calling
  spinlock functions is kept
- addressed comments

Changes in V6:
- patches 1-5 of V5 have been committed already
- addressed comments

Juergen Gross (8):
  xen/spinlock: add explicit non-recursive locking functions
  xen/spinlock: add another function level
  xen/spinlock: add missing rspin_is_locked() and rspin_barrier()
  xen/spinlock: split recursive spinlocks from normal ones
  xen/spinlock: let all is_locked and trylock variants return bool
  xen/spinlock: support higher number of cpus
  xen/rwlock: raise the number of possible cpus
  xen: allow up to 16383 cpus

 xen/arch/Kconfig  |   2 +-
 xen/arch/arm/mm.c |   4 +-
 xen/arch/x86/domain.c |  12 +--
 xen/arch/x86/mm.c |  12 +--
 xen/arch/x86/mm/mem_sharing.c |   8 +-
 xen/arch/x86/mm/p2m-pod.c |   6 +-
 xen/arch/x86/mm/p2m.c |   4 +-
 xen/arch/x86/tboot.c  |   4 +-
 xen/common/domain.c   |   2 +-
 xen/common/domctl.c   |   4 +-
 xen/common/grant_table.c  |  10 +-
 xen/common/memory.c   |   4 +-
 xen/common/numa.c |   4 +-
 xen/common/page_alloc.c   |  18 ++--
 xen/common/spinlock.c | 181 ++
 xen/drivers/char/console.c|  20 ++--
 xen/drivers/passthrough/pci.c |   2 +-
 xen/include/xen/rwlock.h  |  23 +++--
 xen/include/xen/spinlock.h| 110 -
 19 files changed, 297 insertions(+), 133 deletions(-)

-- 
2.35.3




[PATCH v6 3/8] xen/spinlock: add missing rspin_is_locked() and rspin_barrier()

2024-03-27 Thread Juergen Gross
Add rspin_is_locked() and rspin_barrier() in order to prepare differing
spinlock_t and rspinlock_t types.

Signed-off-by: Juergen Gross 
---
V2:
- partially carved out from V1 patch, partially new
V5:
- let rspin_is_locked() return bool (Jan Beulich)
V6:
- Re-add comment to _spin_is_locked() (Jan Beulich)
---
 xen/arch/x86/mm/p2m-pod.c |  2 +-
 xen/common/domain.c   |  2 +-
 xen/common/page_alloc.c   |  2 +-
 xen/common/spinlock.c | 26 --
 xen/drivers/char/console.c|  4 ++--
 xen/drivers/passthrough/pci.c |  2 +-
 xen/include/xen/spinlock.h|  4 
 7 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index c48ea169b7..9750a3a21b 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -374,7 +374,7 @@ int p2m_pod_empty_cache(struct domain *d)
 
 /* After this barrier no new PoD activities can happen. */
 BUG_ON(!d->is_dying);
-spin_barrier(&p2m->pod.lock.lock);
+rspin_barrier(&p2m->pod.lock.lock);
 
 lock_page_alloc(p2m);
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ceb44c8266..282c3ab623 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -991,7 +991,7 @@ int domain_kill(struct domain *d)
 case DOMDYING_alive:
 domain_pause(d);
 d->is_dying = DOMDYING_dying;
-spin_barrier(&d->domain_lock);
+rspin_barrier(&d->domain_lock);
 argo_destroy(d);
 vnuma_destroy(d->vnuma);
 domain_set_outstanding_pages(d, 0);
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 4d6ce726e3..7c1bdfc046 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -477,7 +477,7 @@ unsigned long domain_adjust_tot_pages(struct domain *d, 
long pages)
 {
 long dom_before, dom_after, dom_claimed, sys_before, sys_after;
 
-ASSERT(spin_is_locked(&d->page_alloc_lock));
+ASSERT(rspin_is_locked(&d->page_alloc_lock));
 d->tot_pages += pages;
 
 /*
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 648393d95f..6572c76114 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -396,13 +396,10 @@ static bool always_inline spin_is_locked_common(const 
spinlock_tickets_t *t)
 int _spin_is_locked(const spinlock_t *lock)
 {
 /*
- * Recursive locks may be locked by another CPU, yet we return
- * "false" here, making this function suitable only for use in
- * ASSERT()s and alike.
+ * This function is suitable only for use in ASSERT()s and alike, as it
+ * doesn't tell _who_ is holding the lock.
  */
-return lock->recurse_cpu == SPINLOCK_NO_CPU
-   ? spin_is_locked_common(&lock->tickets)
-   : lock->recurse_cpu == smp_processor_id();
+return spin_is_locked_common(&lock->tickets);
 }
 
 static bool always_inline spin_trylock_common(spinlock_tickets_t *t,
@@ -465,6 +462,23 @@ void _spin_barrier(spinlock_t *lock)
 spin_barrier_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
 }
 
+bool _rspin_is_locked(const rspinlock_t *lock)
+{
+/*
+ * Recursive locks may be locked by another CPU, yet we return
+ * "false" here, making this function suitable only for use in
+ * ASSERT()s and alike.
+ */
+return lock->recurse_cpu == SPINLOCK_NO_CPU
+   ? spin_is_locked_common(&lock->tickets)
+   : lock->recurse_cpu == smp_processor_id();
+}
+
+void _rspin_barrier(rspinlock_t *lock)
+{
+spin_barrier_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
 bool _rspin_trylock(rspinlock_t *lock)
 {
 unsigned int cpu = smp_processor_id();
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 22f50fc617..d5e6aacc27 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -327,7 +327,7 @@ static void cf_check do_dec_thresh(unsigned char key, bool 
unused)
 
 static void conring_puts(const char *str, size_t len)
 {
-ASSERT(spin_is_locked(&console_lock));
+ASSERT(rspin_is_locked(&console_lock));
 
 while ( len-- )
 conring[CONRING_IDX_MASK(conringp++)] = *str++;
@@ -765,7 +765,7 @@ static void __putstr(const char *str)
 {
 size_t len = strlen(str);
 
-ASSERT(spin_is_locked(&console_lock));
+ASSERT(rspin_is_locked(&console_lock));
 
 console_serial_puts(str, len);
 video_puts(str, len);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 4fcc7e2cde..5a446d3dce 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -65,7 +65,7 @@ void pcidevs_unlock(void)
 
 bool pcidevs_locked(void)
 {
-return !!spin_is_locked(&_pcidevs_lock);
+return rspin_is_locked(&_pcidevs_lock);
 }
 
 static struct radix_tree_root pci_segments;
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 8bc4652526..148be1e116 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -297,6 +297,8 @@ void _

[PATCH v6 1/8] xen/spinlock: add explicit non-recursive locking functions

2024-03-27 Thread Juergen Gross
In order to prepare a type-safe recursive spinlock structure, add
explicitly non-recursive locking functions to be used for non-recursive
locking of spinlocks, which are used recursively, too.

Signed-off-by: Juergen Gross 
Acked-by: Jan Beulich 
---
V2:
- rename functions (Jan Beulich)
- get rid of !! in pcidevs_locked() (Jan Beulich)
V5:
- remove spurious change (Julien Grall)
- add nrspin_lock() description (Julien Grall)
---
 xen/arch/arm/mm.c |  4 ++--
 xen/arch/x86/domain.c | 12 ++--
 xen/arch/x86/mm.c | 12 ++--
 xen/arch/x86/mm/mem_sharing.c |  8 
 xen/arch/x86/mm/p2m-pod.c |  4 ++--
 xen/arch/x86/mm/p2m.c |  4 ++--
 xen/arch/x86/tboot.c  |  4 ++--
 xen/common/domctl.c   |  4 ++--
 xen/common/grant_table.c  | 10 +-
 xen/common/memory.c   |  4 ++--
 xen/common/numa.c |  4 ++--
 xen/common/page_alloc.c   | 16 
 xen/drivers/char/console.c| 16 
 xen/include/xen/spinlock.h| 29 +++--
 14 files changed, 74 insertions(+), 57 deletions(-)

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index b15a18a494..def939172c 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -105,7 +105,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 if ( page_get_owner(page) == d )
 return;
 
-spin_lock(&d->page_alloc_lock);
+nrspin_lock(&d->page_alloc_lock);
 
 /*
  * The incremented type count pins as writable or read-only.
@@ -136,7 +136,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 page_list_add_tail(page, &d->xenpage_list);
 }
 
-spin_unlock(&d->page_alloc_lock);
+nrspin_unlock(&d->page_alloc_lock);
 }
 
 int xenmem_add_to_physmap_one(
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a11c55f921..33a2830d9d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -212,7 +212,7 @@ void dump_pageframe_info(struct domain *d)
 {
 unsigned long total[MASK_EXTR(PGT_type_mask, PGT_type_mask) + 1] = {};
 
-spin_lock(&d->page_alloc_lock);
+nrspin_lock(&d->page_alloc_lock);
 page_list_for_each ( page, &d->page_list )
 {
 unsigned int index = MASK_EXTR(page->u.inuse.type_info,
@@ -231,13 +231,13 @@ void dump_pageframe_info(struct domain *d)
_p(mfn_x(page_to_mfn(page))),
page->count_info, page->u.inuse.type_info);
 }
-spin_unlock(&d->page_alloc_lock);
+nrspin_unlock(&d->page_alloc_lock);
 }
 
 if ( is_hvm_domain(d) )
 p2m_pod_dump_data(d);
 
-spin_lock(&d->page_alloc_lock);
+nrspin_lock(&d->page_alloc_lock);
 
 page_list_for_each ( page, &d->xenpage_list )
 {
@@ -253,7 +253,7 @@ void dump_pageframe_info(struct domain *d)
page->count_info, page->u.inuse.type_info);
 }
 
-spin_unlock(&d->page_alloc_lock);
+nrspin_unlock(&d->page_alloc_lock);
 }
 
 void update_guest_memory_policy(struct vcpu *v,
@@ -2448,10 +2448,10 @@ int domain_relinquish_resources(struct domain *d)
 d->arch.auto_unmask = 0;
 }
 
-spin_lock(&d->page_alloc_lock);
+nrspin_lock(&d->page_alloc_lock);
 page_list_splice(&d->arch.relmem_list, &d->page_list);
 INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
-spin_unlock(&d->page_alloc_lock);
+nrspin_unlock(&d->page_alloc_lock);
 
 PROGRESS(xen):
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 62f5b811bb..b4d125db39 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -482,7 +482,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 
 set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
 
-spin_lock(&d->page_alloc_lock);
+nrspin_lock(&d->page_alloc_lock);
 
 /* The incremented type count pins as writable or read-only. */
 page->u.inuse.type_info =
@@ -502,7 +502,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 page_list_add_tail(page, &d->xenpage_list);
 }
 
-spin_unlock(&d->page_alloc_lock);
+nrspin_unlock(&d->page_alloc_lock);
 }
 
 void make_cr3(struct vcpu *v, mfn_t mfn)
@@ -3597,11 +3597,11 @@ long do_mmuext_op(
 {
 bool drop_ref;
 
-spin_lock(&pg_owner->page_alloc_lock);
+nrspin_lock(&pg_owner->page_alloc_lock);
 drop_ref = (pg_owner->is_dying &&
 test_and_clear_bit(_PGT_pinned,
 &page->u.inuse.type_info));
-spin_unlock(&pg_owner->page_alloc_lock);
+nrspin_unlock(&pg_owner->page_alloc_lock);
 if ( drop_ref )
 {
 pin_drop:
@@ -4424,7 +4424,7 @@ int steal_page(
  * that it might be 

[GIT PULL] xen: branch for v6.9-rc1

2024-03-19 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git for-linus-6.9-rc1-tag

xen: branch for v6.9-rc1

It contains the following patches:

- 2 patches for Xen event channel handling fixing a regression with a
  rare kernel config and adding some hardening.

- A patch for better support of running Xen dom0 in PVH mode.

- A cleanup patch for the xen grant-dma-iommu driver.


Thanks.

Juergen

 arch/x86/include/asm/xen/hypervisor.h |  5 +++
 arch/x86/platform/pvh/enlighten.c |  3 ++
 arch/x86/xen/enlighten.c  | 32 +
 arch/x86/xen/enlighten_pvh.c  | 68 +++
 arch/x86/xen/setup.c  | 44 ---
 arch/x86/xen/xen-ops.h| 14 
 drivers/xen/balloon.c |  2 --
 drivers/xen/events/events_base.c  | 22 +++-
 drivers/xen/evtchn.c  |  6 
 drivers/xen/grant-dma-iommu.c |  6 ++--
 10 files changed, 143 insertions(+), 59 deletions(-)

Juergen Gross (2):
  xen/evtchn: avoid WARN() when unbinding an event channel
  xen/events: increment refcnt only if event channel is refcounted

Roger Pau Monne (1):
  x86/xen: attempt to inflate the memory balloon on PVH

Uwe Kleine-König (1):
  xen/grant-dma-iommu: Convert to platform remove callback returning void



Re: [OSSTEST PATCH v2 3/3] ap-common: Switch to Linux 6.1 by default on x86 + drop dom0 i386

2024-03-15 Thread Juergen Gross

On 15.03.24 16:48, Anthony PERARD wrote:

linux-4.19 branch in xenbits is outdated, it hasn't been updated and
tested since 2020 as it has been disabled in osstest. Also, this 4.19
branch doesn't build on Bookworm.

So we will start to use a newer version of Linux. We switched to 6.1 for
the Arm* tests recently, so will use that same version for x86.

Also, following commit 3a3089c94913 ("mfi-common: Drop Linux dom0 i386
tests for newer Linux branches"), 32bit dom0 isn't tested on newer
Linux, so we need to drop all dom0 i386 tests wherever the default
linux branch is used. That is, this change in jobs will apply to the
"xen-unstable" branch but also all xen stable branches, seabios, qemu,
osstest, libvirt, so every branch that isn't "linux-*".

Here is the list of jobs that change, and whether they are replaced, or
have existing equivalents, on the "xen-unstable" branch. Changes
compared with:
 OSSTEST_CONFIG=standalone-config-example nice eatmydata 
./standalone-generate-dump-flight-runvars

Gone, without existing or new test-amd64-amd64-*:
- test-amd64-i386-freebsd10-amd64
- test-amd64-i386-freebsd10-i386
- test-amd64-i386-qemut-rhel6hvm-amd
- test-amd64-i386-qemut-rhel6hvm-intel
- test-amd64-i386-qemuu-rhel6hvm-amd
- test-amd64-i386-qemuu-rhel6hvm-intel

Gone, but with existing test-amd64-amd64-* equivalent:
- test-amd64-coresched-i386-xl
- test-amd64-i386-examine
- test-amd64-i386-examine-bios
- test-amd64-i386-examine-uefi
- test-amd64-i386-libvirt
- test-amd64-i386-libvirt-pair
- test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm
- test-amd64-i386-libvirt-xsm
- test-amd64-i386-livepatch
- test-amd64-i386-migrupgrade
- test-amd64-i386-pair
- test-amd64-i386-xl
- test-amd64-i386-xl-pvshim
- test-amd64-i386-xl-qemut-debianhvm-amd64
- test-amd64-i386-xl-qemut-debianhvm-i386-xsm
- test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm
- test-amd64-i386-xl-qemut-win7-amd64
- test-amd64-i386-xl-qemut-ws16-amd64
- test-amd64-i386-xl-qemuu-debianhvm-amd64
- test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow
- test-amd64-i386-xl-qemuu-debianhvm-i386-xsm
- test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict
- test-amd64-i386-xl-qemuu-ovmf-amd64
- test-amd64-i386-xl-qemuu-win7-amd64
- test-amd64-i386-xl-qemuu-ws16-amd64
- test-amd64-i386-xl-shadow
- test-amd64-i386-xl-simplat-amd64-buster
- test-amd64-i386-xl-xsm

Gone, but replaced by a new test-amd64-amd64-*:
- test-amd64-i386-libvirt-qcow2
- test-amd64-i386-libvirt-raw
- test-amd64-i386-xl-vhd
+ test-amd64-amd64-libvirt-qcow2
+ test-amd64-amd64-libvirt-raw
+ test-amd64-amd64-xl-vhd

In any case, the list of tests would be the same as for the existing
"linux-linus" or "linux-6.1" branches.

Signed-off-by: Anthony PERARD 


Acked-by: Juergen Gross 

... knowing this is kind of meaningless given that I'm no maintainer of
OSStest, but I'd like to document that I'm fine with the intention of the
change. :-)


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH v5 10/13] xen/spinlock: let all is_locked and trylock variants return bool

2024-03-14 Thread Juergen Gross
Switch the remaining trylock and is_locked variants to return bool.

Signed-off-by: Juergen Gross 
---
V5:
- new patch (Jan Beulich)
---
 xen/common/spinlock.c  | 4 ++--
 xen/include/xen/spinlock.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index a88ad9b93c..b28239f74d 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -393,7 +393,7 @@ static bool always_inline spin_is_locked_common(const 
spinlock_tickets_t *t)
 return t->head != t->tail;
 }
 
-int _spin_is_locked(const spinlock_t *lock)
+bool _spin_is_locked(const spinlock_t *lock)
 {
 return spin_is_locked_common(&lock->tickets);
 }
@@ -429,7 +429,7 @@ static bool always_inline 
spin_trylock_common(spinlock_tickets_t *t,
 return true;
 }
 
-int _spin_trylock(spinlock_t *lock)
+bool _spin_trylock(spinlock_t *lock)
 {
 return spin_trylock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
 }
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 181e5c7d35..1b50a7e6a0 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -234,8 +234,8 @@ void _spin_unlock(spinlock_t *lock);
 void _spin_unlock_irq(spinlock_t *lock);
 void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags);
 
-int _spin_is_locked(const spinlock_t *lock);
-int _spin_trylock(spinlock_t *lock);
+bool _spin_is_locked(const spinlock_t *lock);
+bool _spin_trylock(spinlock_t *lock);
 void _spin_barrier(spinlock_t *lock);
 
 static always_inline void spin_lock(spinlock_t *l)
-- 
2.35.3




[PATCH v5 12/13] xen/rwlock: raise the number of possible cpus

2024-03-14 Thread Juergen Gross
The rwlock handling is limiting the number of cpus to 4095 today. The
main reason is the use of the atomic_t data type for the main lock
handling, which needs 2 bits for the locking state (writer waiting or
write locked), 12 bits for the id of a possible writer, and a 12 bit
counter for readers. The limit isn't 4096 due to an off by one sanity
check.

The atomic_t data type is 32 bits wide, so in theory 15 bits for the
writer's cpu id and 15 bits for the reader count seem to be fine, but
via read_trylock() more readers than cpus are possible.

This means that it is possible to raise the number of cpus to 16384
without changing the rwlock_t data structure. In order to avoid the
reader count wrapping to zero, don't let read_trylock() succeed in case
the highest bit of the reader's count is set already. This leaves enough
headroom for non-recursive readers to enter without risking a wrap.

While at it calculate _QW_CPUMASK and _QR_SHIFT from _QW_SHIFT and
add a sanity check for not overflowing the atomic_t data type.

Signed-off-by: Juergen Gross 
---
V5:
- new patch
---
 xen/include/xen/rwlock.h | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
index 65d88b0ef4..afd57659bd 100644
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -23,12 +23,12 @@ typedef struct {
 #define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)
 
 /* Writer states & reader shift and bias. */
-#define _QW_CPUMASK  0xfffU /* Writer CPU mask */
-#define _QW_SHIFT    12 /* Writer flags shift */
-#define _QW_WAITING  (1U << _QW_SHIFT)  /* A writer is waiting */
-#define _QW_LOCKED   (3U << _QW_SHIFT)  /* A writer holds the lock */
-#define _QW_WMASK    (3U << _QW_SHIFT)  /* Writer mask */
-#define _QR_SHIFT    14 /* Reader count shift */
+#define _QW_SHIFT    14  /* Writer flags shift */
+#define _QW_CPUMASK  ((1U << _QW_SHIFT) - 1) /* Writer CPU mask */
+#define _QW_WAITING  (1U << _QW_SHIFT)   /* A writer is waiting */
+#define _QW_LOCKED   (3U << _QW_SHIFT)   /* A writer holds the lock */
+#define _QW_WMASK    (3U << _QW_SHIFT)   /* Writer mask */
+#define _QR_SHIFT    (_QW_SHIFT + 2) /* Reader count shift */
 #define _QR_BIAS     (1U << _QR_SHIFT)
 
 void queue_read_lock_slowpath(rwlock_t *lock);
@@ -36,14 +36,16 @@ void queue_write_lock_slowpath(rwlock_t *lock);
 
 static inline bool _is_write_locked_by_me(unsigned int cnts)
 {
-BUILD_BUG_ON(_QW_CPUMASK < NR_CPUS);
+BUILD_BUG_ON((_QW_CPUMASK + 1) < NR_CPUS);
+BUILD_BUG_ON(NR_CPUS * _QR_BIAS > INT_MAX);
 return (cnts & _QW_WMASK) == _QW_LOCKED &&
(cnts & _QW_CPUMASK) == smp_processor_id();
 }
 
 static inline bool _can_read_lock(unsigned int cnts)
 {
-return !(cnts & _QW_WMASK) || _is_write_locked_by_me(cnts);
+return cnts <= INT_MAX &&
+   (!(cnts & _QW_WMASK) || _is_write_locked_by_me(cnts));
 }
 
 /*
-- 
2.35.3




[PATCH v5 09/13] xen/spinlock: split recursive spinlocks from normal ones

2024-03-14 Thread Juergen Gross
Recursive and normal spinlocks are sharing the same data structure for
representation of the lock. This has two major disadvantages:

- it is not clear from the definition of a lock, whether it is intended
  to be used recursive or not, while a mixture of both usage variants
  needs to be

- in production builds (builds without CONFIG_DEBUG_LOCKS) the needed
  data size of an ordinary spinlock is 8 bytes instead of 4, due to the
  additional recursion data needed (associated with that the rwlock
  data is using 12 instead of only 8 bytes)

Fix that by introducing a struct spinlock_recursive for recursive
spinlocks only, and switch recursive spinlock functions to require
pointers to this new struct.

This allows checking the correct usage at build time.

Signed-off-by: Juergen Gross 
---
V2:
- use shorter names (Jan Beulich)
- don't embed spinlock_t in rspinlock_t (Jan Beulich)
V5:
- some style fixes (Jan Beulich)
- bool instead of int (Jan Beulich)
---
 xen/common/spinlock.c  | 50 ++
 xen/include/xen/spinlock.h | 72 +-
 2 files changed, 105 insertions(+), 17 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index c3f2f9b209..a88ad9b93c 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -541,6 +541,56 @@ void _rspin_unlock_irqrestore(rspinlock_t *lock, unsigned 
long flags)
 local_irq_restore(flags);
 }
 
+bool _nrspin_trylock(rspinlock_t *lock)
+{
+check_lock(&lock->debug, true);
+
+if ( unlikely(lock->recurse_cpu != SPINLOCK_NO_CPU) )
+return false;
+
+return spin_trylock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
+void _nrspin_lock(rspinlock_t *lock)
+{
+spin_lock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR, NULL,
+ NULL);
+}
+
+void _nrspin_unlock(rspinlock_t *lock)
+{
+spin_unlock_common(&lock->tickets, &lock->debug, LOCK_PROFILE_PAR);
+}
+
+void _nrspin_lock_irq(rspinlock_t *lock)
+{
+ASSERT(local_irq_is_enabled());
+local_irq_disable();
+_nrspin_lock(lock);
+}
+
+void _nrspin_unlock_irq(rspinlock_t *lock)
+{
+_nrspin_unlock(lock);
+local_irq_enable();
+}
+
+unsigned long _nrspin_lock_irqsave(rspinlock_t *lock)
+{
+unsigned long flags;
+
+local_irq_save(flags);
+_nrspin_lock(lock);
+
+return flags;
+}
+
+void _nrspin_unlock_irqrestore(rspinlock_t *lock, unsigned long flags)
+{
+_nrspin_unlock(lock);
+local_irq_restore(flags);
+}
+
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
 struct lock_profile_anc {
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 7dd11faab3..181e5c7d35 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -77,8 +77,6 @@ union lock_debug { };
 */
 
 struct spinlock;
-/* Temporary hack until a dedicated struct rspinlock is existing. */
-#define rspinlock spinlock
 
 struct lock_profile {
 struct lock_profile *next;   /* forward link */
@@ -110,6 +108,10 @@ struct lock_profile_qhead {
 __used_section(".lockprofile.data") = \
 &lock_profile_data__##name
 #define SPIN_LOCK_UNLOCKED_(x) {  \
+.debug = LOCK_DEBUG_, \
+.profile = x, \
+}
+#define RSPIN_LOCK_UNLOCKED_(x) { \
 .recurse_cpu = SPINLOCK_NO_CPU,   \
 .debug = LOCK_DEBUG_, \
 .profile = x, \
@@ -119,8 +121,9 @@ struct lock_profile_qhead {
 spinlock_t l = SPIN_LOCK_UNLOCKED_(NULL); \
 static struct lock_profile lock_profile_data__##l = LOCK_PROFILE_(l); \
 LOCK_PROFILE_PTR_(l)
+#define RSPIN_LOCK_UNLOCKED RSPIN_LOCK_UNLOCKED_(NULL)
 #define DEFINE_RSPINLOCK(l)   \
-rspinlock_t l = SPIN_LOCK_UNLOCKED_(NULL);\
+rspinlock_t l = RSPIN_LOCK_UNLOCKED_(NULL);   \
 static struct lock_profile lock_profile_data__##l = RLOCK_PROFILE_(l);\
 LOCK_PROFILE_PTR_(l)
 
@@ -145,8 +148,11 @@ struct lock_profile_qhead {
 
 #define spin_lock_init_prof(s, l) \
 spin_lock_init_prof__(s, l, lock, spinlock_t, false)
-#define rspin_lock_init_prof(s, l)\
-spin_lock_init_prof__(s, l, rlock, rspinlock_t, true)
+#define rspin_lock_init_prof(s, l) do {   \
+spin_lock_init_prof__(s, l, rlock, rspinlock_t, true);\
+(s)->l.recurse_cpu = SPINLOCK_NO_CPU; \
+(s)->l.recurse_cnt = 0; 

[PATCH v5 13/13] xen: allow up to 16383 cpus

2024-03-14 Thread Juergen Gross
With lock handling now allowing up to 16384 cpus (spinlocks can handle
65535 cpus, rwlocks can handle 16384 cpus), raise the allowed limit for
the number of cpus to be configured to 16383.

The new limit is imposed by IOMMU_CMD_BUFFER_MAX_ENTRIES and
QINVAL_MAX_ENTRY_NR required to be larger than 2 * CONFIG_NR_CPUS.

Signed-off-by: Juergen Gross 
---
V5:
- new patch (Jan Beulich)
---
 xen/arch/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/arch/Kconfig b/xen/arch/Kconfig
index 67ba38f32f..308ce129a8 100644
--- a/xen/arch/Kconfig
+++ b/xen/arch/Kconfig
@@ -6,7 +6,7 @@ config PHYS_ADDR_T_32
 
 config NR_CPUS
int "Maximum number of CPUs"
-   range 1 4095
+   range 1 16383
default "256" if X86
default "8" if ARM && RCAR3
default "4" if ARM && QEMU
-- 
2.35.3




[PATCH v5 11/13] xen/spinlock: support higher number of cpus

2024-03-14 Thread Juergen Gross
Allow 16 bits per cpu number, which is the limit imposed by
spinlock_tickets_t.

This will allow up to 65535 cpus, while increasing only the size of
recursive spinlocks in debug builds from 8 to 12 bytes.

The current Xen limit of 4095 cpus is imposed by SPINLOCK_CPU_BITS
being 12. There are machines available with more cpus than the current
Xen limit, so it makes sense to have the possibility to use more cpus.

Signed-off-by: Juergen Gross 
---
V5:
- keep previous recursion limit (Julien Grall)
---
 xen/common/spinlock.c  |  2 ++
 xen/include/xen/spinlock.h | 20 ++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index b28239f74d..5be48be082 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -481,7 +481,9 @@ bool _rspin_trylock(rspinlock_t *lock)
 
 /* Don't allow overflow of recurse_cpu field. */
 BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
+BUILD_BUG_ON(SPINLOCK_CPU_BITS > sizeof(lock->recurse_cpu) * 8);
 BUILD_BUG_ON(SPINLOCK_RECURSE_BITS < 3);
+BUILD_BUG_ON(SPINLOCK_MAX_RECURSE > ((1u << SPINLOCK_RECURSE_BITS) - 1));
 
 check_lock(>debug, true);
 
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 1b50a7e6a0..984da6d4c9 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -8,16 +8,16 @@
 #include 
 #include 
 
-#define SPINLOCK_CPU_BITS  12
+#define SPINLOCK_CPU_BITS  16
 
 #ifdef CONFIG_DEBUG_LOCKS
 union lock_debug {
-uint16_t val;
-#define LOCK_DEBUG_INITVAL 0xffff
+uint32_t val;
+#define LOCK_DEBUG_INITVAL 0xffffffff
 struct {
-uint16_t cpu:SPINLOCK_CPU_BITS;
-#define LOCK_DEBUG_PAD_BITS (14 - SPINLOCK_CPU_BITS)
-uint16_t :LOCK_DEBUG_PAD_BITS;
+uint32_t cpu:SPINLOCK_CPU_BITS;
+#define LOCK_DEBUG_PAD_BITS (30 - SPINLOCK_CPU_BITS)
+uint32_t :LOCK_DEBUG_PAD_BITS;
 bool irq_safe:1;
 bool unseen:1;
 };
@@ -211,11 +211,11 @@ typedef struct spinlock {
 
 typedef struct rspinlock {
 spinlock_tickets_t tickets;
-uint16_t recurse_cpu:SPINLOCK_CPU_BITS;
+uint16_t recurse_cpu;
 #define SPINLOCK_NO_CPU((1u << SPINLOCK_CPU_BITS) - 1)
-#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
-uint16_t recurse_cnt:SPINLOCK_RECURSE_BITS;
-#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)
+#define SPINLOCK_RECURSE_BITS  8
+uint8_t recurse_cnt;
+#define SPINLOCK_MAX_RECURSE   15
 union lock_debug debug;
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 struct lock_profile *profile;
-- 
2.35.3




[PATCH v5 08/13] xen/spinlock: add missing rspin_is_locked() and rspin_barrier()

2024-03-14 Thread Juergen Gross
Add rspin_is_locked() and rspin_barrier() in order to prepare for differing
spinlock_t and rspinlock_t types.

Signed-off-by: Juergen Gross 
---
V2:
- partially carved out from V1 patch, partially new
V5:
- let rspin_is_locked() return bool (Jan Beulich)
---
 xen/arch/x86/mm/p2m-pod.c |  2 +-
 xen/common/domain.c   |  2 +-
 xen/common/page_alloc.c   |  2 +-
 xen/common/spinlock.c | 26 ++
 xen/drivers/char/console.c|  4 ++--
 xen/drivers/passthrough/pci.c |  2 +-
 xen/include/xen/spinlock.h|  4 
 7 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 515a8c98a5..ae5dcb1870 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -374,7 +374,7 @@ int p2m_pod_empty_cache(struct domain *d)
 
 /* After this barrier no new PoD activities can happen. */
 BUG_ON(!d->is_dying);
-spin_barrier(>pod.lock.lock);
+rspin_barrier(>pod.lock.lock);
 
 lock_page_alloc(p2m);
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 8e0109c590..2d7c69e6d2 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -983,7 +983,7 @@ int domain_kill(struct domain *d)
 case DOMDYING_alive:
 domain_pause(d);
 d->is_dying = DOMDYING_dying;
-spin_barrier(>domain_lock);
+rspin_barrier(>domain_lock);
 argo_destroy(d);
 vnuma_destroy(d->vnuma);
 domain_set_outstanding_pages(d, 0);
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 20238015ed..83d1715e25 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -475,7 +475,7 @@ unsigned long domain_adjust_tot_pages(struct domain *d, 
long pages)
 {
 long dom_before, dom_after, dom_claimed, sys_before, sys_after;
 
-ASSERT(spin_is_locked(>page_alloc_lock));
+ASSERT(rspin_is_locked(>page_alloc_lock));
 d->tot_pages += pages;
 
 /*
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 648393d95f..c3f2f9b209 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -395,14 +395,7 @@ static bool always_inline spin_is_locked_common(const 
spinlock_tickets_t *t)
 
 int _spin_is_locked(const spinlock_t *lock)
 {
-/*
- * Recursive locks may be locked by another CPU, yet we return
- * "false" here, making this function suitable only for use in
- * ASSERT()s and alike.
- */
-return lock->recurse_cpu == SPINLOCK_NO_CPU
-   ? spin_is_locked_common(>tickets)
-   : lock->recurse_cpu == smp_processor_id();
+return spin_is_locked_common(>tickets);
 }
 
 static bool always_inline spin_trylock_common(spinlock_tickets_t *t,
@@ -465,6 +458,23 @@ void _spin_barrier(spinlock_t *lock)
 spin_barrier_common(>tickets, >debug, LOCK_PROFILE_PAR);
 }
 
+bool _rspin_is_locked(const rspinlock_t *lock)
+{
+/*
+ * Recursive locks may be locked by another CPU, yet we return
+ * "false" here, making this function suitable only for use in
+ * ASSERT()s and alike.
+ */
+return lock->recurse_cpu == SPINLOCK_NO_CPU
+   ? spin_is_locked_common(>tickets)
+   : lock->recurse_cpu == smp_processor_id();
+}
+
+void _rspin_barrier(rspinlock_t *lock)
+{
+spin_barrier_common(>tickets, >debug, LOCK_PROFILE_PAR);
+}
+
 bool _rspin_trylock(rspinlock_t *lock)
 {
 unsigned int cpu = smp_processor_id();
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 22f50fc617..d5e6aacc27 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -327,7 +327,7 @@ static void cf_check do_dec_thresh(unsigned char key, bool 
unused)
 
 static void conring_puts(const char *str, size_t len)
 {
-ASSERT(spin_is_locked(_lock));
+ASSERT(rspin_is_locked(_lock));
 
 while ( len-- )
 conring[CONRING_IDX_MASK(conringp++)] = *str++;
@@ -765,7 +765,7 @@ static void __putstr(const char *str)
 {
 size_t len = strlen(str);
 
-ASSERT(spin_is_locked(_lock));
+ASSERT(rspin_is_locked(_lock));
 
 console_serial_puts(str, len);
 video_puts(str, len);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 4fcc7e2cde..5a446d3dce 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -65,7 +65,7 @@ void pcidevs_unlock(void)
 
 bool pcidevs_locked(void)
 {
-return !!spin_is_locked(&_pcidevs_lock);
+return rspin_is_locked(&_pcidevs_lock);
 }
 
 static struct radix_tree_root pci_segments;
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 5b20b11db6..7dd11faab3 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -297,6 +297,8 @@ void _rspin_lock(rspinlock_t *lock);
 unsigned long _rspin_lock_irqsave(rspinlock_t *lock);
 void _rspin_unlock(rspinlock_t *lock);
 void _rspin_unlock_irqrestore(rspinlock_t *lock, unsigned lon

[PATCH v5 06/13] xen/spinlock: add explicit non-recursive locking functions

2024-03-14 Thread Juergen Gross
In order to prepare a type-safe recursive spinlock structure, add
explicitly non-recursive locking functions to be used for non-recursive
locking of spinlocks that are also used recursively.

Signed-off-by: Juergen Gross 
Acked-by: Jan Beulich 
---
V2:
- rename functions (Jan Beulich)
- get rid of !! in pcidevs_locked() (Jan Beulich)
V5:
- remove spurious change (Julien Grall)
- add nrspin_lock() description (Julien Grall)
---
 xen/arch/arm/mm.c |  4 ++--
 xen/arch/x86/domain.c | 12 ++--
 xen/arch/x86/mm.c | 12 ++--
 xen/arch/x86/mm/mem_sharing.c |  8 
 xen/arch/x86/mm/p2m-pod.c |  4 ++--
 xen/arch/x86/mm/p2m.c |  4 ++--
 xen/arch/x86/tboot.c  |  4 ++--
 xen/common/domctl.c   |  4 ++--
 xen/common/grant_table.c  | 10 +-
 xen/common/memory.c   |  4 ++--
 xen/common/numa.c |  4 ++--
 xen/common/page_alloc.c   | 16 
 xen/drivers/char/console.c| 16 
 xen/include/xen/spinlock.h| 29 +++--
 14 files changed, 74 insertions(+), 57 deletions(-)

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index b15a18a494..def939172c 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -105,7 +105,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 if ( page_get_owner(page) == d )
 return;
 
-spin_lock(>page_alloc_lock);
+nrspin_lock(>page_alloc_lock);
 
 /*
  * The incremented type count pins as writable or read-only.
@@ -136,7 +136,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 page_list_add_tail(page, >xenpage_list);
 }
 
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
 }
 
 int xenmem_add_to_physmap_one(
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a11c55f921..33a2830d9d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -212,7 +212,7 @@ void dump_pageframe_info(struct domain *d)
 {
 unsigned long total[MASK_EXTR(PGT_type_mask, PGT_type_mask) + 1] = {};
 
-spin_lock(>page_alloc_lock);
+nrspin_lock(>page_alloc_lock);
 page_list_for_each ( page, >page_list )
 {
 unsigned int index = MASK_EXTR(page->u.inuse.type_info,
@@ -231,13 +231,13 @@ void dump_pageframe_info(struct domain *d)
_p(mfn_x(page_to_mfn(page))),
page->count_info, page->u.inuse.type_info);
 }
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
 }
 
 if ( is_hvm_domain(d) )
 p2m_pod_dump_data(d);
 
-spin_lock(>page_alloc_lock);
+nrspin_lock(>page_alloc_lock);
 
 page_list_for_each ( page, >xenpage_list )
 {
@@ -253,7 +253,7 @@ void dump_pageframe_info(struct domain *d)
page->count_info, page->u.inuse.type_info);
 }
 
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
 }
 
 void update_guest_memory_policy(struct vcpu *v,
@@ -2448,10 +2448,10 @@ int domain_relinquish_resources(struct domain *d)
 d->arch.auto_unmask = 0;
 }
 
-spin_lock(>page_alloc_lock);
+nrspin_lock(>page_alloc_lock);
 page_list_splice(>arch.relmem_list, >page_list);
 INIT_PAGE_LIST_HEAD(>arch.relmem_list);
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
 
 PROGRESS(xen):
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 62f5b811bb..b4d125db39 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -482,7 +482,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 
 set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
 
-spin_lock(>page_alloc_lock);
+nrspin_lock(>page_alloc_lock);
 
 /* The incremented type count pins as writable or read-only. */
 page->u.inuse.type_info =
@@ -502,7 +502,7 @@ void share_xen_page_with_guest(struct page_info *page, 
struct domain *d,
 page_list_add_tail(page, >xenpage_list);
 }
 
-spin_unlock(>page_alloc_lock);
+nrspin_unlock(>page_alloc_lock);
 }
 
 void make_cr3(struct vcpu *v, mfn_t mfn)
@@ -3597,11 +3597,11 @@ long do_mmuext_op(
 {
 bool drop_ref;
 
-spin_lock(_owner->page_alloc_lock);
+nrspin_lock(_owner->page_alloc_lock);
 drop_ref = (pg_owner->is_dying &&
 test_and_clear_bit(_PGT_pinned,
>u.inuse.type_info));
-spin_unlock(_owner->page_alloc_lock);
+nrspin_unlock(_owner->page_alloc_lock);
 if ( drop_ref )
 {
 pin_drop:
@@ -4424,7 +4424,7 @@ int steal_page(
  * that it might be 

[PATCH v5 07/13] xen/spinlock: add another function level

2024-03-14 Thread Juergen Gross
Add another function level in spinlock.c hiding the spinlock_t layout
from the low level locking code.

This is done in preparation for introducing rspinlock_t for recursive
locks without having to duplicate all of the locking code.

Signed-off-by: Juergen Gross 
---
V2:
- new patch
V5:
- don't regress spin_is_locked() for rspin-lock (Jan Beulich)
- use bool as return type of spin_is_locked_common() and
  spin_trylock_common() (Jan Beulich)
---
 xen/common/spinlock.c  | 103 -
 xen/include/xen/spinlock.h |   1 +
 2 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 0e8b525cec..648393d95f 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -261,29 +261,31 @@ void spin_debug_disable(void)
 
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
+#define LOCK_PROFILE_PAR lock->profile
 #define LOCK_PROFILE_REL \
-if ( lock->profile ) \
+if ( profile )   \
 {\
-lock->profile->time_hold += NOW() - lock->profile->time_locked;  \
-lock->profile->lock_cnt++;   \
+profile->time_hold += NOW() - profile->time_locked;  \
+profile->lock_cnt++; \
 }
 #define LOCK_PROFILE_VAR(var, val)s_time_t var = (val)
 #define LOCK_PROFILE_BLOCK(var)   var = var ? : NOW()
 #define LOCK_PROFILE_BLKACC(tst, val)\
 if ( tst )   \
 {\
-lock->profile->time_block += lock->profile->time_locked - (val); \
-lock->profile->block_cnt++;  \
+profile->time_block += profile->time_locked - (val); \
+profile->block_cnt++;\
 }
 #define LOCK_PROFILE_GOT(val)\
-if ( lock->profile ) \
+if ( profile )   \
 {\
-lock->profile->time_locked = NOW();  \
+profile->time_locked = NOW();\
 LOCK_PROFILE_BLKACC(val, val);   \
 }
 
 #else
 
+#define LOCK_PROFILE_PAR NULL
 #define LOCK_PROFILE_REL
 #define LOCK_PROFILE_VAR(var, val)
 #define LOCK_PROFILE_BLOCK(var)
@@ -307,17 +309,18 @@ static always_inline uint16_t observe_head(const 
spinlock_tickets_t *t)
 return read_atomic(>head);
 }
 
-static void always_inline spin_lock_common(spinlock_t *lock,
+static void always_inline spin_lock_common(spinlock_tickets_t *t,
+   union lock_debug *debug,
+   struct lock_profile *profile,
void (*cb)(void *data), void *data)
 {
 spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
 LOCK_PROFILE_VAR(block, 0);
 
-check_lock(>debug, false);
+check_lock(debug, false);
 preempt_disable();
-tickets.head_tail = arch_fetch_and_add(>tickets.head_tail,
-   tickets.head_tail);
-while ( tickets.tail != observe_head(>tickets) )
+tickets.head_tail = arch_fetch_and_add(>head_tail, tickets.head_tail);
+while ( tickets.tail != observe_head(t) )
 {
 LOCK_PROFILE_BLOCK(block);
 if ( cb )
@@ -325,18 +328,19 @@ static void always_inline spin_lock_common(spinlock_t 
*lock,
 arch_lock_relax();
 }
 arch_lock_acquire_barrier();
-got_lock(>debug);
+got_lock(debug);
 LOCK_PROFILE_GOT(block);
 }
 
 void _spin_lock(spinlock_t *lock)
 {
-spin_lock_common(lock, NULL, NULL);
+spin_lock_common(>tickets, >debug, LOCK_PROFILE_PAR, NULL,
+ NULL);
 }
 
 void _spin_lock_cb(spinlock_t *lock, void (*cb)(void *data), void *data)
 {
-spin_lock_common(lock, cb, data);
+spin_lock_common(>tickets, >debug, LOCK_PROFILE_PAR, cb, data);
 }
 
 void _spin_lock_irq(spinlock_t *lock)
@@ -355,16 +359,23 @@ unsigned long _spin_lock_irqsave(spinlock_t *lock)
 return flags;
 }
 
-void _spin_unlock(spinlock_t *lock)
+static void always_inline spin_unlock_common(spinlock_tickets_t *t,
+ union lock_debug *debug,
+   

[PATCH v5 04/13] xen/spinlock: add rspin_[un]lock_irq[save|restore]()

2024-03-14 Thread Juergen Gross
Instead of special casing rspin_lock_irqsave() and
rspin_unlock_irqrestore() for the console lock, add those functions
to spinlock handling and use them where needed.

Signed-off-by: Juergen Gross 
---
V2:
- new patch
V5:
- avoid MISRA violation (Julien Grall)
- keep wrapper functions (Jan Beulich)
---
 xen/common/spinlock.c  | 18 +-
 xen/drivers/char/console.c |  6 ++
 xen/include/xen/spinlock.h |  9 +
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 11e13e1259..5ef0ac7f89 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -475,15 +475,31 @@ void _rspin_lock(rspinlock_t *lock)
 lock->recurse_cnt++;
 }
 
+unsigned long _rspin_lock_irqsave(rspinlock_t *lock)
+{
+unsigned long flags;
+
+local_irq_save(flags);
+_rspin_lock(lock);
+
+return flags;
+}
+
 void _rspin_unlock(rspinlock_t *lock)
 {
 if ( likely(--lock->recurse_cnt == 0) )
 {
 lock->recurse_cpu = SPINLOCK_NO_CPU;
-spin_unlock(lock);
+_spin_unlock(lock);
 }
 }
 
+void _rspin_unlock_irqrestore(rspinlock_t *lock, unsigned long flags)
+{
+_rspin_unlock(lock);
+local_irq_restore(flags);
+}
+
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
 struct lock_profile_anc {
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index eca17b55b4..ccd5f8cc14 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -1161,16 +1161,14 @@ unsigned long console_lock_recursive_irqsave(void)
 {
 unsigned long flags;
 
-local_irq_save(flags);
-rspin_lock(_lock);
+rspin_lock_irqsave(_lock, flags);
 
 return flags;
 }
 
 void console_unlock_recursive_irqrestore(unsigned long flags)
 {
-rspin_unlock(_lock);
-local_irq_restore(flags);
+rspin_unlock_irqrestore(_lock, flags);
 }
 
 void console_force_unlock(void)
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 50f6580f52..afa24c8e29 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -272,7 +272,15 @@ static always_inline void spin_lock_if(bool condition, 
spinlock_t *l)
  */
 bool _rspin_trylock(rspinlock_t *lock);
 void _rspin_lock(rspinlock_t *lock);
+#define rspin_lock_irqsave(l, f)\
+({  \
+BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long));   \
+((f) = _rspin_lock_irqsave(l)); \
+block_lock_speculation();   \
+})
+unsigned long _rspin_lock_irqsave(rspinlock_t *lock);
 void _rspin_unlock(rspinlock_t *lock);
+void _rspin_unlock_irqrestore(rspinlock_t *lock, unsigned long flags);
 
 static always_inline void rspin_lock(rspinlock_t *lock)
 {
@@ -282,5 +290,6 @@ static always_inline void rspin_lock(rspinlock_t *lock)
 
 #define rspin_trylock(l)  lock_evaluate_nospec(_rspin_trylock(l))
 #define rspin_unlock(l)   _rspin_unlock(l)
+#define rspin_unlock_irqrestore(l, f) _rspin_unlock_irqrestore(l, f)
 
 #endif /* __SPINLOCK_H__ */
-- 
2.35.3




[PATCH v5 05/13] xen/spinlock: make struct lock_profile rspinlock_t aware

2024-03-14 Thread Juergen Gross
Struct lock_profile contains a pointer to the spinlock it is associated
with. Prepare support of differing spinlock_t and rspinlock_t types by
adding a type indicator of the pointer. Use the highest bit of the
block_cnt member for this indicator in order to not grow the struct
while hurting only the slow path with slightly less performant code.
Note that this requires a cast when printing the value in order to be
format compliant.

Signed-off-by: Juergen Gross 
Acked-by: Alejandro Vallejo 
Acked-by: Julien Grall 
---
V2:
- new patch
V5:
- use bool for is_rlock (Julien Grall)
- use unsigned int for lockval in spinlock_profile_print_elem()
  (Jan Beulich)
- don't use anonymous union (Jan Beulich)
---
 xen/common/spinlock.c  | 28 
 xen/include/xen/spinlock.h | 14 ++
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 5ef0ac7f89..0e8b525cec 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -538,19 +538,31 @@ static void spinlock_profile_iterate(lock_profile_subfunc 
*sub, void *par)
 static void cf_check spinlock_profile_print_elem(struct lock_profile *data,
 int32_t type, int32_t idx, void *par)
 {
-struct spinlock *lock = data->lock;
+unsigned int cpu;
+unsigned int lockval;
+
+if ( data->is_rlock )
+{
+cpu = data->ptr.rlock->debug.cpu;
+lockval = data->ptr.rlock->tickets.head_tail;
+}
+else
+{
+cpu = data->ptr.lock->debug.cpu;
+lockval = data->ptr.lock->tickets.head_tail;
+}
 
 printk("%s ", lock_profile_ancs[type].name);
 if ( type != LOCKPROF_TYPE_GLOBAL )
 printk("%d ", idx);
-printk("%s: addr=%p, lockval=%08x, ", data->name, lock,
-   lock->tickets.head_tail);
-if ( lock->debug.cpu == SPINLOCK_NO_CPU )
+printk("%s: addr=%p, lockval=%08x, ", data->name, data->ptr.lock, lockval);
+if ( cpu == SPINLOCK_NO_CPU )
 printk("not locked\n");
 else
-printk("cpu=%d\n", lock->debug.cpu);
-printk("  lock:%" PRId64 "(%" PRI_stime "), block:%" PRId64 "(%" PRI_stime 
")\n",
-   data->lock_cnt, data->time_hold, data->block_cnt, data->time_block);
+printk("cpu=%u\n", cpu);
+printk("  lock:%" PRIu64 "(%" PRI_stime "), block:%" PRIu64 "(%" PRI_stime 
")\n",
+   data->lock_cnt, data->time_hold, (uint64_t)data->block_cnt,
+   data->time_block);
 }
 
 void cf_check spinlock_profile_printall(unsigned char key)
@@ -680,7 +692,7 @@ static int __init cf_check lock_prof_init(void)
 {
 (*q)->next = lock_profile_glb_q.elem_q;
 lock_profile_glb_q.elem_q = *q;
-(*q)->lock->profile = *q;
+(*q)->ptr.lock->profile = *q;
 }
 
 _lock_profile_register_struct(LOCKPROF_TYPE_GLOBAL,
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index afa24c8e29..49c5115f52 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -77,13 +77,19 @@ union lock_debug { };
 */
 
 struct spinlock;
+/* Temporary hack until a dedicated struct rspinlock is existing. */
+#define rspinlock spinlock
 
 struct lock_profile {
 struct lock_profile *next;   /* forward link */
 const char  *name;   /* lock name */
-struct spinlock *lock;   /* the lock itself */
+union {
+struct spinlock *lock;   /* the lock itself */
+struct rspinlock *rlock; /* the recursive lock itself */
+} ptr;
 uint64_tlock_cnt;/* # of complete locking ops */
-uint64_tblock_cnt;   /* # of complete wait for lock */
+uint64_tblock_cnt:63; /* # of complete wait for lock */
+boolis_rlock:1;  /* use rlock pointer */
 s_time_ttime_hold;   /* cumulated lock time */
 s_time_ttime_block;  /* cumulated wait time */
 s_time_ttime_locked; /* system time of last locking */
@@ -95,7 +101,7 @@ struct lock_profile_qhead {
 int32_t   idx; /* index for printout */
 };
 
-#define LOCK_PROFILE_(lockname) { .name = #lockname, .lock = &(lockname), }
+#define LOCK_PROFILE_(lockname) { .name = #lockname, .ptr.lock = &(lockname), }
 #define LOCK_PROFILE_PTR_(name)   \
 static struct lock_profile * const lock_profile__##name   \
 __used_section(".lockprofile.data") = \
@@ -128,7 +134,7 @@ struct lock_profile_qhead {
 break;\
 }   

[PATCH v5 03/13] xen/spinlock: rename recursive lock functions

2024-03-14 Thread Juergen Gross
Rename the recursive spin_lock() functions by replacing the trailing
"_recursive" with a leading "r".

Switch the parameter to be a pointer to rspinlock_t.

Suggested-by: Jan Beulich 
Signed-off-by: Juergen Gross 
Acked-by: Julien Grall 
Acked-by: Jan Beulich 
---
V2:
- new patch
V5:
- let rspin_trylock() return bool (Jan Beulich)
- keep inline/macro layer
---
 xen/arch/arm/domain.c |  4 +--
 xen/arch/x86/domain.c |  8 +++---
 xen/arch/x86/mm/mem_sharing.c |  8 +++---
 xen/arch/x86/mm/mm-locks.h|  4 +--
 xen/common/ioreq.c| 52 +--
 xen/common/page_alloc.c   | 12 
 xen/common/spinlock.c | 12 
 xen/drivers/char/console.c| 12 
 xen/drivers/passthrough/pci.c |  4 +--
 xen/include/xen/sched.h   |  4 +--
 xen/include/xen/spinlock.h| 18 ++--
 11 files changed, 68 insertions(+), 70 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 5e7a7f3e7e..f38cb5e04c 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -987,7 +987,7 @@ static int relinquish_memory(struct domain *d, struct 
page_list_head *list)
 int   ret = 0;
 
 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
-spin_lock_recursive(>page_alloc_lock);
+rspin_lock(>page_alloc_lock);
 
 page_list_for_each_safe( page, tmp, list )
 {
@@ -1014,7 +1014,7 @@ static int relinquish_memory(struct domain *d, struct 
page_list_head *list)
 }
 
   out:
-spin_unlock_recursive(>page_alloc_lock);
+rspin_unlock(>page_alloc_lock);
 return ret;
 }
 
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index bda853e3c9..a11c55f921 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1323,7 +1323,7 @@ int arch_set_info_guest(
 {
 bool done = false;
 
-spin_lock_recursive(>page_alloc_lock);
+rspin_lock(>page_alloc_lock);
 
 for ( i = 0; ; )
 {
@@ -1344,7 +1344,7 @@ int arch_set_info_guest(
 break;
 }
 
-spin_unlock_recursive(>page_alloc_lock);
+rspin_unlock(>page_alloc_lock);
 
 if ( !done )
 return -ERESTART;
@@ -2183,7 +2183,7 @@ static int relinquish_memory(
 int   ret = 0;
 
 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
-spin_lock_recursive(>page_alloc_lock);
+rspin_lock(>page_alloc_lock);
 
 while ( (page = page_list_remove_head(list)) )
 {
@@ -2324,7 +2324,7 @@ static int relinquish_memory(
 page_list_move(list, >arch.relmem_list);
 
  out:
-spin_unlock_recursive(>page_alloc_lock);
+rspin_unlock(>page_alloc_lock);
 return ret;
 }
 
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index fe299a2bf9..f58576c702 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -682,7 +682,7 @@ static int page_make_sharable(struct domain *d,
 int rc = 0;
 bool drop_dom_ref = false;
 
-spin_lock_recursive(>page_alloc_lock);
+rspin_lock(>page_alloc_lock);
 
 if ( d->is_dying )
 {
@@ -725,7 +725,7 @@ static int page_make_sharable(struct domain *d,
 }
 
 out:
-spin_unlock_recursive(>page_alloc_lock);
+rspin_unlock(>page_alloc_lock);
 
 if ( drop_dom_ref )
 put_domain(d);
@@ -1936,7 +1936,7 @@ int mem_sharing_fork_reset(struct domain *d, bool 
reset_state,
 goto state;
 
 /* need recursive lock because we will free pages */
-spin_lock_recursive(>page_alloc_lock);
+rspin_lock(>page_alloc_lock);
 page_list_for_each_safe(page, tmp, >page_list)
 {
 shr_handle_t sh;
@@ -1965,7 +1965,7 @@ int mem_sharing_fork_reset(struct domain *d, bool 
reset_state,
 put_page_alloc_ref(page);
 put_page_and_type(page);
 }
-spin_unlock_recursive(>page_alloc_lock);
+rspin_unlock(>page_alloc_lock);
 
  state:
 if ( reset_state )
diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index c25261b4c0..669969c6ee 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -79,7 +79,7 @@ static always_inline void _mm_lock(const struct domain *d, 
mm_lock_t *l,
 {
 if ( !((mm_locked_by_me(l)) && rec) )
 _check_lock_level(d, level);
-spin_lock_recursive(>lock);
+rspin_lock(>lock);
 if ( l->lock.recurse_cnt == 1 )
 {
 l->locker_function = func;
@@ -202,7 +202,7 @@ static inline void mm_unlock(mm_lock_t *l)
 l->locker_function = "nobody";
 _set_lock_level(l->unlock_level);
 }
-spin_unlock_recursive(>lock);
+rspin_unlock(>lock);
 }
 
 static inline void mm_enforce_order_unlock(int unlock_level,
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index 652c18a9b5..1257a3d972 10

[PATCH v5 00/13] xen/spinlock: make recursive spinlocks a dedicated type

2024-03-14 Thread Juergen Gross
Instead of being able to use normal spinlocks as recursive ones, too,
make recursive spinlocks a special lock type.

This will make the spinlock structure smaller in production builds and
add type-safety.

This allows increasing the maximum number of physical cpus from 4095
to 65535 without increasing the size of the lock structure in production
builds (the size of recursive spinlocks in debug builds will grow to
12 bytes due to that change).

Note that rwlock handling is still limiting the number of cpus to 4095;
this is being taken care of in patch 12, which raises the rwlock limit
to 16384 cpus.

Iommu code imposes a limit of 16383 cpus.

Changes in V2:
- addressed comments by Jan Beulich
- lots of additional cleanups
- reorganized complete series

Changes in V3:
- addressed comments by Jan Beulich

Changes in V4:
- former patch 1 has already been applied
- fixed a coding style issue in patch 1

Changes in V5:
- new patches 1 + 10 + 12 + 13
- due to the recent Ghost-race patches the macro layer for calling
  spinlock functions is kept
- addressed comments

Juergen Gross (13):
  xen/spinlock: remove misra rule 21.1 violations
  xen/spinlock: introduce new type for recursive spinlocks
  xen/spinlock: rename recursive lock functions
  xen/spinlock: add rspin_[un]lock_irq[save|restore]()
  xen/spinlock: make struct lock_profile rspinlock_t aware
  xen/spinlock: add explicit non-recursive locking functions
  xen/spinlock: add another function level
  xen/spinlock: add missing rspin_is_locked() and rspin_barrier()
  xen/spinlock: split recursive spinlocks from normal ones
  xen/spinlock: let all is_locked and trylock variants return bool
  xen/spinlock: support higher number of cpus
  xen/rwlock: raise the number of possible cpus
  xen: allow up to 16383 cpus

 xen/arch/Kconfig  |   2 +-
 xen/arch/arm/domain.c |   4 +-
 xen/arch/arm/mm.c |   4 +-
 xen/arch/x86/domain.c |  20 +--
 xen/arch/x86/include/asm/mm.h |   2 +-
 xen/arch/x86/mm.c |  12 +-
 xen/arch/x86/mm/mem_sharing.c |  16 +--
 xen/arch/x86/mm/mm-locks.h|   6 +-
 xen/arch/x86/mm/p2m-pod.c |   6 +-
 xen/arch/x86/mm/p2m.c |   4 +-
 xen/arch/x86/tboot.c  |   4 +-
 xen/common/domain.c   |   6 +-
 xen/common/domctl.c   |   4 +-
 xen/common/grant_table.c  |  10 +-
 xen/common/ioreq.c|  54 
 xen/common/memory.c   |   4 +-
 xen/common/numa.c |   4 +-
 xen/common/page_alloc.c   |  30 ++---
 xen/common/spinlock.c | 235 +-
 xen/drivers/char/console.c|  38 +++---
 xen/drivers/passthrough/pci.c |   8 +-
 xen/include/xen/rwlock.h  |  18 +--
 xen/include/xen/sched.h   |  10 +-
 xen/include/xen/spinlock.h| 174 ++---
 24 files changed, 440 insertions(+), 235 deletions(-)

-- 
2.35.3




[PATCH v5 02/13] xen/spinlock: introduce new type for recursive spinlocks

2024-03-14 Thread Juergen Gross
Introduce a new type "rspinlock_t" to be used for recursive spinlocks.

For now it is only an alias of spinlock_t, so both types can still be
used for recursive spinlocks. This will be changed later, though.

Switch all recursive spinlocks to the new type.

Define the initializer helpers and use them where appropriate.

Signed-off-by: Juergen Gross 
Acked-by: Julien Grall 
---
V2:
- carved out from V1 patch
V5:
- avoid MISRA violation (Julien Grall)
---
 xen/arch/x86/include/asm/mm.h |  2 +-
 xen/arch/x86/mm/mm-locks.h|  2 +-
 xen/common/domain.c   |  4 ++--
 xen/common/ioreq.c|  2 +-
 xen/drivers/char/console.c|  4 ++--
 xen/drivers/passthrough/pci.c |  2 +-
 xen/include/xen/sched.h   |  6 +++---
 xen/include/xen/spinlock.h| 19 +++
 8 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index 65d209d5ff..98b66edaca 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -597,7 +597,7 @@ unsigned long domain_get_maximum_gpfn(struct domain *d);
 
 /* Definition of an mm lock: spinlock with extra fields for debugging */
 typedef struct mm_lock {
-spinlock_t lock;
+rspinlock_tlock;
 intunlock_level;
 intlocker;  /* processor which holds the lock */
 const char*locker_function; /* func that took it */
diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index 2eae73ac68..c25261b4c0 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -20,7 +20,7 @@ DECLARE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
 
 static inline void mm_lock_init(mm_lock_t *l)
 {
-spin_lock_init(>lock);
+rspin_lock_init(>lock);
 l->locker = -1;
 l->locker_function = "nobody";
 l->unlock_level = 0;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f6f5574996..8e0109c590 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -627,8 +627,8 @@ struct domain *domain_create(domid_t domid,
 
 atomic_set(>refcnt, 1);
 RCU_READ_LOCK_INIT(>rcu_lock);
-spin_lock_init_prof(d, domain_lock);
-spin_lock_init_prof(d, page_alloc_lock);
+rspin_lock_init_prof(d, domain_lock);
+rspin_lock_init_prof(d, page_alloc_lock);
 spin_lock_init(>hypercall_deadlock_mutex);
 INIT_PAGE_LIST_HEAD(>page_list);
 INIT_PAGE_LIST_HEAD(>extra_page_list);
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index 62b907f4c4..652c18a9b5 100644
--- a/xen/common/ioreq.c
+++ b/xen/common/ioreq.c
@@ -1331,7 +1331,7 @@ unsigned int ioreq_broadcast(ioreq_t *p, bool buffered)
 
 void ioreq_domain_init(struct domain *d)
 {
-spin_lock_init(>ioreq_server.lock);
+rspin_lock_init(>ioreq_server.lock);
 
 arch_ioreq_domain_init(d);
 }
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index d2cb0530b2..6a88a0b32c 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -119,7 +119,7 @@ static int __read_mostly sercon_handle = -1;
 int8_t __read_mostly opt_console_xen; /* console=xen */
 #endif
 
-static DEFINE_SPINLOCK(console_lock);
+static DEFINE_RSPINLOCK(console_lock);
 
 /*
  * To control the amount of printing, thresholds are added.
@@ -1177,7 +1177,7 @@ void console_force_unlock(void)
 {
 watchdog_disable();
 spin_debug_disable();
-spin_lock_init(_lock);
+rspin_lock_init(_lock);
 serial_force_unlock(sercon_handle);
 console_locks_busted = 1;
 console_start_sync();
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 6a1eda675d..b6b2196ab0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -50,7 +50,7 @@ struct pci_seg {
 } bus2bridge[MAX_BUSES];
 };
 
-static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_RSPINLOCK(_pcidevs_lock);
 
 /* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
 void pcidevs_lock_unsafe(void)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 37f5922f32..bc320f4e55 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -376,9 +376,9 @@ struct domain
 
 rcu_read_lock_t  rcu_lock;
 
-spinlock_t   domain_lock;
+rspinlock_t  domain_lock;
 
-spinlock_t   page_alloc_lock; /* protects all the following fields  */
+rspinlock_t  page_alloc_lock; /* protects all the following fields  */
 struct page_list_head page_list;  /* linked list */
 struct page_list_head extra_page_list; /* linked list (size extra_pages) */
 struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
@@ -620,7 +620,7 @@ struct domain
 #ifdef CONFIG_IOREQ_SERVER
 /* Lock protects all other values in the sub-struct */
 struct {
-spinlock_t  lock;
+rspinlock_t lock;
 

[PATCH v5 01/13] xen/spinlock: remove misra rule 21.1 violations

2024-03-14 Thread Juergen Gross
In xen spinlock code there are several violations of MISRA rule 21.1
(identifiers starting with "__" or "_[A-Z]").

Fix them by using trailing underscores instead.

Signed-off-by: Juergen Gross 
---
V5:
- new patch (Julien Grall)
---
 xen/include/xen/spinlock.h | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index e351fc9995..8a443efc19 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -22,7 +22,7 @@ union lock_debug {
 bool unseen:1;
 };
 };
-#define _LOCK_DEBUG { .val = LOCK_DEBUG_INITVAL }
+#define LOCK_DEBUG_ { .val = LOCK_DEBUG_INITVAL }
 void check_lock(union lock_debug *debug, bool try);
 void lock_enter(const union lock_debug *debug);
 void lock_exit(const union lock_debug *debug);
@@ -30,7 +30,7 @@ void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
 union lock_debug { };
-#define _LOCK_DEBUG { }
+#define LOCK_DEBUG_ { }
 #define check_lock(l, t) ((void)0)
 #define lock_enter(l) ((void)0)
 #define lock_exit(l) ((void)0)
@@ -95,27 +95,27 @@ struct lock_profile_qhead {
 int32_t   idx; /* index for printout */
 };
 
-#define _LOCK_PROFILE(lockname) { .name = #lockname, .lock = &(lockname), }
-#define _LOCK_PROFILE_PTR(name)   \
-static struct lock_profile * const __lock_profile_##name  \
+#define LOCK_PROFILE_(lockname) { .name = #lockname, .lock = &(lockname), }
+#define LOCK_PROFILE_PTR_(name)   \
+static struct lock_profile * const lock_profile__##name   \
 __used_section(".lockprofile.data") = \
-&__lock_profile_data_##name
-#define _SPIN_LOCK_UNLOCKED(x) {  \
+_profile_data__##name
+#define SPIN_LOCK_UNLOCKED_(x) {  \
 .recurse_cpu = SPINLOCK_NO_CPU,   \
-.debug =_LOCK_DEBUG,  \
+.debug = LOCK_DEBUG_, \
 .profile = x, \
 }
-#define SPIN_LOCK_UNLOCKED _SPIN_LOCK_UNLOCKED(NULL)
+#define SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED_(NULL)
 #define DEFINE_SPINLOCK(l)\
-spinlock_t l = _SPIN_LOCK_UNLOCKED(NULL); \
-static struct lock_profile __lock_profile_data_##l = _LOCK_PROFILE(l);\
-_LOCK_PROFILE_PTR(l)
+spinlock_t l = SPIN_LOCK_UNLOCKED_(NULL); \
+static struct lock_profile lock_profile_data__##l = LOCK_PROFILE_(l); \
+LOCK_PROFILE_PTR_(l)
 
 #define spin_lock_init_prof(s, l) \
 do {  \
 struct lock_profile *prof;\
 prof = xzalloc(struct lock_profile);  \
-(s)->l = (spinlock_t)_SPIN_LOCK_UNLOCKED(prof);   \
+(s)->l = (spinlock_t)SPIN_LOCK_UNLOCKED_(prof);   \
 if ( !prof )  \
 { \
 printk(XENLOG_WARNING \
@@ -149,7 +149,7 @@ struct lock_profile_qhead { };
 
 #define SPIN_LOCK_UNLOCKED {  \
 .recurse_cpu = SPINLOCK_NO_CPU,   \
-.debug =_LOCK_DEBUG,  \
+.debug = LOCK_DEBUG_, \
 }
 #define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
 
-- 
2.35.3




Re: [PATCH RFC] x86/xen: attempt to inflate the memory balloon on PVH

2024-03-13 Thread Juergen Gross

On 20.02.24 18:43, Roger Pau Monne wrote:

When running as PVH or HVM Linux will use holes in the memory map as scratch
space to map grants, foreign domain pages and possibly miscellaneous other
stuff.  However the usage of such memory map holes for Xen purposes can be
problematic.  The request of holes by Xen happens quite early in the kernel boot
process (grant table setup already uses scratch map space), and it's possible
that by then not all devices have reclaimed their MMIO space.  It's not
unlikely for chunks of Xen scratch map space to end up using PCI bridge MMIO
window memory, which (as expected) causes quite a lot of issues in the system.

At least for PVH dom0 we have the possibility of using regions marked as
UNUSABLE in the e820 memory map.  Either if the region is UNUSABLE in the
native memory map, or it has been converted into UNUSABLE in order to hide RAM
regions from dom0, the second stage translation page-tables can populate those
areas without issues.

PV already has this kind of logic, where the balloon driver is inflated at
boot.  Re-use the current logic in order to also inflate it when running as
PVH.  Convert UNUSABLE regions up to the ratio specified in EXTRA_MEM_RATIO to
RAM, while reserving them using xen_add_extra_mem() (which is also moved so
it's no longer tied to CONFIG_PV).

Signed-off-by: Roger Pau Monné 


Reviewed-by: Juergen Gross 


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH 1/2] xen/evtchn: avoid WARN() when unbinding an event channel

2024-03-13 Thread Juergen Gross
When unbinding a user event channel, the related handler might be
called a last time in case the kernel was built with
CONFIG_DEBUG_SHIRQ. This might cause a WARN() in the handler.

Avoid that by adding an "unbinding" flag to struct user_event which
will short circuit the handler.

Fixes: 9e90e58c11b7 ("xen: evtchn: Allow shared registration of IRQ handers")
Reported-by: Demi Marie Obenour 
Tested-by: Demi Marie Obenour 
Signed-off-by: Juergen Gross 
---
 drivers/xen/evtchn.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 59717628ca42..f6a2216c2c87 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -85,6 +85,7 @@ struct user_evtchn {
struct per_user_data *user;
evtchn_port_t port;
bool enabled;
+   bool unbinding;
 };
 
 static void evtchn_free_ring(evtchn_port_t *ring)
@@ -164,6 +165,10 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
struct per_user_data *u = evtchn->user;
unsigned int prod, cons;
 
+   /* Handler might be called when tearing down the IRQ. */
+   if (evtchn->unbinding)
+   return IRQ_HANDLED;
+
WARN(!evtchn->enabled,
 "Interrupt for port %u, but apparently not enabled; per-user %p\n",
 evtchn->port, u);
@@ -421,6 +426,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
 
BUG_ON(irq < 0);
 
+   evtchn->unbinding = true;
unbind_from_irqhandler(irq, evtchn);
 
del_evtchn(u, evtchn);
-- 
2.35.3




[PATCH 2/2] xen/events: increment refcnt only if event channel is refcounted

2024-03-13 Thread Juergen Gross
In bind_evtchn_to_irq_chip() don't increment the refcnt of the event
channel blindly. In case the event channel is NOT refcounted, issue a
warning instead.

Add an additional safety net by doing the refcnt increment only if the
caller has specified IRQF_SHARED in the irqflags parameter.

Fixes: 9e90e58c11b7 ("xen: evtchn: Allow shared registration of IRQ handers")
Signed-off-by: Juergen Gross 
---
 drivers/xen/events/events_base.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 2faa4bf78c7a..81effbd53dc5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1190,7 +1190,7 @@ int xen_pirq_from_irq(unsigned irq)
 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
 
 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
-  struct xenbus_device *dev)
+  struct xenbus_device *dev, bool shared)
 {
int ret = -ENOMEM;
struct irq_info *info;
@@ -1224,7 +1224,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, 
struct irq_chip *chip,
 */
bind_evtchn_to_cpu(info, 0, false);
} else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
-   info->refcnt++;
+   if (shared && !WARN_ON(info->refcnt < 0))
+   info->refcnt++;
}
 
ret = info->irq;
@@ -1237,13 +1238,13 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t 
evtchn, struct irq_chip *chip,
 
 int bind_evtchn_to_irq(evtchn_port_t evtchn)
 {
-   return bind_evtchn_to_irq_chip(evtchn, _dynamic_chip, NULL);
+   return bind_evtchn_to_irq_chip(evtchn, _dynamic_chip, NULL, false);
 }
 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
 
 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
 {
-   return bind_evtchn_to_irq_chip(evtchn, _lateeoi_chip, NULL);
+   return bind_evtchn_to_irq_chip(evtchn, _lateeoi_chip, NULL, false);
 }
 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
 
@@ -1295,7 +1296,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int 
cpu)
 
 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
   evtchn_port_t remote_port,
-  struct irq_chip *chip)
+  struct irq_chip *chip,
+  bool shared)
 {
struct evtchn_bind_interdomain bind_interdomain;
int err;
@@ -1307,14 +1309,14 @@ static int bind_interdomain_evtchn_to_irq_chip(struct 
xenbus_device *dev,
  _interdomain);
 
return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
-  chip, dev);
+  chip, dev, shared);
 }
 
 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
   evtchn_port_t remote_port)
 {
return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
-  _lateeoi_chip);
+  _lateeoi_chip, false);
 }
 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
 
@@ -1430,7 +1432,8 @@ static int bind_evtchn_to_irqhandler_chip(evtchn_port_t 
evtchn,
 {
int irq, retval;
 
-   irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
+   irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL,
+ irqflags & IRQF_SHARED);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1471,7 +1474,8 @@ static int bind_interdomain_evtchn_to_irqhandler_chip(
 {
int irq, retval;
 
-   irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
+   irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip,
+ irqflags & IRQF_SHARED);
if (irq < 0)
return irq;
 
-- 
2.35.3




[PATCH 0/2] xen: two fixes related to event channels

2024-03-13 Thread Juergen Gross
Two patches fixing one seen problem and another potential one. Both
have been introduced in the 6.7 kernel.

Juergen Gross (2):
  xen/evtchn: avoid WARN() when unbinding an event channel
  xen/events: increment refcnt only if event channel is refcounted

 drivers/xen/events/events_base.c | 22 +-
 drivers/xen/evtchn.c |  6 ++
 2 files changed, 19 insertions(+), 9 deletions(-)

-- 
2.35.3




Re: [PATCH 3/3] x86/PVH: Support relocatable dom0 kernels

2024-03-08 Thread Juergen Gross

On 07.03.24 18:01, Jason Andryuk wrote:

On 2024-03-07 04:30, Roger Pau Monné wrote:

On Wed, Mar 06, 2024 at 01:50:32PM -0500, Jason Andryuk wrote:

Xen tries to load a PVH dom0 kernel at the fixed guest physical address
from the elf headers.  For Linux, this defaults to 0x1000000 (16MB), but
it can be configured.

Unfortunately there exist firmwares that have reserved regions at this
address, so Xen fails to load the dom0 kernel since it's not RAM.

The PVH entry code is not relocatable - it loads from absolute
addresses, which fail when the kernel is loaded at a different address.
With a suitably modified kernel, a relocatable entry point is possible.

Add the XENFEAT_pvh_relocatable flag to let a kernel indicate that it
supports a relocatable entry path.

Change the loading to check for an acceptable load address.  If the
kernel is relocatable, support finding an alternate load address.

Linux cares about its physical alignment.  This can be pulled out of the
bzImage header, but not from the vmlinux ELF file.  If an alignment
can't be found, use 2MB.


While I'm fine with having a Linux specific way, there needs to be a
generic way of passing the alignment for non-bzImage kernels.

ELF program headers have an align field, would that be suitable to
use?


Unfortunately, it doesn't seem correct.  Linux has CONFIG_PHYSICAL_ALIGN, and it 
doesn't seem to be used in the elf headers.  As a quick test, I set 
CONFIG_PHYSICAL_ALIGN=0x800000, but the elf align values are still 0x200000.


An excerpt from the kernel's arch/x86/Makefile:

#
# The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
# the linker to force 2MB page size regardless of the default page size used
# by the linker.
#
ifdef CONFIG_X86_64
LDFLAGS_vmlinux += -z max-page-size=0x200000
endif


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH v2] tools/9pfsd: Fix build error caused by strerror_r()

2024-03-07 Thread Juergen Gross

On 07.03.24 11:38, Henry Wang wrote:

Below error can be seen when doing Yocto build of the toolstack:

| io.c: In function 'p9_error':
| io.c:684:5: error: ignoring return value of 'strerror_r' declared
   with attribute 'warn_unused_result' [-Werror=unused-result]
|   684 | strerror_r(err, ring->buffer, ring->ring_size);
|   | ^~
| cc1: all warnings being treated as errors

Fix the build by using strerror() to replace strerror_r(). Since
strerror() is thread-unsafe, use a separate local mutex to protect
the action. The steps would then become: Acquire the mutex first,
invoke strerror(), copy the string from strerror() to the designated
buffer and then drop the mutex.

Signed-off-by: Henry Wang 
---
  tools/9pfsd/io.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index adb887c7d9..2b80c9528d 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -680,8 +680,18 @@ static bool name_ok(const char *str)
  static void p9_error(struct ring *ring, uint16_t tag, uint32_t err)
  {
  unsigned int erroff;
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+char *strerror_str;
+RING_IDX strerror_len = 0, copy_len = 0;
+
+pthread_mutex_lock();
+strerror_str = strerror(err);
+strerror_len = strlen(strerror_str) + 1;
+copy_len = min(strerror_len, ring->ring_size);


Hmm, I think we even _need_ to cap the string earlier.

A string in the 9pfs protocol is a 2 byte length field plus the string.
In case of a ring larger than 65535 bytes this would mean the result of
strerror() could (in theory) overflow the string format of 9pfs.

Additionally the string should be a _short_ description of the error, so
I'd like to suggest to not use ring_size as the upper bound for the string
length, but a fixed value defined as a macro, e.g.:

#define MAX_ERRSTR_LEN 80


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH v2] tools/9pfsd: Fix build error caused by strerror_r()

2024-03-07 Thread Juergen Gross

On 07.03.24 11:38, Henry Wang wrote:

Below error can be seen when doing Yocto build of the toolstack:

| io.c: In function 'p9_error':
| io.c:684:5: error: ignoring return value of 'strerror_r' declared
   with attribute 'warn_unused_result' [-Werror=unused-result]
|   684 | strerror_r(err, ring->buffer, ring->ring_size);
|   | ^~
| cc1: all warnings being treated as errors

Fix the build by using strerror() to replace strerror_r(). Since
strerror() is thread-unsafe, use a separate local mutex to protect
the action. The steps would then become: Acquire the mutex first,
invoke strerror(), copy the string from strerror() to the designated
buffer and then drop the mutex.

Signed-off-by: Henry Wang 


Maybe add a "Fixes:" tag referencing Jan's patch?

And I would expand on the reason why you are using strerror() instead of just
checking the strerror_r() result. Something like:

  Using strerror_r() without special casing different build
  environments is impossible due to the different return types
  (int vs char *) depending on the environment. As p9_error()
  is not on a performance critical path, using strerror() with a
  mutex ought to be fine.


---
  tools/9pfsd/io.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index adb887c7d9..2b80c9528d 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -680,8 +680,18 @@ static bool name_ok(const char *str)
  static void p9_error(struct ring *ring, uint16_t tag, uint32_t err)
  {
  unsigned int erroff;
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+char *strerror_str;
+RING_IDX strerror_len = 0, copy_len = 0;


I wouldn't use RING_IDX for the type, but unsigned int.


+
+pthread_mutex_lock();
+strerror_str = strerror(err);
+strerror_len = strlen(strerror_str) + 1;
+copy_len = min(strerror_len, ring->ring_size);
+memcpy(ring->buffer, strerror_str, copy_len);
+((char *)(ring->buffer))[copy_len - 1] = '\0';
+pthread_mutex_unlock();
  
-strerror_r(err, ring->buffer, ring->ring_size);

  erroff = add_string(ring, ring->buffer, strlen(ring->buffer));
  fill_buffer(ring, P9_CMD_ERROR, tag, "SU",
  erroff != ~0 ? ring->str + erroff : "cannot allocate memory",



Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH] SUPPORT.md: add xen-9pfsd

2024-03-06 Thread Juergen Gross
Add a support statement for the new xen-9pfsd backend. Set it to
"Experimental", as some functionality for Linux guests is missing
(only tested to work with Xenstore-stubdom).

Signed-off-by: Juergen Gross 
---
 SUPPORT.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/SUPPORT.md b/SUPPORT.md
index a90d1108c9..8d21bc7945 100644
--- a/SUPPORT.md
+++ b/SUPPORT.md
@@ -664,6 +664,7 @@ there is currently no xl support.
 ### PV 9pfs (backend)
 
 Status, QEMU: Tech Preview
+Status, xen-9pfsd: Experimental
 
 ### PVCalls (backend)
 
-- 
2.35.3




[PATCH] CHANGELOG: add an entry for 9pfsd

2024-03-06 Thread Juergen Gross
Add an entry to CHANGELOG.md regarding the new xen-9pfsd daemon.

Signed-off-by: Juergen Gross 
---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9874f9bb0..93fda73c00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/)
  - On x86:
- Introduce a new x2APIC driver that uses Cluster Logical addressing mode
  for IPIs and Physical addressing mode for external interrupts.
+ - Add a new 9pfs backend running as a daemon in dom0. First user is
+   Xenstore-stubdom now being able to support full Xenstore trace capability.
 
 ### Removed
 - caml-stubdom.  It hasn't built since 2014, was pinned to Ocaml 4.02, and has
-- 
2.35.3




[PATCH] MAINTAINERS: add an entry for tools/9pfsd

2024-03-06 Thread Juergen Gross
Add me as the maintainer for the tools/9pfsd directory.

Signed-off-by: Juergen Gross 
---
 MAINTAINERS | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 076cf1e141..28fb35582b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -206,6 +206,12 @@ Maintainers List (try to look for most precise areas first)
 
---
 
+9PFSD
+M: Juergen Gross 
+M: Anthony PERARD 
+S: Supported
+F: tools/9pfsd
+
 ACPI
 M: Jan Beulich 
 S: Supported
-- 
2.35.3




Re: [PATCH v4 10/12] xen/spinlock: split recursive spinlocks from normal ones

2024-03-01 Thread Juergen Gross

On 29.02.24 16:32, Jan Beulich wrote:

On 12.12.2023 10:47, Juergen Gross wrote:

+#define nrspin_lock_irqsave(l, f)   \
+({  \
+BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long));   \
+((f) = __nrspin_lock_irqsave(l));   \


I don't think the outer pair of parentheses is needed here.


Turns out it is needed. Otherwise something like:


if ( a )
nrspin_lock_irqsave(l, f);
else
...

will fail with "else without a previous if".


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH v4 12/12] xen/spinlock: support higher number of cpus

2024-02-29 Thread Juergen Gross

On 29.02.24 17:31, Jan Beulich wrote:

On 29.02.2024 17:29, Jürgen Groß wrote:

On 29.02.24 16:46, Jan Beulich wrote:

On 12.12.2023 10:47, Juergen Gross wrote:

Allow 16 bits per cpu number, which is the limit imposed by
spinlock_tickets_t.

This will allow up to 65535 cpus, while increasing only the size of
recursive spinlocks in debug builds from 8 to 12 bytes.


I think we want to be more conservative here, for the case of there
being bugs: The CPU holding a lock may wrongly try to acquire it a
2nd time. That's the 65536th ticket then, wrapping the value.


Is this really a problem? There will be no other cpu left seeing the lock
as "free" in this case, as all others will be waiting for the head to reach
their private tail value.


But isn't said CPU then going to make progress, rather than indefinitely
spinning on the lock?


No, I don't think so.

The limit isn't 65535 because of the ticket mechanism, but because of the
rspin mechanism, where we need a "no cpu is owning the lock" value. Without
the recursive locks the limit would be 65536 (or 4096 today).




Therefore my suggestion would be to only (mention) go(ing) up to 32k.


Signed-off-by: Juergen Gross 
---
   xen/common/spinlock.c  |  1 +
   xen/include/xen/spinlock.h | 18 +-
   2 files changed, 10 insertions(+), 9 deletions(-)


Shouldn't this also bump the upper bound of the NR_CPUS range then
in xen/arch/Kconfig?


Fine with me, I can add another patch to the series doing that.


Why not do it right here? The upper bound there is like it is only
because of the restriction that's lifted here.


I'd prefer splitting the two instances, but if you prefer it to be in a
single patch, so be it.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH v4 07/12] xen/spinlock: add explicit non-recursive locking functions

2024-02-29 Thread Juergen Gross

On 29.02.24 14:49, Jan Beulich wrote:

On 12.12.2023 10:47, Juergen Gross wrote:

In order to prepare a type-safe recursive spinlock structure, add
explicitly non-recursive locking functions to be used for non-recursive
locking of spinlocks, which are used recursively, too.

Signed-off-by: Juergen Gross 


Acked-by: Jan Beulich 
preferably with ...


--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -101,6 +101,8 @@ struct lock_profile_qhead {
  };
  
  #define _LOCK_PROFILE(lockname) { .name = #lockname, .lock = , }

+#define _RLOCK_PROFILE(lockname) { .name = #lockname, .rlock = , \
+.is_rlock = 1, }


... "true" used here, ...


@@ -133,13 +135,16 @@ struct lock_profile_qhead {
  break;
\
  } 
\
  prof->name = #l;  
\
-prof->lock = &(s)->l; \
+prof->lockptr = &(s)->l;  \
+prof->is_rlock = isr; \
  prof->next = (s)->profile_head.elem_q;
\
  (s)->profile_head.elem_q = prof;  
\
  } while( 0 )
  
-#define spin_lock_init_prof(s, l) __spin_lock_init_prof(s, l, spinlock_t)

-#define rspin_lock_init_prof(s, l) __spin_lock_init_prof(s, l, rspinlock_t)
+#define spin_lock_init_prof(s, l) \
+__spin_lock_init_prof(s, l, lock, spinlock_t, 0)


... "false" here, ...


+#define rspin_lock_init_prof(s, l)\
+__spin_lock_init_prof(s, l, rlock, rspinlock_t, 1)


... "true" again here, and ...


@@ -174,6 +179,7 @@ struct lock_profile_qhead { };
  
  #endif
  
+

  typedef union {
  uint32_t head_tail;
  struct {


... definitely with this hunk dropped.


I'm fine with all of above.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH v9 6/6] tools/xenstored: have a single do_control_memreport()

2024-02-29 Thread Juergen Gross
With 9pfs now available in Xenstore-stubdom, there is no reason to
have distinct do_control_memreport() variants for the daemon and the
stubdom implementations.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 tools/xenstored/control.c | 27 +++
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index dae23a5ac0..9561289179 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -216,23 +216,11 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
return 0;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
FILE *fp;
+   const char *filename;
int fd;
 
if (num > 1)
@@ -255,8 +243,12 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
if (!fp)
close(fd);
}
-   } else
-   fp = fopen(vec[0], "a");
+   } else {
+   filename = absolute_filename(ctx, vec[0]);
+   if (!filename)
+   return ENOMEM;
+   fp = fopen(filename, "a");
+   }
 
if (!fp)
return EBADF;
@@ -267,7 +259,6 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
send_ack(conn, XS_CONTROL);
return 0;
 }
-#endif
 
 static int do_control_print(const void *ctx, struct connection *conn,
const char **vec, int num)
@@ -310,11 +301,7 @@ static struct cmd_s cmds[] = {
"Default timeout is 60 seconds.", 5 },
 #endif
{ "logfile", do_control_logfile, "" },
-#ifdef __MINIOS__
-   { "memreport", do_control_memreport, "" },
-#else
{ "memreport", do_control_memreport, "[]" },
-#endif
{ "print", do_control_print, "" },
{ "quota", do_control_quota,
"[set  ||max [-r]]" },
-- 
2.35.3




[PATCH v9 4/6] tools/xenstored: add helpers for filename handling

2024-02-29 Thread Juergen Gross
Add some helpers for handling filenames which might need different
implementations between stubdom and daemon environments:

- expansion of relative filenames (those are not really defined today,
  just expand them to be relative to /var/lib/xen/xenstore)
- expansion of xenstore_daemon_rundir() (used e.g. for saving the state
  file in case of live update - needs to be unchanged in the daemon
  case, but should result in /var/lib/xen/xenstore for stubdom)

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- make absolute_filename() return a pointer to const (Julien Grall)
---
 tools/xenstored/core.c  | 15 +--
 tools/xenstored/core.h  |  5 -
 tools/xenstored/lu_daemon.c |  4 ++--
 tools/xenstored/minios.c|  5 +
 tools/xenstored/posix.c |  8 +++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c
index 48fc787ac1..bada1ad9a2 100644
--- a/tools/xenstored/core.c
+++ b/tools/xenstored/core.c
@@ -63,7 +63,7 @@ char **orig_argv;
 LIST_HEAD(connections);
 int tracefd = -1;
 bool keep_orphans = false;
-char *tracefile = NULL;
+const char *tracefile = NULL;
 static struct hashtable *nodes;
 unsigned int trace_flags = TRACE_OBJ | TRACE_IO;
 
@@ -137,6 +137,17 @@ void trace_destroy(const void *data, const char *type)
trace("obj: DESTROY %s %p\n", type, data);
 }
 
+/*
+ * Return an absolute filename.
+ * In case of a relative filename given as input, prepend XENSTORE_LIB_DIR.
+ */
+const char *absolute_filename(const void *ctx, const char *filename)
+{
+   if (filename[0] != '/')
+   return talloc_asprintf(ctx, XENSTORE_LIB_DIR "/%s", filename);
+   return talloc_strdup(ctx, filename);
+}
+
 void close_log(void)
 {
if (tracefd >= 0)
@@ -2759,7 +2770,7 @@ int main(int argc, char *argv[])
 #endif
 
if (tracefile)
-   tracefile = talloc_strdup(NULL, tracefile);
+   tracefile = absolute_filename(NULL, tracefile);
 
 #ifndef NO_LIVE_UPDATE
/* Read state in case of live update. */
diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index fe0ee90581..e58779e88c 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -341,7 +341,7 @@ void close_log(void);
 extern int orig_argc;
 extern char **orig_argv;
 
-extern char *tracefile;
+extern const char *tracefile;
 extern int tracefd;
 
 /* Trace flag values must be kept in sync with trace_switches[] contents. */
@@ -405,6 +405,9 @@ void set_socket_fd(int fd);
 void mount_9pfs(void);
 #endif
 
+const char *xenstore_rundir(void);
+const char *absolute_filename(const void *ctx, const char *filename);
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/lu_daemon.c b/tools/xenstored/lu_daemon.c
index 71bcabadd3..635ab0 100644
--- a/tools/xenstored/lu_daemon.c
+++ b/tools/xenstored/lu_daemon.c
@@ -24,7 +24,7 @@ void lu_get_dump_state(struct lu_dump_state *state)
state->size = 0;
 
state->filename = talloc_asprintf(NULL, "%s/state_dump",
- xenstore_daemon_rundir());
+ xenstore_rundir());
if (!state->filename)
barf("Allocation failure");
 
@@ -65,7 +65,7 @@ FILE *lu_dump_open(const void *ctx)
int fd;
 
filename = talloc_asprintf(ctx, "%s/state_dump",
-  xenstore_daemon_rundir());
+  xenstore_rundir());
if (!filename)
return NULL;
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 562a9b4972..e70386f8c7 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -128,3 +128,8 @@ void mount_9pfs(void)
 {
create_thread("mount-9pfs", mount_thread, NULL);
 }
+
+const char *xenstore_rundir(void)
+{
+   return XENSTORE_LIB_DIR;
+}
diff --git a/tools/xenstored/posix.c b/tools/xenstored/posix.c
index 496329dfd1..d88c82d972 100644
--- a/tools/xenstored/posix.c
+++ b/tools/xenstored/posix.c
@@ -326,9 +326,10 @@ void early_init(bool live_update, bool dofork, const char 
*pidfile)
 {
reopen_log();
 
-   /* Make sure xenstored directory exists. */
+   /* Make sure xenstored directories exist. */
/* Errors ignored here, will be reported when we open files */
mkdir(xenstore_daemon_rundir(), 0755);
+   mkdir(XENSTORE_LIB_DIR, 0755);
 
if (dofork) {
openlog("xenstored", 0, LOG_DAEMON);
@@ -406,3 +407,8 @@ void set_socket_fd(int fd)
 {
sock = fd;
 }
+
+const char *xenstore_rundir(void)
+{
+   return xenstore_daemon_rundir();
+}
-- 
2.35.3




[PATCH v9 5/6] tools/xenstored: support complete log capabilities in stubdom

2024-02-29 Thread Juergen Gross
With 9pfs being fully available in Xenstore-stubdom now, there is no
reason to not fully support all logging capabilities in stubdom.

Open the logfile on stubdom only after the 9pfs file system has been
mounted.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
Acked-by: Anthony PERARD 
---
V3:
- remove now stale comment in sysconfig.xencommons.in (Julien Grall)
---
 .../Linux/init.d/sysconfig.xencommons.in  |  1 -
 tools/hotplug/Linux/launch-xenstore.in|  1 +
 tools/xenstored/control.c | 30 +--
 tools/xenstored/minios.c  |  3 ++
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in 
b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
index 433e4849af..1bdd830d8a 100644
--- a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
+++ b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
@@ -58,7 +58,6 @@ XENSTORED_ARGS=
 ## Default: Not defined, tracing off
 #
 # Log xenstored messages
-# Only evaluated if XENSTORETYPE is "daemon".
 #XENSTORED_TRACE=[yes|on|1]
 
 ## Type: integer
diff --git a/tools/hotplug/Linux/launch-xenstore.in 
b/tools/hotplug/Linux/launch-xenstore.in
index e854ca1eb8..da4eeca7c5 100644
--- a/tools/hotplug/Linux/launch-xenstore.in
+++ b/tools/hotplug/Linux/launch-xenstore.in
@@ -98,6 +98,7 @@ test -f @CONFIG_DIR@/@CONFIG_LEAF_DIR@/xencommons && . 
@CONFIG_DIR@/@CONFIG_LEAF
[ -z "$XENSTORE_DOMAIN_SIZE" ] && XENSTORE_DOMAIN_SIZE=8
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --memory 
$XENSTORE_DOMAIN_SIZE"
[ -z "$XENSTORE_MAX_DOMAIN_SIZE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --maxmem $XENSTORE_MAX_DOMAIN_SIZE"
+   [ -z "$XENSTORED_TRACE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS -T xenstored-trace.log"
 
echo -n Starting $XENSTORE_DOMAIN_KERNEL...
${LIBEXEC_BIN}/init-xenstore-domain $XENSTORE_DOMAIN_ARGS || exit 1
diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index b2f64d674f..dae23a5ac0 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -201,19 +201,6 @@ static int do_control_quota_s(const void *ctx, struct 
connection *conn,
return EINVAL;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_logfile(const void *ctx, struct connection *conn,
  const char **vec, int num)
 {
@@ -222,13 +209,26 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
 
close_log();
talloc_free(tracefile);
-   tracefile = talloc_strdup(NULL, vec[0]);
+   tracefile = absolute_filename(NULL, vec[0]);
reopen_log();
 
send_ack(conn, XS_CONTROL);
return 0;
 }
 
+#ifdef __MINIOS__
+static int do_control_memreport(const void *ctx, struct connection *conn,
+   const char **vec, int num)
+{
+   if (num)
+   return EINVAL;
+
+   talloc_report_full(NULL, stdout);
+
+   send_ack(conn, XS_CONTROL);
+   return 0;
+}
+#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
@@ -309,10 +309,10 @@ static struct cmd_s cmds[] = {
"[-c ] [-F] [-t ] \n"
"Default timeout is 60 seconds.", 5 },
 #endif
+   { "logfile", do_control_logfile, "" },
 #ifdef __MINIOS__
{ "memreport", do_control_memreport, "" },
 #else
-   { "logfile", do_control_logfile, "" },
{ "memreport", do_control_memreport, "[]" },
 #endif
{ "print", do_control_print, "" },
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index e70386f8c7..a229954cf4 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -122,6 +122,9 @@ static void mount_thread(void *p)
}
 
p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+
+   /* Start logging if selected. */
+   reopen_log();
 }
 
 void mount_9pfs(void)
-- 
2.35.3




[PATCH v9 2/6] stubdom: extend xenstore stubdom configs

2024-02-29 Thread Juergen Gross
Extend the config files of the Xenstore stubdoms to include XENBUS
and 9PFRONT items in order to support file based logging.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 stubdom/xenstore-minios.cfg| 2 +-
 stubdom/xenstorepvh-minios.cfg | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/stubdom/xenstore-minios.cfg b/stubdom/xenstore-minios.cfg
index a41704bb6b..239da519b9 100644
--- a/stubdom/xenstore-minios.cfg
+++ b/stubdom/xenstore-minios.cfg
@@ -3,7 +3,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
diff --git a/stubdom/xenstorepvh-minios.cfg b/stubdom/xenstorepvh-minios.cfg
index 6af51f5753..752b90d7d3 100644
--- a/stubdom/xenstorepvh-minios.cfg
+++ b/stubdom/xenstorepvh-minios.cfg
@@ -4,7 +4,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
-- 
2.35.3




[PATCH v9 0/6] tools: enable xenstore-stubdom to use 9pfs

2024-02-29 Thread Juergen Gross
This series is adding 9pfs support to Xenstore-stubdom, enabling it
to do logging to a dom0 directory.

This is a prerequisite for the final goal to add live update support
to Xenstore-stubdom, as it enables the stubdom to store its state in
a dom0 file.

Reposting the rest of the series. CI-test has passed.

Changes in V9:
- new patch 1
- patches 1+2 and 4 of V8 have been applied

Changes in V8:
- patches 1-13 of V7 have been applied

Changes in V7:
- fixed V6 bugs

Changes in V6:
- patch 1 of V5 has been applied
- rebase
- addressed comments

Changes in V5:
- 10 patches have been applied already
- rename source directory to tools/9pfsd
- addressed comments

Changes in V4:
- patch 2 of V3 was applied
- added support of reading directories
- addressed review comments

Changes in V3:
- new patches 1, 23-25
- addressed review comments

Changes in V2:
- support of multiple rings per device
- xenlogd->xen-9pfsd rename
- addressed review comments
- fixed some bugs

Juergen Gross (6):
  config: update Mini-OS commit
  stubdom: extend xenstore stubdom configs
  tools/xenstored: mount 9pfs device in stubdom
  tools/xenstored: add helpers for filename handling
  tools/xenstored: support complete log capabilities in stubdom
  tools/xenstored: have a single do_control_memreport()

 Config.mk |  2 +-
 stubdom/xenstore-minios.cfg   |  2 +-
 stubdom/xenstorepvh-minios.cfg|  2 +-
 .../Linux/init.d/sysconfig.xencommons.in  |  1 -
 tools/hotplug/Linux/launch-xenstore.in|  1 +
 tools/xenstored/control.c | 29 +++--
 tools/xenstored/core.c| 15 -
 tools/xenstored/core.h| 11 +++-
 tools/xenstored/domain.c  |  2 +
 tools/xenstored/lu_daemon.c   |  4 +-
 tools/xenstored/minios.c  | 62 +++
 tools/xenstored/posix.c   |  8 ++-
 12 files changed, 108 insertions(+), 31 deletions(-)

-- 
2.35.3




[PATCH v9 3/6] tools/xenstored: mount 9pfs device in stubdom

2024-02-29 Thread Juergen Gross
Mount the 9pfs device in stubdom enabling it to use files.

This has to happen in a worker thread in order to allow the main thread
to handle the required Xenstore accesses in parallel.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- add logging in case of errors (Julien Grall)
---
 tools/xenstored/core.h   |  6 +
 tools/xenstored/domain.c |  2 ++
 tools/xenstored/minios.c | 54 
 3 files changed, 62 insertions(+)

diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index f6af086f01..fe0ee90581 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -36,6 +36,8 @@
 #include "list.h"
 #include "hashtable.h"
 
+#define XENSTORE_LIB_DIR   XEN_LIB_DIR "/xenstore"
+
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
 /* O_CLOEXEC support is needed for Live Update in the daemon case. */
@@ -399,6 +401,10 @@ void handle_special_fds(void);
 int get_socket_fd(void);
 void set_socket_fd(int fd);
 
+#ifdef __MINIOS__
+void mount_9pfs(void);
+#endif
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/domain.c b/tools/xenstored/domain.c
index 1a7d5e9756..64c8fd0cc3 100644
--- a/tools/xenstored/domain.c
+++ b/tools/xenstored/domain.c
@@ -1236,6 +1236,8 @@ void stubdom_init(void)
barf_perror("Failed to initialize stubdom");
 
xenevtchn_notify(xce_handle, stubdom->port);
+
+   mount_9pfs();
 #endif
 }
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 22ac8defbd..562a9b4972 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -17,10 +17,20 @@
 */
 #include 
 #include 
+#include 
+#include "talloc.h"
 #include "core.h"
 #include "utils.h"
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+
+#define P9_STATE_PATH  "device/9pfs/0/state"
+
+static void *p9_device;
 
 void finish_daemonize(void)
 {
@@ -74,3 +84,47 @@ int get_socket_fd(void)
 void set_socket_fd(int fd)
 {
 }
+
+static void mount_thread(void *p)
+{
+   xenbus_event_queue events = NULL;
+   char *err;
+   char *dummy;
+
+   err = xenbus_watch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs", &events);
+   if (err) {
+   log("error \"%s\" when setting watch on \"%s\"\n", err,
+   P9_STATE_PATH);
+   free(err);
+   return;
+   }
+
+   for (;;) {
+   xenbus_wait_for_watch(&events);
+
+   /*
+* We only care for existence of the state node.
+* State changes are handled in init_9pfront().
+*/
+   err = xenbus_read(XBT_NIL, P9_STATE_PATH, &dummy);
+   if (!err)
+   break;
+   free(err);
+   }
+
+   free(dummy);
+
+   err = xenbus_unwatch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs");
+   if (err) {
+   log("error \"%s\" when unwatching \"%s\", leaking watch\n",
+   err, P9_STATE_PATH);
+   free(err);
+   }
+
+   p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+}
+
+void mount_9pfs(void)
+{
+   create_thread("mount-9pfs", mount_thread, NULL);
+}
-- 
2.35.3




[PATCH v9 1/6] config: update Mini-OS commit

2024-02-29 Thread Juergen Gross
Update the Mini-OS upstream revision.

Signed-off-by: Juergen Gross 
---
V9:
- new patch
---
 Config.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Config.mk b/Config.mk
index 6f6e0425ba..a962f095ca 100644
--- a/Config.mk
+++ b/Config.mk
@@ -224,7 +224,7 @@ QEMU_UPSTREAM_URL ?= 
https://xenbits.xen.org/git-http/qemu-xen.git
 QEMU_UPSTREAM_REVISION ?= master
 
 MINIOS_UPSTREAM_URL ?= https://xenbits.xen.org/git-http/mini-os.git
-MINIOS_UPSTREAM_REVISION ?= 2bc8dbb9b613d27455cbca318ea337309c04
+MINIOS_UPSTREAM_REVISION ?= b6a5b4d72b88e5c4faed01f5a44505de022860fc
 
 SEABIOS_UPSTREAM_URL ?= https://xenbits.xen.org/git-http/seabios.git
 SEABIOS_UPSTREAM_REVISION ?= rel-1.16.3
-- 
2.35.3




[PATCH] Mini-OS: add symbol exports for xenstore stubdom

2024-02-26 Thread Juergen Gross
Xenstore stubdom needs some more symbols exported.

Signed-off-by: Juergen Gross 
---
 xenbus.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/xenbus.c b/xenbus.c
index eb9af055..686428a4 100644
--- a/xenbus.c
+++ b/xenbus.c
@@ -45,6 +45,7 @@
 #endif
 
 struct xenstore_domain_interface *xenstore_buf;
+EXPORT_SYMBOL(xenstore_buf);
 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 DECLARE_WAIT_QUEUE_HEAD(xenbus_watch_queue);
 static __DECLARE_SEMAPHORE_GENERIC(xb_write_sem, 1);
@@ -70,6 +71,7 @@ static struct xenbus_req_info req_info[NR_REQS];
 static char *errmsg(struct xsd_sockmsg *rep);
 
 uint32_t xenbus_evtchn;
+EXPORT_SYMBOL(xenbus_evtchn);
 
 #ifdef CONFIG_PARAVIRT
 void get_xenbus(void *p)
-- 
2.35.3




Re: [PATCH v8 3/8] stubdom: extend xenstore stubdom configs

2024-02-22 Thread Juergen Gross

On 16.02.24 17:31, Juergen Gross wrote:

Extend the config files of the Xenstore stubdoms to include XENBUS
and 9PFRONT items in order to support file based logging.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 


Samuel, are you fine with this patch?


Juergen


---
  stubdom/xenstore-minios.cfg| 2 +-
  stubdom/xenstorepvh-minios.cfg | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/stubdom/xenstore-minios.cfg b/stubdom/xenstore-minios.cfg
index a41704bb6b..239da519b9 100644
--- a/stubdom/xenstore-minios.cfg
+++ b/stubdom/xenstore-minios.cfg
@@ -3,7 +3,7 @@ CONFIG_NETFRONT=n
  CONFIG_FBFRONT=n
  CONFIG_KBDFRONT=n
  CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
  CONFIG_LWIP=n
+CONFIG_9PFRONT=y
  CONFIG_BALLOON=y
  XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
diff --git a/stubdom/xenstorepvh-minios.cfg b/stubdom/xenstorepvh-minios.cfg
index 6af51f5753..752b90d7d3 100644
--- a/stubdom/xenstorepvh-minios.cfg
+++ b/stubdom/xenstorepvh-minios.cfg
@@ -4,7 +4,7 @@ CONFIG_NETFRONT=n
  CONFIG_FBFRONT=n
  CONFIG_KBDFRONT=n
  CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
  CONFIG_LWIP=n
+CONFIG_9PFRONT=y
  CONFIG_BALLOON=y
  XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__




OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: Stats on Xen tarball downloads

2024-02-22 Thread Juergen Gross

On 22.02.24 10:49, Roger Pau Monné wrote:

On Wed, Feb 21, 2024 at 10:53:49PM +0000, Julien Grall wrote:

Hi George,

On 21/02/2024 02:55, George Dunlap wrote:

On Mon, Feb 19, 2024 at 6:38 PM Jan Beulich  wrote:


On 19.02.2024 11:31, Roger Pau Monné wrote:

On Mon, Feb 19, 2024 at 06:01:54PM +0800, George Dunlap wrote:

One of the questions we had with respect to changing our release
practice (for instance, making the process more light-weight so that
we could do a point release after every XSA) was, "How many people are
actually using the tarballs?"


What would this more lightweight process involve from a downstream
PoV?  IOW: in what would the contents of the tarball change compared
to the current releases?


  From all prior discussion my conclusion was "no tarball at all".


Or at very least, the tarball would be a simple `git archive` of a
release tag.   Right now the tarball creation has a number of
annoyingly manual parts about it.

At the moment we have the following steps:

1) Checkout tag
2) Create the tarball
3) Check the source tarball can build
4) Sign the tarball
5) Upload it

I managed to script it so I have only two commands to execute (mostly
because I build and sign on a different host).

AFAIU, your command 'git archive' will only replace 2. Am I correct? If so,
it is not entirely clear how your proposal is going to make it better.


IMO building from release tarballs is easier than from a git checkout
(or archive).  It's a bit annoying to have to pre-download the
external project sources, now even more as QEMU is using git
submodules.

Most distro binary builders have infrastructure to deal with all this,
but it requires a bit more logic in the recipe than a plain "just fetch a
tarball and build from it".


I have an unfinished patch series lying around doing the download steps
_before_ starting the build. This includes make targets for downloading
the required components, or all components if configure should be called
afterwards.

Creating the tarball after having downloaded all components is trivial.

There are a few bugs in the series that I haven't had time to fix yet. If
someone is interested in working on it, I can post the series.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [linux-linus test] 184722: regressions - FAIL

2024-02-22 Thread Juergen Gross

On 22.02.24 09:21, osstest service owner wrote:

flight 184722 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/184722/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
  build-arm64-pvops 6 kernel-build   fail in 184721 REGR. vs. 184719


Log says:

gcc: internal compiler error: Segmentation fault signal terminated program cc1
Please submit a full bug report,
with preprocessed source if appropriate.
See  for instructions.
make[5]: *** [scripts/Makefile.build:243: drivers/iio/adc/max9611.o] Error 4
make[5]: *** Waiting for unfinished jobs


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


[PATCH] tools/9pfsd: add missing va_end() in fill_data()

2024-02-19 Thread Juergen Gross
In xen-9pfsd fill_data() va_end() needs to be called before returning.

Coverity Id CID 1592145

Fixes: bcec59cf7ff4 ("tools/xen-9pfsd: add 9pfs version request support")
Signed-off-by: Juergen Gross 
---
 tools/9pfsd/io.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index ebc4102713..adb887c7d9 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -445,7 +445,7 @@ static int fill_data(struct ring *ring, const char *fmt, 
...)
 if ( !*f || array_sz )
 fmt_err(fmt);
 if ( !chk_data(ring, data, sizeof(uint16_t)) )
-return pars;
+goto out;
 array_sz = get_unaligned((uint16_t *)data);
 data += sizeof(uint16_t);
 *(unsigned int *)par = array_sz;
@@ -455,10 +455,10 @@ static int fill_data(struct ring *ring, const char *fmt, 
...)
 
 case 'b':
 if ( !chk_data(ring, data, sizeof(uint8_t)) )
-return pars;
+goto out;
             if ( !fill_data_elem(&par, array, &array_sz, sizeof(uint8_t),
  data) )
-return pars;
+goto out;
 data += sizeof(uint8_t);
 break;
 
@@ -466,48 +466,48 @@ static int fill_data(struct ring *ring, const char *fmt, 
...)
 if ( array_sz )
 fmt_err(fmt);
 if ( !chk_data(ring, data, sizeof(uint32_t)) )
-return pars;
+goto out;
 len = get_unaligned((uint32_t *)data);
 data += sizeof(uint32_t);
 *(unsigned int *)par = len;
 par = va_arg(ap, void *);
 if ( !chk_data(ring, data, len) )
-return pars;
+goto out;
 memcpy(par, data, len);
 data += len;
 break;
 
 case 'L':
 if ( !chk_data(ring, data, sizeof(uint64_t)) )
-return pars;
+goto out;
             if ( !fill_data_elem(&par, array, &array_sz, sizeof(uint64_t),
  data) )
-return pars;
+goto out;
 data += sizeof(uint64_t);
 break;
 
 case 'S':
 if ( !chk_data(ring, data, sizeof(uint16_t)) )
-return pars;
+goto out;
 len = get_unaligned((uint16_t *)data);
 data += sizeof(uint16_t);
 if ( !chk_data(ring, data, len) )
-return pars;
+goto out;
 str_off = add_string(ring, data, len);
 if ( str_off == ~0 )
-return pars;
+goto out;
             if ( !fill_data_elem(&par, array, &array_sz, sizeof(unsigned int),
                                  &str_off) )
-return pars;
+goto out;
 data += len;
 break;
 
 case 'U':
 if ( !chk_data(ring, data, sizeof(uint32_t)) )
-return pars;
+goto out;
             if ( !fill_data_elem(&par, array, &array_sz, sizeof(uint32_t),
  data) )
-return pars;
+goto out;
 data += sizeof(uint32_t);
 break;
 
@@ -520,6 +520,9 @@ static int fill_data(struct ring *ring, const char *fmt, 
...)
 pars++;
 }
 
+ out:
+va_end(ap);
+
 return pars;
 }
 
-- 
2.35.3




[PATCH v8 8/8] tools/xenstored: have a single do_control_memreport()

2024-02-16 Thread Juergen Gross
With 9pfs now available in Xenstore-stubdom, there is no reason to
have distinct do_control_memreport() variants for the daemon and the
stubdom implementations.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 tools/xenstored/control.c | 27 +++
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index dae23a5ac0..9561289179 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -216,23 +216,11 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
return 0;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
FILE *fp;
+   const char *filename;
int fd;
 
if (num > 1)
@@ -255,8 +243,12 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
if (!fp)
close(fd);
}
-   } else
-   fp = fopen(vec[0], "a");
+   } else {
+   filename = absolute_filename(ctx, vec[0]);
+   if (!filename)
+   return ENOMEM;
+   fp = fopen(filename, "a");
+   }
 
if (!fp)
return EBADF;
@@ -267,7 +259,6 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
send_ack(conn, XS_CONTROL);
return 0;
 }
-#endif
 
 static int do_control_print(const void *ctx, struct connection *conn,
const char **vec, int num)
@@ -310,11 +301,7 @@ static struct cmd_s cmds[] = {
"Default timeout is 60 seconds.", 5 },
 #endif
{ "logfile", do_control_logfile, "" },
-#ifdef __MINIOS__
-   { "memreport", do_control_memreport, "" },
-#else
{ "memreport", do_control_memreport, "[]" },
-#endif
{ "print", do_control_print, "" },
{ "quota", do_control_quota,
"[set  ||max [-r]]" },
-- 
2.35.3




[PATCH v8 5/8] tools/xenstored: mount 9pfs device in stubdom

2024-02-16 Thread Juergen Gross
Mount the 9pfs device in stubdom enabling it to use files.

This has to happen in a worker thread in order to allow the main thread
to handle the required Xenstore accesses in parallel.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- add logging in case of errors (Julien Grall)
---
 tools/xenstored/core.h   |  6 +
 tools/xenstored/domain.c |  2 ++
 tools/xenstored/minios.c | 54 
 3 files changed, 62 insertions(+)

diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index f6af086f01..fe0ee90581 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -36,6 +36,8 @@
 #include "list.h"
 #include "hashtable.h"
 
+#define XENSTORE_LIB_DIR   XEN_LIB_DIR "/xenstore"
+
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
 /* O_CLOEXEC support is needed for Live Update in the daemon case. */
@@ -399,6 +401,10 @@ void handle_special_fds(void);
 int get_socket_fd(void);
 void set_socket_fd(int fd);
 
+#ifdef __MINIOS__
+void mount_9pfs(void);
+#endif
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/domain.c b/tools/xenstored/domain.c
index 1a7d5e9756..64c8fd0cc3 100644
--- a/tools/xenstored/domain.c
+++ b/tools/xenstored/domain.c
@@ -1236,6 +1236,8 @@ void stubdom_init(void)
barf_perror("Failed to initialize stubdom");
 
xenevtchn_notify(xce_handle, stubdom->port);
+
+   mount_9pfs();
 #endif
 }
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 22ac8defbd..562a9b4972 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -17,10 +17,20 @@
 */
 #include 
 #include 
+#include 
+#include "talloc.h"
 #include "core.h"
 #include "utils.h"
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+
+#define P9_STATE_PATH  "device/9pfs/0/state"
+
+static void *p9_device;
 
 void finish_daemonize(void)
 {
@@ -74,3 +84,47 @@ int get_socket_fd(void)
 void set_socket_fd(int fd)
 {
 }
+
+static void mount_thread(void *p)
+{
+   xenbus_event_queue events = NULL;
+   char *err;
+   char *dummy;
+
+   err = xenbus_watch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs", &events);
+   if (err) {
+   log("error \"%s\" when setting watch on \"%s\"\n", err,
+   P9_STATE_PATH);
+   free(err);
+   return;
+   }
+
+   for (;;) {
+   xenbus_wait_for_watch(&events);
+
+   /*
+* We only care for existence of the state node.
+* State changes are handled in init_9pfront().
+*/
+   err = xenbus_read(XBT_NIL, P9_STATE_PATH, &dummy);
+   if (!err)
+   break;
+   free(err);
+   }
+
+   free(dummy);
+
+   err = xenbus_unwatch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs");
+   if (err) {
+   log("error \"%s\" when unwatching \"%s\", leaking watch\n",
+   err, P9_STATE_PATH);
+   free(err);
+   }
+
+   p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+}
+
+void mount_9pfs(void)
+{
+   create_thread("mount-9pfs", mount_thread, NULL);
+}
-- 
2.35.3




[PATCH v8 7/8] tools/xenstored: support complete log capabilities in stubdom

2024-02-16 Thread Juergen Gross
With 9pfs being fully available in Xenstore-stubdom now, there is no
reason to not fully support all logging capabilities in stubdom.

Open the logfile on stubdom only after the 9pfs file system has been
mounted.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
Acked-by: Anthony PERARD 
---
V3:
- remove now stale comment in sysconfig.xencommons.in (Julien Grall)
---
 .../Linux/init.d/sysconfig.xencommons.in  |  1 -
 tools/hotplug/Linux/launch-xenstore.in|  1 +
 tools/xenstored/control.c | 30 +--
 tools/xenstored/minios.c  |  3 ++
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in 
b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
index 433e4849af..1bdd830d8a 100644
--- a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
+++ b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
@@ -58,7 +58,6 @@ XENSTORED_ARGS=
 ## Default: Not defined, tracing off
 #
 # Log xenstored messages
-# Only evaluated if XENSTORETYPE is "daemon".
 #XENSTORED_TRACE=[yes|on|1]
 
 ## Type: integer
diff --git a/tools/hotplug/Linux/launch-xenstore.in 
b/tools/hotplug/Linux/launch-xenstore.in
index e854ca1eb8..da4eeca7c5 100644
--- a/tools/hotplug/Linux/launch-xenstore.in
+++ b/tools/hotplug/Linux/launch-xenstore.in
@@ -98,6 +98,7 @@ test -f @CONFIG_DIR@/@CONFIG_LEAF_DIR@/xencommons && . 
@CONFIG_DIR@/@CONFIG_LEAF
[ -z "$XENSTORE_DOMAIN_SIZE" ] && XENSTORE_DOMAIN_SIZE=8
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --memory 
$XENSTORE_DOMAIN_SIZE"
[ -z "$XENSTORE_MAX_DOMAIN_SIZE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --maxmem $XENSTORE_MAX_DOMAIN_SIZE"
+   [ -z "$XENSTORED_TRACE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS -T xenstored-trace.log"
 
echo -n Starting $XENSTORE_DOMAIN_KERNEL...
${LIBEXEC_BIN}/init-xenstore-domain $XENSTORE_DOMAIN_ARGS || exit 1
diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index b2f64d674f..dae23a5ac0 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -201,19 +201,6 @@ static int do_control_quota_s(const void *ctx, struct 
connection *conn,
return EINVAL;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_logfile(const void *ctx, struct connection *conn,
  const char **vec, int num)
 {
@@ -222,13 +209,26 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
 
close_log();
talloc_free(tracefile);
-   tracefile = talloc_strdup(NULL, vec[0]);
+   tracefile = absolute_filename(NULL, vec[0]);
reopen_log();
 
send_ack(conn, XS_CONTROL);
return 0;
 }
 
+#ifdef __MINIOS__
+static int do_control_memreport(const void *ctx, struct connection *conn,
+   const char **vec, int num)
+{
+   if (num)
+   return EINVAL;
+
+   talloc_report_full(NULL, stdout);
+
+   send_ack(conn, XS_CONTROL);
+   return 0;
+}
+#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
@@ -309,10 +309,10 @@ static struct cmd_s cmds[] = {
"[-c ] [-F] [-t ] \n"
"Default timeout is 60 seconds.", 5 },
 #endif
+   { "logfile", do_control_logfile, "" },
 #ifdef __MINIOS__
{ "memreport", do_control_memreport, "" },
 #else
-   { "logfile", do_control_logfile, "" },
{ "memreport", do_control_memreport, "[]" },
 #endif
{ "print", do_control_print, "" },
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index e70386f8c7..a229954cf4 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -122,6 +122,9 @@ static void mount_thread(void *p)
}
 
p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+
+   /* Start logging if selected. */
+   reopen_log();
 }
 
 void mount_9pfs(void)
-- 
2.35.3




[PATCH v8 6/8] tools/xenstored: add helpers for filename handling

2024-02-16 Thread Juergen Gross
Add some helpers for handling filenames which might need different
implementations between stubdom and daemon environments:

- expansion of relative filenames (those are not really defined today,
  just expand them to be relative to /var/lib/xen/xenstore)
- expansion of xenstore_daemon_rundir() (used e.g. for saving the state
  file in case of live update - needs to be unchanged in the daemon
  case, but should result in /var/lib/xen/xenstore for stubdom)

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- make absolute_filename() return a pointer to const (Julien Grall)
---
 tools/xenstored/core.c  | 15 +--
 tools/xenstored/core.h  |  5 -
 tools/xenstored/lu_daemon.c |  4 ++--
 tools/xenstored/minios.c|  5 +
 tools/xenstored/posix.c |  8 +++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c
index 48fc787ac1..bada1ad9a2 100644
--- a/tools/xenstored/core.c
+++ b/tools/xenstored/core.c
@@ -63,7 +63,7 @@ char **orig_argv;
 LIST_HEAD(connections);
 int tracefd = -1;
 bool keep_orphans = false;
-char *tracefile = NULL;
+const char *tracefile = NULL;
 static struct hashtable *nodes;
 unsigned int trace_flags = TRACE_OBJ | TRACE_IO;
 
@@ -137,6 +137,17 @@ void trace_destroy(const void *data, const char *type)
trace("obj: DESTROY %s %p\n", type, data);
 }
 
+/*
+ * Return an absolute filename.
+ * In case of a relative filename given as input, prepend XENSTORE_LIB_DIR.
+ */
+const char *absolute_filename(const void *ctx, const char *filename)
+{
+   if (filename[0] != '/')
+   return talloc_asprintf(ctx, XENSTORE_LIB_DIR "/%s", filename);
+   return talloc_strdup(ctx, filename);
+}
+
 void close_log(void)
 {
if (tracefd >= 0)
@@ -2759,7 +2770,7 @@ int main(int argc, char *argv[])
 #endif
 
if (tracefile)
-   tracefile = talloc_strdup(NULL, tracefile);
+   tracefile = absolute_filename(NULL, tracefile);
 
 #ifndef NO_LIVE_UPDATE
/* Read state in case of live update. */
diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index fe0ee90581..e58779e88c 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -341,7 +341,7 @@ void close_log(void);
 extern int orig_argc;
 extern char **orig_argv;
 
-extern char *tracefile;
+extern const char *tracefile;
 extern int tracefd;
 
 /* Trace flag values must be kept in sync with trace_switches[] contents. */
@@ -405,6 +405,9 @@ void set_socket_fd(int fd);
 void mount_9pfs(void);
 #endif
 
+const char *xenstore_rundir(void);
+const char *absolute_filename(const void *ctx, const char *filename);
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/lu_daemon.c b/tools/xenstored/lu_daemon.c
index 71bcabadd3..635ab0 100644
--- a/tools/xenstored/lu_daemon.c
+++ b/tools/xenstored/lu_daemon.c
@@ -24,7 +24,7 @@ void lu_get_dump_state(struct lu_dump_state *state)
state->size = 0;
 
state->filename = talloc_asprintf(NULL, "%s/state_dump",
- xenstore_daemon_rundir());
+ xenstore_rundir());
if (!state->filename)
barf("Allocation failure");
 
@@ -65,7 +65,7 @@ FILE *lu_dump_open(const void *ctx)
int fd;
 
filename = talloc_asprintf(ctx, "%s/state_dump",
-  xenstore_daemon_rundir());
+  xenstore_rundir());
if (!filename)
return NULL;
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 562a9b4972..e70386f8c7 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -128,3 +128,8 @@ void mount_9pfs(void)
 {
create_thread("mount-9pfs", mount_thread, NULL);
 }
+
+const char *xenstore_rundir(void)
+{
+   return XENSTORE_LIB_DIR;
+}
diff --git a/tools/xenstored/posix.c b/tools/xenstored/posix.c
index 496329dfd1..d88c82d972 100644
--- a/tools/xenstored/posix.c
+++ b/tools/xenstored/posix.c
@@ -326,9 +326,10 @@ void early_init(bool live_update, bool dofork, const char 
*pidfile)
 {
reopen_log();
 
-   /* Make sure xenstored directory exists. */
+   /* Make sure xenstored directories exist. */
/* Errors ignored here, will be reported when we open files */
mkdir(xenstore_daemon_rundir(), 0755);
+   mkdir(XENSTORE_LIB_DIR, 0755);
 
if (dofork) {
openlog("xenstored", 0, LOG_DAEMON);
@@ -406,3 +407,8 @@ void set_socket_fd(int fd)
 {
sock = fd;
 }
+
+const char *xenstore_rundir(void)
+{
+   return xenstore_daemon_rundir();
+}
-- 
2.35.3




[PATCH v8 3/8] stubdom: extend xenstore stubdom configs

2024-02-16 Thread Juergen Gross
Extend the config files of the Xenstore stubdoms to include XENBUS
and 9PFRONT items in order to support file based logging.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 stubdom/xenstore-minios.cfg| 2 +-
 stubdom/xenstorepvh-minios.cfg | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/stubdom/xenstore-minios.cfg b/stubdom/xenstore-minios.cfg
index a41704bb6b..239da519b9 100644
--- a/stubdom/xenstore-minios.cfg
+++ b/stubdom/xenstore-minios.cfg
@@ -3,7 +3,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
diff --git a/stubdom/xenstorepvh-minios.cfg b/stubdom/xenstorepvh-minios.cfg
index 6af51f5753..752b90d7d3 100644
--- a/stubdom/xenstorepvh-minios.cfg
+++ b/stubdom/xenstorepvh-minios.cfg
@@ -4,7 +4,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
-- 
2.35.3




[PATCH v8 4/8] tools: add 9pfs device to xenstore-stubdom

2024-02-16 Thread Juergen Gross
Add a 9pfs device to Xenstore stubdom in order to allow it to do e.g.
logging into a dom0 file.

Use the following parameters for the new device:

- tag = "Xen"
- type = "xen_9pfsd"
- path = "/var/lib/xen/xenstore"
- security-model = "none"

For now don't limit allowed file space or number of files.

Add a new libxl function for adding it similar to the function for
adding the console device.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Anthony PERARD 
---
V2:
- add security_model parameter to new libxl function (Jason Andryuk)
V4:
- rename function to libxl_device_9pfs_add() (Anthony Perard)
- use a libxl_device_p9 pointer as parameter (Anthony Perard)
V8:
- remove "_XENSTORE" suffix from define (Anthony Perard)
---
 tools/helpers/init-xenstore-domain.c |  7 +++
 tools/include/libxl.h| 15 +++
 tools/libs/light/libxl_9pfs.c| 16 
 3 files changed, 38 insertions(+)

diff --git a/tools/helpers/init-xenstore-domain.c 
b/tools/helpers/init-xenstore-domain.c
index 140ed610ae..1683438c5c 100644
--- a/tools/helpers/init-xenstore-domain.c
+++ b/tools/helpers/init-xenstore-domain.c
@@ -433,6 +433,12 @@ int main(int argc, char** argv)
 int rv, fd;
 char *maxmem_str = NULL;
 libxl_ctx *ctx;
+libxl_device_p9 p9 = { .backend_domid = 0,
+   .tag = "Xen",
+   .path = XEN_LIB_DIR"/xenstore",
+   .security_model = "none",
+   .type = LIBXL_P9_TYPE_XEN_9PFSD,
+};
 
 while ( (opt = getopt_long(argc, argv, "v", options, NULL)) != -1 )
 {
@@ -543,6 +549,7 @@ int main(int argc, char** argv)
 }
 libxl_console_add_xenstore(ctx, domid, 0, console_evtchn, console_gfn,
NULL);
+    libxl_device_9pfs_add(ctx, domid, &p9, NULL);
 libxl_ctx_free(ctx);
 
 fd = creat(XEN_RUN_DIR "/xenstored.pid", 0666);
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 9a3e702557..804496a9f8 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -583,6 +583,13 @@
  * libxl_console_add_xenstore() in libxl.
  */
 #define LIBXL_HAVE_CONSOLE_ADD_XENSTORE 1
+
+/*
+ * LIBXL_HAVE_P9_ADD indicates presence of the function
+ * libxl_device_9pfs_add() in libxl.
+ */
+#define LIBXL_HAVE_P9_ADD 1
+
 /*
  * libxl ABI compatibility
  *
@@ -2074,6 +2081,14 @@ int libxl_console_add_xenstore(libxl_ctx *ctx, uint32_t 
domid, uint32_t backend,
const libxl_asyncop_how *ao_how)
LIBXL_EXTERNAL_CALLERS_ONLY;
 
+/* libxl_device_9pfs_add writes the Xenstore entries for a domain's
+ * primary 9pfs device based on domid, and device parameters.
+ * If needed it will start the backend daemon.
+ */
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+  LIBXL_EXTERNAL_CALLERS_ONLY;
+
 /* May be called with info_r == NULL to check for domain's existence.
  * Returns ERROR_DOMAIN_NOTFOUND if domain does not exist (used to return
  * ERROR_INVAL for this scenario). */
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index ddeb4f20a7..48f894f070 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -206,6 +206,22 @@ static void libxl__device_p9_add(libxl__egc *egc, uint32_t 
domid,
 aodev->callback(egc, aodev);
 }
 
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+{
+AO_CREATE(ctx, domid, ao_how);
+libxl__ao_device *aodev;
+
+GCNEW(aodev);
+libxl__prepare_ao_device(ao, aodev);
+aodev->action = LIBXL__DEVICE_ACTION_ADD;
+aodev->callback = device_addrm_aocomplete;
+
+libxl__device_p9_add(egc, domid, p9, aodev);
+
+return AO_INPROGRESS;
+}
+
 #define libxl_device_p9_list NULL
 #define libxl_device_p9_compare NULL
 
-- 
2.35.3




[PATCH v8 1/8] tools/libs/light: add backend type for 9pfs PV devices

2024-02-16 Thread Juergen Gross
Make the backend type of 9pfs PV devices configurable. The default is
"qemu" with the related Xenstore backend-side directory being "9pfs".

Add another type "xen_9pfsd" with the related Xenstore backend-side
directory "xen_9pfs".

As additional security features it is possible to specify:
- "max-space" for limiting the maximum space consumed on the filesystem
  in MBs
- "max-files" for limiting the maximum number of files in the
  filesystem
- "max-open-files" for limiting the maximum number of concurrent open
  files

For convenience "auto-delete" is available to let the backend delete the
oldest file of the guest in case otherwise "max-space" or "max-files"
would be violated.

The xen-9pfsd daemon will be started by libxenlight automatically when
the first "xen_9pfs" device is being created.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: George Dunlap  # Golang bits
Reviewed-by: Anthony PERARD 
---
V3:
- regenerate go bindings
V4:
- rename libxl_device_p9_dm_needed() to libxl__device_p9_dm_needed()
  (Anthony Perard)
- reorder span related functions (Anthony Perard)
- add comment for xen9pfsd_spawn() return values (Anthony Perard)
- add LIBXL_HAVE_XEN_9PFS to libxl.h (Anthony Perard)
- use a copy of 'p9' in xen9pfsd_spawn() (Anthony Perard)
V6:
- rebase (Anthony Perard)
- drop callback from struct libxl__aop9_state (Anthony Perard)
---
 tools/golang/xenlight/helpers.gen.go  |  10 ++
 tools/golang/xenlight/types.gen.go|  12 ++
 tools/include/libxl.h |   7 +
 tools/libs/light/libxl_9pfs.c | 157 +-
 tools/libs/light/libxl_create.c   |   4 +-
 tools/libs/light/libxl_dm.c   |   2 +-
 tools/libs/light/libxl_types.idl  |  11 ++
 tools/libs/light/libxl_types_internal.idl |   1 +
 8 files changed, 197 insertions(+), 7 deletions(-)

diff --git a/tools/golang/xenlight/helpers.gen.go 
b/tools/golang/xenlight/helpers.gen.go
index 0f8e23773c..8f44397a4e 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -2440,6 +2440,11 @@ x.Tag = C.GoString(xc.tag)
 x.Path = C.GoString(xc.path)
 x.SecurityModel = C.GoString(xc.security_model)
 x.Devid = Devid(xc.devid)
+x.Type = P9Type(xc._type)
+x.MaxSpace = int(xc.max_space)
+x.MaxFiles = int(xc.max_files)
+x.MaxOpenFiles = int(xc.max_open_files)
+x.AutoDelete = bool(xc.auto_delete)
 
  return nil}
 
@@ -2458,6 +2463,11 @@ xc.path = C.CString(x.Path)}
 if x.SecurityModel != "" {
 xc.security_model = C.CString(x.SecurityModel)}
 xc.devid = C.libxl_devid(x.Devid)
+xc._type = C.libxl_p9_type(x.Type)
+xc.max_space = C.int(x.MaxSpace)
+xc.max_files = C.int(x.MaxFiles)
+xc.max_open_files = C.int(x.MaxOpenFiles)
+xc.auto_delete = C.bool(x.AutoDelete)
 
  return nil
  }
diff --git a/tools/golang/xenlight/types.gen.go 
b/tools/golang/xenlight/types.gen.go
index 9c8b7b81f6..d31722407a 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -122,6 +122,13 @@ NicTypeVifIoemu NicType = 1
 NicTypeVif NicType = 2
 )
 
+type P9Type int
+const(
+P9TypeUnknown P9Type = 0
+P9TypeQemu P9Type = 1
+P9TypeXen9Pfsd P9Type = 2
+)
+
 type ActionOnShutdown int
 const(
 ActionOnShutdownDestroy ActionOnShutdown = 1
@@ -889,6 +896,11 @@ Tag string
 Path string
 SecurityModel string
 Devid Devid
+Type P9Type
+MaxSpace int
+MaxFiles int
+MaxOpenFiles int
+AutoDelete bool
 }
 
 type DevicePvcallsif struct {
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 46bc774126..9a3e702557 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -615,6 +615,13 @@
  */
 #define LIBXL_HAVE_HVM_PIRQ 1
 
+/*
+ * LIBXL_HAVE_XEN_9PFS indicates the presence of the xen-9pfsd related
+ * fields in libxl_device_p9: type, max_space, max_files, max_open_files and
+ * auto_delete.
+ */
+#define LIBXL_HAVE_XEN_9PFS 1
+
 /*
  * libxl memory management
  *
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 5ab0d3aa21..900c0d46a0 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -33,20 +33,171 @@ static int libxl__set_xenstore_p9(libxl__gc *gc, uint32_t 
domid,
 
 flexarray_append_pair(front, "tag", p9->tag);
 
+if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD) {
+flexarray_append_pair(back, "max-space",
+  GCSPRINTF("%u", p9->max_space));
+flexarray_append_pair(back, "max-files",
+  GCSPRINTF("%u", p9->max_files));
+flexarray_append_pair(back, "max-open-files",
+  GCSPRINTF("%u", p9->max_open_files));
+flexarray_append_pair(back, "auto-delete",
+  p9->auto_delete ? "1" : "0");
+}
+

[PATCH v8 2/8] tools/xl: support new 9pfs backend xen_9pfsd

2024-02-16 Thread Juergen Gross
Add support for the new 9pfs backend "xen_9pfsd". For this backend type
the tag defaults to "Xen" and the host side path to
"/var/log/xen/guests/<dom-name>".

Do most of the default settings in libxl. Unfortunately the default
path can't easily be set in libxl, as the domain name isn't available
in the related 9pfs specific function.

Setting the defaults in libxl requires moving the sanity checking
of 9pfs parameters from xl to libxl, too.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Anthony PERARD 
---
V2:
- test max_files and max_open_files, too (Jason Andryuk)
V4:
- fix man page to use the "xen_9pfsd" type due to idl limitation
  (Jason Andryuk)
- set (most of) the defaults in libxl (Anthony Perard)
---
 docs/man/xl.cfg.5.pod.in  | 36 +--
 tools/libs/light/libxl_9pfs.c | 18 ++
 tools/xl/xl_parse.c   | 23 +++---
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index ea8d41727d..039e057318 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -772,10 +772,16 @@ settings, from the following list:
 
 =over 4
 
+=item B
+
+The backendtype for the PV device. Supported values are B<qemu> and
+B<xen_9pfsd>.  The default is B<qemu>.
+
 =item B
 
 9pfs tag to identify the filesystem share. The tag is needed on the
-guest side to mount it.
+guest side to mount it. For the backendtype of B<xen_9pfsd> the tag defaults to
+"Xen".
 
 =item B
 
@@ -785,12 +791,38 @@ squash or remap).
 
 =item B
 
-Filesystem path on the backend to export.
+Filesystem path on the backend to export. For the backendtype of B
+the path defaults to "@XEN_LOG_DIR@/guests/".
 
 =item B
 
 Specify the backend domain name or id, defaults to dom0.
 
+=item B
+
+Specify the maximum number of files below B. A value of 0 (which
+is the default) doesn't limit the number of files. Only valid for
+B.
+
+=item B
+
+Specify the maximum number of concurrently opened files below B.
+Multiple opens of the same file are counted individually. Only valid for
+B, which has a default of B.
+
+=item B
+
+Specify the maximum used disk space in MiB below B. A value of 0 (which
+is the default) doesn't limit the usable disk space. Only valid for
+B.
+
+=item B
+
+When set the backend will delete the oldest file which is currently not
+opened by the guest in case the disk space limit set via B or the
+file limit set via B is being reached. Only valid for
+B.
+
 =back
 
 =item B
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 900c0d46a0..ddeb4f20a7 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -20,6 +20,24 @@
 static int libxl__device_p9_setdefault(libxl__gc *gc, uint32_t domid,
libxl_device_p9 *p9, bool hotplug)
 {
+if (p9->type == LIBXL_P9_TYPE_UNKNOWN) {
+p9->type = LIBXL_P9_TYPE_QEMU;
+}
+if (p9->type == LIBXL_P9_TYPE_QEMU &&
+(p9->max_files || p9->max_open_files || p9->max_space ||
+ p9->auto_delete)) {
+LOGD(ERROR, domid, "Illegal 9pfs parameter combination");
+return ERROR_INVAL;
+}
+if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD && !p9->tag) {
+p9->tag = libxl__strdup(NOGC, "Xen");
+}
+
+if (!p9->path || !p9->security_model || !p9->tag) {
+LOGD(ERROR, domid, "9pfs spec missing required field!");
+return ERROR_INVAL;
+}
+
 return libxl__resolve_domid(gc, p9->backend_domname, &p9->backend_domid);
 }
 
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 9b358f11b8..80ffe85f5e 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2233,6 +2233,20 @@ void parse_config_data(const char *config_source,
 replace_string(&p9->tag, value);
 } else if (!strcmp(key, "backend")) {
 replace_string(&p9->backend_domname, value);
+} else if (!strcmp(key, "type")) {
+if (libxl_p9_type_from_string(value, &p9->type)) {
+fprintf(stderr, "failed to parse 9pfs type: %s\n",
+value);
+exit(1);
+}
+} else if (!strcmp(key, "max-files")) {
+p9->max_files = parse_ulong(value);
+} else if (!strcmp(key, "max-open-files")) {
+p9->max_open_files = parse_ulong(value);
+} else if (!strcmp(key, "max-space")) {
+p9->max_space = parse_ulong(value);
+} else if (!strcmp(key, "auto-delete")) {
+p9->auto_delete = strtoul(value, NULL, 0);
 } else {

[PATCH v8 0/8] tools: enable xenstore-stubdom to use 9pfs

2024-02-16 Thread Juergen Gross
This series is adding 9pfs support to Xenstore-stubdom, enabling it
to do logging to a dom0 directory.

This is a prerequisite for the final goal to add live update support
to Xenstore-stubdom, as it enables the stubdom to store its state in
a dom0 file.

Reposting the rest of the series.

Changes in V8:
- patches 1-13 of V7 have been applied

Changes in V7:
- fixed V6 bugs

Changes in V6:
- patch 1 of V5 has been applied
- rebase
- addressed comments

Changes in V5:
- 10 patches have been applied already
- rename source directory to tools/9pfsd
- addressed comments

Changes in V4:
- patch 2 of V3 was applied
- added support of reading directories
- addressed review comments

Changes in V3:
- new patches 1, 23-25
- addressed review comments

Changes in V2:
- support of multiple rings per device
- xenlogd->xen-9pfsd rename
- addressed review comments
- fixed some bugs

Juergen Gross (8):
  tools/libs/light: add backend type for 9pfs PV devices
  tools/xl: support new 9pfs backend xen_9pfsd
  stubdom: extend xenstore stubdom configs
  tools: add 9pfs device to xenstore-stubdom
  tools/xenstored: mount 9pfs device in stubdom
  tools/xenstored: add helpers for filename handling
  tools/xenstored: support complete log capabilities in stubdom
  tools/xenstored: have a single do_control_memreport()

 docs/man/xl.cfg.5.pod.in  |  36 +++-
 stubdom/xenstore-minios.cfg   |   2 +-
 stubdom/xenstorepvh-minios.cfg|   2 +-
 tools/golang/xenlight/helpers.gen.go  |  10 +
 tools/golang/xenlight/types.gen.go|  12 ++
 tools/helpers/init-xenstore-domain.c  |   7 +
 .../Linux/init.d/sysconfig.xencommons.in  |   1 -
 tools/hotplug/Linux/launch-xenstore.in|   1 +
 tools/include/libxl.h |  22 ++
 tools/libs/light/libxl_9pfs.c | 191 +-
 tools/libs/light/libxl_create.c   |   4 +-
 tools/libs/light/libxl_dm.c   |   2 +-
 tools/libs/light/libxl_types.idl  |  11 +
 tools/libs/light/libxl_types_internal.idl |   1 +
 tools/xenstored/control.c |  29 +--
 tools/xenstored/core.c|  15 +-
 tools/xenstored/core.h|  11 +-
 tools/xenstored/domain.c  |   2 +
 tools/xenstored/lu_daemon.c   |   4 +-
 tools/xenstored/minios.c  |  62 ++
 tools/xenstored/posix.c   |   8 +-
 tools/xl/xl_parse.c   |  23 ++-
 22 files changed, 414 insertions(+), 42 deletions(-)

-- 
2.35.3




Re: [PATCH v7 03/21] tools/xen-9pfsd: add transport layer

2024-02-15 Thread Juergen Gross

On 15.02.24 14:04, Juergen Gross wrote:

Add the transport layer of 9pfs. This is basically the infrastructure
to receive requests from the frontend and to send the related answers
via the rings.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Anthony PERARD 


Seems CI is unhappy due to a false positive.

The following diff needs to be folded in:

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 4312a62dfe..996017a8be 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -104,7 +104,7 @@ void *io_thread(void *arg)
 {
 struct ring *ring = arg;
 unsigned int count = 0;
-struct p9_header hdr;
+struct p9_header hdr = { .size = 0 };
 bool in_hdr = true;

 ring->max_size = ring->ring_size;


Juergen


---
V2:
- rename put_request_bytes() (Jason Andryuk)
- rename get_request_bytes() and put_response_bytes() len parameter
   (Jason Andryuk)
- don't unmask event channel if error indicator is set (Jason Andryuk)
---
  tools/9pfsd/io.c| 143 +++-
  tools/9pfsd/xen-9pfsd.h |  16 +
  2 files changed, 156 insertions(+), 3 deletions(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index eb7c136e09..4312a62dfe 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -6,39 +6,176 @@
   * Copyright (C) 2024 Juergen Gross 
   *
   * I/O thread handling.
+ *
+ * Only handle one request at a time, pushing out the complete response
+ * before looking for the next request.
   */
  
  #include 

+#include 
  #include 
  #include 
+#include/* For cpu barriers. */
+#include 
  
  #include "xen-9pfsd.h"
  
+/*

+ * Note that the ring names "in" and "out" are from the frontend's
+ * perspective, so the "in" ring will be used for responses to the frontend,
+ * while the "out" ring is used for requests from the frontend to the
+ * backend.
+ */
+static unsigned int ring_in_free(struct ring *ring)
+{
+unsigned int queued;
+
+queued = xen_9pfs_queued(ring->prod_pvt_in, ring->intf->in_cons,
+ ring->ring_size);
+xen_rmb();
+
+return ring->ring_size - queued;
+}
+
+static unsigned int ring_out_data(struct ring *ring)
+{
+unsigned int queued;
+
+queued = xen_9pfs_queued(ring->intf->out_prod, ring->cons_pvt_out,
+ ring->ring_size);
+xen_rmb();
+
+return queued;
+}
+
+static unsigned int get_request_bytes(struct ring *ring, unsigned int off,
+  unsigned int total_len)
+{
+unsigned int size;
+unsigned int out_data = ring_out_data(ring);
+RING_IDX prod, cons;
+
+size = min(total_len - off, out_data);
+prod = xen_9pfs_mask(ring->intf->out_prod, ring->ring_size);
+cons = xen_9pfs_mask(ring->cons_pvt_out, ring->ring_size);
+xen_9pfs_read_packet(ring->buffer + off, ring->data.out, size,
+ prod, &cons, ring->ring_size);
+
+xen_rmb();   /* Read data out before setting visible consumer. */
+ring->cons_pvt_out += size;
+ring->intf->out_cons = ring->cons_pvt_out;
+
+/* Signal that more space is available now. */
+xenevtchn_notify(xe, ring->evtchn);
+
+return size;
+}
+
+static unsigned int put_response_bytes(struct ring *ring, unsigned int off,
+   unsigned int total_len)
+{
+unsigned int size;
+unsigned int in_data = ring_in_free(ring);
+RING_IDX prod, cons;
+
+size = min(total_len - off, in_data);
+prod = xen_9pfs_mask(ring->prod_pvt_in, ring->ring_size);
+cons = xen_9pfs_mask(ring->intf->in_cons, ring->ring_size);
+xen_9pfs_write_packet(ring->data.in, ring->buffer + off, size,
+  &prod, cons, ring->ring_size);
+
+xen_wmb();   /* Write data out before setting visible producer. */
+ring->prod_pvt_in += size;
+ring->intf->in_prod = ring->prod_pvt_in;
+
+return size;
+}
+
  static bool io_work_pending(struct ring *ring)
  {
  if ( ring->stop_thread )
  return true;
-return false;
+if ( ring->error )
+return false;
+return ring->handle_response ? ring_in_free(ring) : ring_out_data(ring);
  }
  
  void *io_thread(void *arg)

  {
  struct ring *ring = arg;
+unsigned int count = 0;
+struct p9_header hdr;
+bool in_hdr = true;
+
+ring->max_size = ring->ring_size;
+ring->buffer = malloc(ring->max_size);
+if ( !ring->buffer )
+{
+syslog(LOG_CRIT, "memory allocation failure!");
+return NULL;
+}
  
  while ( !ring->stop_thread )

  {
  pthread_mutex_lock(&ring->mutex);
  if ( !io_work_pending(ring) )
  {
-if ( xenevtchn_unmask(xe, ring->evtchn) < 0 )
+if ( !ring->error && xenevtchn_unmask(xe, ring-&

[GIT PULL] xen: branch for v6.8-rc5

2024-02-15 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git 
for-linus-6.8a-rc5-tag

xen: branch for v6.8-rc5

It contains the following fixes and simple cleanups:

- A fix using a proper flexible array instead of a one-element array in order to
  avoid array-bounds sanitizer errors.

- A fix adding NULL pointer checks after allocating memory.

- A cleanup using memdup_array_user() instead of open-coding it.

- A fix for a rare race condition in Xen event channel allocation code.

- A small series making struct bus_type instances const.

- A fix of kerneldoc inline comments to match reality.


Thanks.

Juergen

 arch/x86/xen/smp.c | 12 
 drivers/xen/events/events_base.c   |  8 ++--
 drivers/xen/gntalloc.c |  2 +-
 drivers/xen/pcpu.c |  2 +-
 drivers/xen/privcmd.c  | 15 +--
 drivers/xen/xen-balloon.c  |  2 +-
 drivers/xen/xenbus/xenbus_client.c | 15 +--
 include/uapi/xen/gntalloc.h|  5 -
 8 files changed, 39 insertions(+), 22 deletions(-)

Kees Cook (1):
  xen/gntalloc: Replace UAPI 1-element array

Kunwu Chan (1):
  x86/xen: Add some null pointer checking to smp.c

Markus Elfring (1):
  xen/privcmd: Use memdup_array_user() in alloc_ioreq()

Maximilian Heyne (1):
  xen/events: close evtchn after mapping cleanup

Ricardo B. Marliere (2):
  xen: pcpu: make xen_pcpu_subsys const
  xen: balloon: make balloon_subsys const

SeongJae Park (1):
  xen/xenbus: document will_handle argument for xenbus_watch_path()



[PATCH v7 18/21] tools/xenstored: mount 9pfs device in stubdom

2024-02-15 Thread Juergen Gross
Mount the 9pfs device in stubdom enabling it to use files.

This has to happen in a worker thread in order to allow the main thread
to handle the required Xenstore accesses in parallel.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- add logging in case of errors (Julien Grall)
---
 tools/xenstored/core.h   |  6 +
 tools/xenstored/domain.c |  2 ++
 tools/xenstored/minios.c | 54 
 3 files changed, 62 insertions(+)

diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index f6af086f01..fe0ee90581 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -36,6 +36,8 @@
 #include "list.h"
 #include "hashtable.h"
 
+#define XENSTORE_LIB_DIR   XEN_LIB_DIR "/xenstore"
+
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
 /* O_CLOEXEC support is needed for Live Update in the daemon case. */
@@ -399,6 +401,10 @@ void handle_special_fds(void);
 int get_socket_fd(void);
 void set_socket_fd(int fd);
 
+#ifdef __MINIOS__
+void mount_9pfs(void);
+#endif
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/domain.c b/tools/xenstored/domain.c
index 1a7d5e9756..64c8fd0cc3 100644
--- a/tools/xenstored/domain.c
+++ b/tools/xenstored/domain.c
@@ -1236,6 +1236,8 @@ void stubdom_init(void)
barf_perror("Failed to initialize stubdom");
 
xenevtchn_notify(xce_handle, stubdom->port);
+
+   mount_9pfs();
 #endif
 }
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 22ac8defbd..562a9b4972 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -17,10 +17,20 @@
 */
 #include 
 #include 
+#include 
+#include "talloc.h"
 #include "core.h"
 #include "utils.h"
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+
+#define P9_STATE_PATH  "device/9pfs/0/state"
+
+static void *p9_device;
 
 void finish_daemonize(void)
 {
@@ -74,3 +84,47 @@ int get_socket_fd(void)
 void set_socket_fd(int fd)
 {
 }
+
+static void mount_thread(void *p)
+{
+   xenbus_event_queue events = NULL;
+   char *err;
+   char *dummy;
+
+   err = xenbus_watch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs", &events);
+   if (err) {
+   log("error \"%s\" when setting watch on \"%s\"\n", err,
+   P9_STATE_PATH);
+   free(err);
+   return;
+   }
+
+   for (;;) {
+   xenbus_wait_for_watch(&events);
+
+   /*
+* We only care for existence of the state node.
+* State changes are handled in init_9pfront().
+*/
+   err = xenbus_read(XBT_NIL, P9_STATE_PATH, &dummy);
+   if (!err)
+   break;
+   free(err);
+   }
+
+   free(dummy);
+
+   err = xenbus_unwatch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs");
+   if (err) {
+   log("error \"%s\" when unwatching \"%s\", leaking watch\n",
+   err, P9_STATE_PATH);
+   free(err);
+   }
+
+   p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+}
+
+void mount_9pfs(void)
+{
+   create_thread("mount-9pfs", mount_thread, NULL);
+}
-- 
2.35.3




[PATCH v7 13/21] tools/xen-9pfsd: add 9pfs read request support

2024-02-15 Thread Juergen Gross
Add the read request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Anthony PERARD 
---
V2:
- make error check more readable (Jason Andryuk)
V4:
- add directory read support
V5:
- rewinddir() if reading a directory and offset is 0 (Jason Andryuk)
---
 tools/9pfsd/io.c | 93 
 1 file changed, 93 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 1832c6b06e..ba2d4a436e 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -33,6 +33,7 @@
 #define P9_CMD_WALK   110
 #define P9_CMD_OPEN   112
 #define P9_CMD_CREATE 114
+#define P9_CMD_READ   116
 #define P9_CMD_WRITE  118
 #define P9_CMD_CLUNK  120
 #define P9_CMD_STAT   124
@@ -1245,6 +1246,94 @@ static void p9_stat(struct ring *ring, struct p9_header 
*hdr)
 free_fid(device, fidp);
 }
 
+static void p9_read(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+uint64_t off;
+unsigned int len;
+uint32_t count;
+void *buf;
+struct p9_fid *fidp;
+int ret;
+
+ret = fill_data(ring, "ULU", &fid, &off, &count);
+if ( ret != 3 )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp || !fidp->opened )
+{
+errno = EBADF;
+goto err;
+}
+
+len = count;
+buf = ring->buffer + sizeof(*hdr) + sizeof(uint32_t);
+
+if ( fidp->isdir )
+{
+struct dirent *dirent;
+struct stat st;
+struct p9_stat p9s;
+
+if ( off == 0 )
+rewinddir(fidp->data);
+
+while ( len != 0 )
+{
+errno = 0;
+dirent = readdir(fidp->data);
+if ( !dirent )
+{
+if ( errno )
+goto err;
+break;
+}
+if ( fstatat(fidp->fd, dirent->d_name, , 0) < 0 )
+goto err;
+fill_p9_stat(device, , , dirent->d_name);
+if ( p9s.size + sizeof(p9s.size) > len )
+{
+seekdir(fidp->data, dirent->d_off);
+break;
+}
+fill_buffer_at(, "s", );
+len -= p9s.size + sizeof(p9s.size);
+}
+}
+else
+{
+while ( len != 0 )
+{
+ret = pread(fidp->fd, buf, len, off);
+if ( ret <= 0 )
+break;
+len -= ret;
+buf += ret;
+off += ret;
+}
+if ( ret < 0 && len == count )
+goto err;
+}
+
+buf = ring->buffer + sizeof(*hdr) + sizeof(uint32_t);
+len = count - len;
+fill_buffer(ring, hdr->cmd + 1, hdr->tag, "D", , buf);
+
+ out:
+free_fid(device, fidp);
+
+return;
+
+ err:
+p9_error(ring, hdr->tag, errno);
+goto out;
+}
+
 static void p9_write(struct ring *ring, struct p9_header *hdr)
 {
 device *device = ring->device;
@@ -1369,6 +1458,10 @@ void *io_thread(void *arg)
 p9_create(ring, );
 break;
 
+case P9_CMD_READ:
+p9_read(ring, );
+break;
+
 case P9_CMD_WRITE:
 p9_write(ring, );
 break;
-- 
2.35.3




[PATCH v7 10/21] tools/xen-9pfsd: add 9pfs create request support

2024-02-15 Thread Juergen Gross
Add the create request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- set permissions correctly (Jason Andryuk)
V3:
- use opendirat() etc. (Jason Andryuk)
- rework error handling a little bit
---
 tools/9pfsd/io.c | 151 +++
 1 file changed, 151 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 6af14e5ee9..8fa80865b4 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -32,6 +32,7 @@
 #define P9_CMD_ERROR  107
 #define P9_CMD_WALK   110
 #define P9_CMD_OPEN   112
+#define P9_CMD_CREATE 114
 #define P9_CMD_CLUNK  120
 
 /* P9 protocol open flags. */
@@ -42,6 +43,12 @@
 #define P9_OTRUNC0x10   /* or'ed in, truncate file first */
 #define P9_OREMOVE   0x40   /* or'ed in, remove file after clunk */
 
+/* P9 protocol create permission masks. */
+#define P9_CREATE_PERM_DIR0x8000
+#define P9_CREATE_PERM_NOTSUPP0x03b0   /* link, symlink, ... */
+#define P9_CREATE_PERM_DIR_MASK   0777
+#define P9_CREATE_PERM_FILE_MASK  0666
+
 #define P9_MIN_MSIZE  2048
 #define P9_VERSION"9P2000.u"
 #define P9_WALK_MAXELEM   16
@@ -961,6 +968,146 @@ static void p9_open(struct ring *ring, struct p9_header 
*hdr)
 p9_error(ring, hdr->tag, errno);
 }
 
+static void p9_create(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+unsigned int name_off;
+uint32_t perm;
+uint8_t mode;
+unsigned int ext_off;
+struct p9_fid *fidp;
+struct p9_fid *new_fidp;
+char *path;
+struct stat st;
+struct p9_qid qid;
+uint32_t iounit;
+int flags;
+int ret;
+
+ret = fill_data(ring, "USUbS", &fid, &name_off, &perm, &mode, &ext_off);
+if ( ret != 5 )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+if ( !name_ok(ring->str + name_off) )
+{
+p9_error(ring, hdr->tag, ENOENT);
+return;
+}
+
+if ( perm & P9_CREATE_PERM_NOTSUPP )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp || fidp->opened )
+{
+free_fid(device, fidp);
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+if ( fstatat(device->root_fd, fidp->path, , 0) < 0 )
+{
+free_fid(device, fidp);
+p9_error(ring, hdr->tag, errno);
+return;
+}
+
+path = malloc(strlen(fidp->path) + strlen(ring->str + name_off) + 2);
+if ( !path )
+{
+free_fid(device, fidp);
+p9_error(ring, hdr->tag, ENOMEM);
+return;
+}
+sprintf(path, "%s/%s", fidp->path, ring->str + name_off);
+new_fidp = alloc_fid_mem(device, fid, path);
+free(path);
+if ( !new_fidp )
+{
+free_fid(device, fidp);
+p9_error(ring, hdr->tag, ENOMEM);
+return;
+}
+
+pthread_mutex_lock(&device->fid_mutex);
+
+new_fidp->ref = fidp->ref;
+
+if ( perm & P9_CREATE_PERM_DIR )
+{
+perm &= P9_CREATE_PERM_DIR_MASK & st.st_mode;
+if ( mode != P9_OREAD )
+{
+errno = EINVAL;
+goto err;
+}
+if ( mkdirat(device->root_fd, new_fidp->path, perm) < 0 )
+goto err;
+
+XEN_TAILQ_REMOVE(>fids, fidp, list);
+XEN_TAILQ_INSERT_HEAD(>fids, new_fidp, list);
+free(fidp);
+fidp = new_fidp;
+new_fidp = NULL;
+
+fidp->fd = openat(device->root_fd, fidp->path, O_RDONLY);
+if ( fidp->fd < 0 )
+goto err;
+fidp->data = fdopendir(fidp->fd);
+if ( !fidp->data )
+goto err;
+}
+else
+{
+flags = open_flags_from_mode(mode);
+if ( flags < 0 )
+{
+errno = EINVAL;
+goto err;
+}
+perm &= P9_CREATE_PERM_FILE_MASK & st.st_mode;
+
+XEN_TAILQ_REMOVE(>fids, fidp, list);
+XEN_TAILQ_INSERT_HEAD(>fids, new_fidp, list);
+free(fidp);
+fidp = new_fidp;
+new_fidp = NULL;
+
+fidp->fd = openat(device->root_fd, fidp->path, flags | O_CREAT | 
O_EXCL,
+  perm);
+if ( fidp->fd < 0 )
+goto err;
+}
+
+if ( fstatat(device->root_fd, fidp->path, &st, 0) < 0 )
+goto err;
+
+fill_qid(device, fidp->path, &qid, &st);
+iounit = get_iounit(ring, &st);
+fidp->opened = true;
+fidp->mode = mode;
+
+pthread_mutex_unlock(&device->fid_mutex);
+
+fill_buffer(ring, hdr->cmd + 1, hdr->tag, "QU", &qid, &iounit);
+
+return;
+
+ err:
+p9_error(ring, hdr->tag, errno);
+
+pthread_mutex_unlock(>fid_mutex);
+
+free(new_fidp);
+free_fid(device, fidp);
+}
+
 static void p9_clunk

[PATCH v7 15/21] tools/xl: support new 9pfs backend xen_9pfsd

2024-02-15 Thread Juergen Gross
Add support for the new 9pfs backend "xen_9pfsd". For this backend type
the tag defaults to "Xen" and the host side path to
"/var/log/xen/guests/<dom-name>".

Do most of the default settings in libxl. Unfortunately the default
path can't easily be set in libxl, as the domain name isn't available
in the related 9pfs specific function.

Setting the defaults in libxl requires moving the sanity checking
of 9pfs parameters from xl to libxl, too.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Anthony PERARD 
---
V2:
- test max_files and max_open_files, too (Jason Andryuk)
V4:
- fix man page to use the "xen_9pfsd" type due to idl limitation
  (Jason Andryuk)
- set (most of) the defaults in libxl (Anthony Perard)
---
 docs/man/xl.cfg.5.pod.in  | 36 +--
 tools/libs/light/libxl_9pfs.c | 18 ++
 tools/xl/xl_parse.c   | 23 +++---
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index ea8d41727d..039e057318 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -772,10 +772,16 @@ settings, from the following list:
 
 =over 4
 
+=item B<type=TYPE>
+
+The backendtype for the PV device. Supported values are B<qemu> and
+B<xen_9pfsd>.  The default is B<qemu>.
+
 =item B<tag=STRING>
 
 9pfs tag to identify the filesystem share. The tag is needed on the
-guest side to mount it.
+guest side to mount it. For the backendtype of B<xen_9pfsd> the tag defaults to
+"Xen".
 
 =item B<security_model="none">
 
@@ -785,12 +791,38 @@ squash or remap).
 
 =item B<path=STRING>
 
-Filesystem path on the backend to export.
+Filesystem path on the backend to export. For the backendtype of B<xen_9pfsd>
+the path defaults to "@XEN_LOG_DIR@/guests/<dom-name>".
 
 =item B<backend=domain-id>
 
 Specify the backend domain name or id, defaults to dom0.
 
+=item B<max-files=NUMBER>
+
+Specify the maximum number of files below B<path>. A value of 0 (which
+is the default) doesn't limit the number of files. Only valid for
+B<type=xen_9pfsd>.
+
+=item B<max-open-files=NUMBER>
+
+Specify the maximum number of concurrently opened files below B<path>.
+Multiple opens of the same file are counted individually. Only valid for
+B<type=xen_9pfsd>, which has a default of B<max-open-files=5>.
+
+=item B<max-space=NUMBER>
+
+Specify the maximum used disk space in MiB below B<path>. A value of 0 (which
+is the default) doesn't limit the usable disk space. Only valid for
+B<type=xen_9pfsd>.
+
+=item B<auto-delete=BOOLEAN>
+
+When set the backend will delete the oldest file which is currently not
+opened by the guest in case the disk space limit set via B<max-space> or the
+file limit set via B<max-files> is being reached. Only valid for
+B<type=xen_9pfsd>.
+
 =back
 
 =item B
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 900c0d46a0..ddeb4f20a7 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -20,6 +20,24 @@
 static int libxl__device_p9_setdefault(libxl__gc *gc, uint32_t domid,
libxl_device_p9 *p9, bool hotplug)
 {
+if (p9->type == LIBXL_P9_TYPE_UNKNOWN) {
+p9->type = LIBXL_P9_TYPE_QEMU;
+}
+if (p9->type == LIBXL_P9_TYPE_QEMU &&
+(p9->max_files || p9->max_open_files || p9->max_space ||
+ p9->auto_delete)) {
+LOGD(ERROR, domid, "Illegal 9pfs parameter combination");
+return ERROR_INVAL;
+}
+if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD && !p9->tag) {
+p9->tag = libxl__strdup(NOGC, "Xen");
+}
+
+if (!p9->path || !p9->security_model || !p9->tag) {
+LOGD(ERROR, domid, "9pfs spec missing required field!");
+return ERROR_INVAL;
+}
+
 return libxl__resolve_domid(gc, p9->backend_domname, >backend_domid);
 }
 
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 9b358f11b8..80ffe85f5e 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2233,6 +2233,20 @@ void parse_config_data(const char *config_source,
 replace_string(>tag, value);
 } else if (!strcmp(key, "backend")) {
 replace_string(>backend_domname, value);
+} else if (!strcmp(key, "type")) {
+if (libxl_p9_type_from_string(value, >type)) {
+fprintf(stderr, "failed to parse 9pfs type: %s\n",
+value);
+exit(1);
+}
+} else if (!strcmp(key, "max-files")) {
+p9->max_files = parse_ulong(value);
+} else if (!strcmp(key, "max-open-files")) {
+p9->max_open_files = parse_ulong(value);
+} else if (!strcmp(key, "max-space")) {
+p9->max_space = parse_ulong(value);
+} else if (!strcmp(key, "auto-delete")) {
+p9->auto_delete = strtoul(value, NULL, 0);
 } else {

[PATCH v7 21/21] tools/xenstored: have a single do_control_memreport()

2024-02-15 Thread Juergen Gross
With 9pfs now available in Xenstore-stubdom, there is no reason to
have distinct do_control_memreport() variants for the daemon and the
stubdom implementations.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 tools/xenstored/control.c | 27 +++
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index dae23a5ac0..9561289179 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -216,23 +216,11 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
return 0;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
FILE *fp;
+   const char *filename;
int fd;
 
if (num > 1)
@@ -255,8 +243,12 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
if (!fp)
close(fd);
}
-   } else
-   fp = fopen(vec[0], "a");
+   } else {
+   filename = absolute_filename(ctx, vec[0]);
+   if (!filename)
+   return ENOMEM;
+   fp = fopen(filename, "a");
+   }
 
if (!fp)
return EBADF;
@@ -267,7 +259,6 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
send_ack(conn, XS_CONTROL);
return 0;
 }
-#endif
 
 static int do_control_print(const void *ctx, struct connection *conn,
const char **vec, int num)
@@ -310,11 +301,7 @@ static struct cmd_s cmds[] = {
"Default timeout is 60 seconds.", 5 },
 #endif
{ "logfile", do_control_logfile, "" },
-#ifdef __MINIOS__
-   { "memreport", do_control_memreport, "" },
-#else
{ "memreport", do_control_memreport, "[]" },
-#endif
{ "print", do_control_print, "" },
{ "quota", do_control_quota,
"[set  ||max [-r]]" },
-- 
2.35.3




[PATCH v7 11/21] tools/xen-9pfsd: add 9pfs stat request support

2024-02-15 Thread Juergen Gross
Add the stat request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V3:
- use fstatat() (Jason Andryuk)
V4:
- add "s" format to fill_buffer() as a preparation for reading dirs
---
 tools/9pfsd/io.c | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 8fa80865b4..8c36106d90 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -34,6 +34,7 @@
 #define P9_CMD_OPEN   112
 #define P9_CMD_CREATE 114
 #define P9_CMD_CLUNK  120
+#define P9_CMD_STAT   124
 
 /* P9 protocol open flags. */
 #define P9_OREAD0   /* read */
@@ -60,6 +61,25 @@ struct p9_qid {
 uint64_t path;
 };
 
+struct p9_stat {
+uint16_t size;
+uint16_t type;
+uint32_t dev;
+struct p9_qid qid;
+uint32_t mode;
+uint32_t atime;
+uint32_t mtime;
+uint64_t length;
+const char *name;
+const char *uid;
+const char *gid;
+const char *muid;
+const char *extension;
+uint32_t n_uid;
+uint32_t n_gid;
+uint32_t n_muid;
+};
+
 /*
  * Note that the ring names "in" and "out" are from the frontend's
  * perspective, so the "in" ring will be used for responses to the frontend,
@@ -166,6 +186,7 @@ static void fmt_err(const char *fmt)
  * S: String (2 byte length +  characters)
  *The length is obtained via strlen() of the parameter, being a pointer
  *to the first character of the string
+ * s: stat (struct p9_stat)
  * U: 4 byte unsigned integer
  *The parameter is a pointer to a uint32_t value
  */
@@ -176,6 +197,8 @@ static void vfill_buffer_at(void **data, const char *fmt, 
va_list ap)
 const void *par;
 const char *str_val;
 const struct p9_qid *qid;
+const struct p9_stat *stat;
+uint16_t tlen;
 unsigned int len;
 unsigned int array_sz = 0;
 unsigned int elem_sz = 0;
@@ -259,6 +282,18 @@ static void vfill_buffer_at(void **data, const char *fmt, 
va_list ap)
 *data += len;
 break;
 
+case 's':
+stat = par;
+elem_sz = sizeof(*stat);
+tlen = stat->size + sizeof(stat->size);
+fill_buffer_at(data, "uuuUQUUULSUUU", , >size,
+   >type, >dev, >qid, >mode,
+   >atime, >mtime, >length,
+   stat->name, stat->uid, stat->gid, stat->muid,
+   stat->extension, >n_uid, >n_gid,
+   >n_muid);
+break;
+
 case 'U':
 put_unaligned(*(const uint32_t *)par, (uint32_t *)*data);
 elem_sz = sizeof(uint32_t);
@@ -1146,6 +1181,69 @@ static void p9_clunk(struct ring *ring, struct p9_header 
*hdr)
 fill_buffer(ring, hdr->cmd + 1, hdr->tag, "");
 }
 
+static void fill_p9_stat(device *device, struct p9_stat *p9s, struct stat *st,
+ const char *name)
+{
+memset(p9s, 0, sizeof(*p9s));
+fill_qid(device, NULL, >qid, st);
+p9s->mode = st->st_mode & 0777;
+if ( S_ISDIR(st->st_mode) )
+p9s->mode |= P9_CREATE_PERM_DIR;
+p9s->atime = st->st_atime;
+p9s->mtime = st->st_mtime;
+p9s->length = st->st_size;
+p9s->name = name;
+p9s->uid = "";
+p9s->gid = "";
+p9s->muid = "";
+p9s->extension = "";
+p9s->n_uid = 0;
+p9s->n_gid = 0;
+p9s->n_muid = 0;
+
+/*
+ * Size of individual fields without the size field, including 5 2-byte
+ * string length fields.
+ */
+p9s->size = 71 + strlen(p9s->name);
+}
+
+static void p9_stat(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+struct p9_fid *fidp;
+struct p9_stat p9s;
+struct stat st;
+int ret;
+
+ret = fill_data(ring, "U", );
+if ( ret != 1 )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp )
+{
+p9_error(ring, hdr->tag, ENOENT);
+return;
+}
+
+if ( fstatat(device->root_fd, fidp->path, , 0) < 0 )
+{
+p9_error(ring, hdr->tag, errno);
+goto out;
+}
+fill_p9_stat(device, , , strrchr(fidp->path, '/') + 1);
+
+fill_buffer(ring, hdr->cmd + 1, hdr->tag, "s", );
+
+ out:
+free_fid(device, fidp);
+}
+
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
@@ -1225,6 +1323,10 @@ void *io_thread(void *arg)
 p9_clunk(ring, );
 break;
 
+case P9_CMD_STAT:
+p9_stat(ring, );
+break;
+
 default:
 syslog(LOG_DEBUG, "%u.%u sent unhandled command %u\n",
ring->device->domid, ring->device->devid, hdr.cmd);
-- 
2.35.3




[PATCH v7 12/21] tools/xen-9pfsd: add 9pfs write request support

2024-02-15 Thread Juergen Gross
Add the write request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Anthony PERARD 
---
 tools/9pfsd/io.c | 54 
 1 file changed, 54 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 8c36106d90..1832c6b06e 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -33,6 +33,7 @@
 #define P9_CMD_WALK   110
 #define P9_CMD_OPEN   112
 #define P9_CMD_CREATE 114
+#define P9_CMD_WRITE  118
 #define P9_CMD_CLUNK  120
 #define P9_CMD_STAT   124
 
@@ -1244,6 +1245,55 @@ static void p9_stat(struct ring *ring, struct p9_header 
*hdr)
 free_fid(device, fidp);
 }
 
+static void p9_write(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+uint64_t off;
+unsigned int len;
+uint32_t written;
+void *buf;
+struct p9_fid *fidp;
+int ret;
+
+ret = fill_data(ring, "ULD", , , , ring->buffer);
+if ( ret != 3 )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp || !fidp->opened || fidp->isdir )
+{
+p9_error(ring, hdr->tag, EBADF);
+goto out;
+}
+
+buf = ring->buffer;
+
+while ( len != 0 )
+{
+ret = pwrite(fidp->fd, buf, len, off);
+if ( ret < 0 )
+break;
+len -= ret;
+buf += ret;
+off += ret;
+}
+
+written = buf - ring->buffer;
+if ( written == 0 )
+{
+p9_error(ring, hdr->tag, errno);
+goto out;
+}
+fill_buffer(ring, hdr->cmd + 1, hdr->tag, "U", );
+
+ out:
+free_fid(device, fidp);
+}
+
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
@@ -1319,6 +1369,10 @@ void *io_thread(void *arg)
 p9_create(ring, );
 break;
 
+case P9_CMD_WRITE:
+p9_write(ring, );
+break;
+
 case P9_CMD_CLUNK:
 p9_clunk(ring, );
 break;
-- 
2.35.3




[PATCH v7 19/21] tools/xenstored: add helpers for filename handling

2024-02-15 Thread Juergen Gross
Add some helpers for handling filenames which might need different
implementations between stubdom and daemon environments:

- expansion of relative filenames (those are not really defined today,
  just expand them to be relative to /var/lib/xen/xenstore)
- expansion of xenstore_daemon_rundir() (used e.g. for saving the state
  file in case of live update - needs to be unchanged in the daemon
  case, but should result in /var/lib/xen/xenstore for stubdom)

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- make absolute_filename() return a pointer to const (Julien Grall)
---
 tools/xenstored/core.c  | 15 +--
 tools/xenstored/core.h  |  5 -
 tools/xenstored/lu_daemon.c |  4 ++--
 tools/xenstored/minios.c|  5 +
 tools/xenstored/posix.c |  8 +++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c
index 48fc787ac1..bada1ad9a2 100644
--- a/tools/xenstored/core.c
+++ b/tools/xenstored/core.c
@@ -63,7 +63,7 @@ char **orig_argv;
 LIST_HEAD(connections);
 int tracefd = -1;
 bool keep_orphans = false;
-char *tracefile = NULL;
+const char *tracefile = NULL;
 static struct hashtable *nodes;
 unsigned int trace_flags = TRACE_OBJ | TRACE_IO;
 
@@ -137,6 +137,17 @@ void trace_destroy(const void *data, const char *type)
trace("obj: DESTROY %s %p\n", type, data);
 }
 
+/*
+ * Return an absolute filename.
+ * In case of a relative filename given as input, prepend XENSTORE_LIB_DIR.
+ */
+const char *absolute_filename(const void *ctx, const char *filename)
+{
+   if (filename[0] != '/')
+   return talloc_asprintf(ctx, XENSTORE_LIB_DIR "/%s", filename);
+   return talloc_strdup(ctx, filename);
+}
+
 void close_log(void)
 {
if (tracefd >= 0)
@@ -2759,7 +2770,7 @@ int main(int argc, char *argv[])
 #endif
 
if (tracefile)
-   tracefile = talloc_strdup(NULL, tracefile);
+   tracefile = absolute_filename(NULL, tracefile);
 
 #ifndef NO_LIVE_UPDATE
/* Read state in case of live update. */
diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index fe0ee90581..e58779e88c 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -341,7 +341,7 @@ void close_log(void);
 extern int orig_argc;
 extern char **orig_argv;
 
-extern char *tracefile;
+extern const char *tracefile;
 extern int tracefd;
 
 /* Trace flag values must be kept in sync with trace_switches[] contents. */
@@ -405,6 +405,9 @@ void set_socket_fd(int fd);
 void mount_9pfs(void);
 #endif
 
+const char *xenstore_rundir(void);
+const char *absolute_filename(const void *ctx, const char *filename);
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/lu_daemon.c b/tools/xenstored/lu_daemon.c
index 71bcabadd3..635ab0 100644
--- a/tools/xenstored/lu_daemon.c
+++ b/tools/xenstored/lu_daemon.c
@@ -24,7 +24,7 @@ void lu_get_dump_state(struct lu_dump_state *state)
state->size = 0;
 
state->filename = talloc_asprintf(NULL, "%s/state_dump",
- xenstore_daemon_rundir());
+ xenstore_rundir());
if (!state->filename)
barf("Allocation failure");
 
@@ -65,7 +65,7 @@ FILE *lu_dump_open(const void *ctx)
int fd;
 
filename = talloc_asprintf(ctx, "%s/state_dump",
-  xenstore_daemon_rundir());
+  xenstore_rundir());
if (!filename)
return NULL;
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 562a9b4972..e70386f8c7 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -128,3 +128,8 @@ void mount_9pfs(void)
 {
create_thread("mount-9pfs", mount_thread, NULL);
 }
+
+const char *xenstore_rundir(void)
+{
+   return XENSTORE_LIB_DIR;
+}
diff --git a/tools/xenstored/posix.c b/tools/xenstored/posix.c
index 496329dfd1..d88c82d972 100644
--- a/tools/xenstored/posix.c
+++ b/tools/xenstored/posix.c
@@ -326,9 +326,10 @@ void early_init(bool live_update, bool dofork, const char 
*pidfile)
 {
reopen_log();
 
-   /* Make sure xenstored directory exists. */
+   /* Make sure xenstored directories exist. */
/* Errors ignored here, will be reported when we open files */
mkdir(xenstore_daemon_rundir(), 0755);
+   mkdir(XENSTORE_LIB_DIR, 0755);
 
if (dofork) {
openlog("xenstored", 0, LOG_DAEMON);
@@ -406,3 +407,8 @@ void set_socket_fd(int fd)
 {
sock = fd;
 }
+
+const char *xenstore_rundir(void)
+{
+   return xenstore_daemon_rundir();
+}
-- 
2.35.3




[PATCH v7 17/21] tools: add 9pfs device to xenstore-stubdom

2024-02-15 Thread Juergen Gross
Add a 9pfs device to Xenstore stubdom in order to allow it to do e.g.
logging into a dom0 file.

Use the following parameters for the new device:

- tag = "Xen"
- type = "xen_9pfsd"
- path = "/var/lib/xen/xenstore"
- security-model = "none"

For now don't limit allowed file space or number of files.

Add a new libxl function for adding it similar to the function for
adding the console device.

Signed-off-by: Juergen Gross 
---
V2:
- add security_model parameter to new libxl function (Jason Andryuk)
V4:
- rename function to libxl_device_9pfs_add() (Anthony Perard)
- use a libxl_device_p9 pointer as parameter (Anthony Perard)
---
 tools/helpers/init-xenstore-domain.c |  7 +++
 tools/include/libxl.h| 15 +++
 tools/libs/light/libxl_9pfs.c| 16 
 3 files changed, 38 insertions(+)

diff --git a/tools/helpers/init-xenstore-domain.c 
b/tools/helpers/init-xenstore-domain.c
index 140ed610ae..1683438c5c 100644
--- a/tools/helpers/init-xenstore-domain.c
+++ b/tools/helpers/init-xenstore-domain.c
@@ -433,6 +433,12 @@ int main(int argc, char** argv)
 int rv, fd;
 char *maxmem_str = NULL;
 libxl_ctx *ctx;
+libxl_device_p9 p9 = { .backend_domid = 0,
+   .tag = "Xen",
+   .path = XEN_LIB_DIR"/xenstore",
+   .security_model = "none",
+   .type = LIBXL_P9_TYPE_XEN_9PFSD,
+};
 
 while ( (opt = getopt_long(argc, argv, "v", options, NULL)) != -1 )
 {
@@ -543,6 +549,7 @@ int main(int argc, char** argv)
 }
 libxl_console_add_xenstore(ctx, domid, 0, console_evtchn, console_gfn,
NULL);
+libxl_device_9pfs_add(ctx, domid, , NULL);
 libxl_ctx_free(ctx);
 
 fd = creat(XEN_RUN_DIR "/xenstored.pid", 0666);
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 9a3e702557..44a2205d2b 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -583,6 +583,13 @@
  * libxl_console_add_xenstore() in libxl.
  */
 #define LIBXL_HAVE_CONSOLE_ADD_XENSTORE 1
+
+/*
+ * LIBXL_HAVE_P9_ADD_XENSTORE indicates presence of the function
+ * libxl_device_9pfs_add() in libxl.
+ */
+#define LIBXL_HAVE_P9_ADD_XENSTORE 1
+
 /*
  * libxl ABI compatibility
  *
@@ -2074,6 +2081,14 @@ int libxl_console_add_xenstore(libxl_ctx *ctx, uint32_t 
domid, uint32_t backend,
const libxl_asyncop_how *ao_how)
LIBXL_EXTERNAL_CALLERS_ONLY;
 
+/* libxl_device_9pfs_add writes the Xenstore entries for a domain's
+ * primary 9pfs device based on domid, and device parameters.
+ * If needed it will start the backend daemon.
+ */
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+  LIBXL_EXTERNAL_CALLERS_ONLY;
+
 /* May be called with info_r == NULL to check for domain's existence.
  * Returns ERROR_DOMAIN_NOTFOUND if domain does not exist (used to return
  * ERROR_INVAL for this scenario). */
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index ddeb4f20a7..48f894f070 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -206,6 +206,22 @@ static void libxl__device_p9_add(libxl__egc *egc, uint32_t 
domid,
 aodev->callback(egc, aodev);
 }
 
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+{
+AO_CREATE(ctx, domid, ao_how);
+libxl__ao_device *aodev;
+
+GCNEW(aodev);
+libxl__prepare_ao_device(ao, aodev);
+aodev->action = LIBXL__DEVICE_ACTION_ADD;
+aodev->callback = device_addrm_aocomplete;
+
+libxl__device_p9_add(egc, domid, p9, aodev);
+
+return AO_INPROGRESS;
+}
+
 #define libxl_device_p9_list NULL
 #define libxl_device_p9_compare NULL
 
-- 
2.35.3




[PATCH v7 16/21] stubdom: extend xenstore stubdom configs

2024-02-15 Thread Juergen Gross
Extend the config files of the Xenstore stubdoms to include XENBUS
and 9PFRONT items in order to support file based logging.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 stubdom/xenstore-minios.cfg| 2 +-
 stubdom/xenstorepvh-minios.cfg | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/stubdom/xenstore-minios.cfg b/stubdom/xenstore-minios.cfg
index a41704bb6b..239da519b9 100644
--- a/stubdom/xenstore-minios.cfg
+++ b/stubdom/xenstore-minios.cfg
@@ -3,7 +3,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
diff --git a/stubdom/xenstorepvh-minios.cfg b/stubdom/xenstorepvh-minios.cfg
index 6af51f5753..752b90d7d3 100644
--- a/stubdom/xenstorepvh-minios.cfg
+++ b/stubdom/xenstorepvh-minios.cfg
@@ -4,7 +4,7 @@ CONFIG_NETFRONT=n
 CONFIG_FBFRONT=n
 CONFIG_KBDFRONT=n
 CONFIG_CONSFRONT=n
-CONFIG_XENBUS=n
 CONFIG_LWIP=n
+CONFIG_9PFRONT=y
 CONFIG_BALLOON=y
 XEN_INTERFACE_VERSION=__XEN_LATEST_INTERFACE_VERSION__
-- 
2.35.3




[PATCH v7 20/21] tools/xenstored: support complete log capabilities in stubdom

2024-02-15 Thread Juergen Gross
With 9pfs being fully available in Xenstore-stubdom now, there is no
reason to not fully support all logging capabilities in stubdom.

Open the logfile on stubdom only after the 9pfs file system has been
mounted.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- remove now stale comment in sysconfig.xencommons.in (Julien Grall)
---
 .../Linux/init.d/sysconfig.xencommons.in  |  1 -
 tools/hotplug/Linux/launch-xenstore.in|  1 +
 tools/xenstored/control.c | 30 +--
 tools/xenstored/minios.c  |  3 ++
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in 
b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
index 433e4849af..1bdd830d8a 100644
--- a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
+++ b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
@@ -58,7 +58,6 @@ XENSTORED_ARGS=
 ## Default: Not defined, tracing off
 #
 # Log xenstored messages
-# Only evaluated if XENSTORETYPE is "daemon".
 #XENSTORED_TRACE=[yes|on|1]
 
 ## Type: integer
diff --git a/tools/hotplug/Linux/launch-xenstore.in 
b/tools/hotplug/Linux/launch-xenstore.in
index e854ca1eb8..da4eeca7c5 100644
--- a/tools/hotplug/Linux/launch-xenstore.in
+++ b/tools/hotplug/Linux/launch-xenstore.in
@@ -98,6 +98,7 @@ test -f @CONFIG_DIR@/@CONFIG_LEAF_DIR@/xencommons && . 
@CONFIG_DIR@/@CONFIG_LEAF
[ -z "$XENSTORE_DOMAIN_SIZE" ] && XENSTORE_DOMAIN_SIZE=8
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --memory 
$XENSTORE_DOMAIN_SIZE"
[ -z "$XENSTORE_MAX_DOMAIN_SIZE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS --maxmem $XENSTORE_MAX_DOMAIN_SIZE"
+   [ -z "$XENSTORED_TRACE" ] || 
XENSTORE_DOMAIN_ARGS="$XENSTORE_DOMAIN_ARGS -T xenstored-trace.log"
 
echo -n Starting $XENSTORE_DOMAIN_KERNEL...
${LIBEXEC_BIN}/init-xenstore-domain $XENSTORE_DOMAIN_ARGS || exit 1
diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index b2f64d674f..dae23a5ac0 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -201,19 +201,6 @@ static int do_control_quota_s(const void *ctx, struct 
connection *conn,
return EINVAL;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_logfile(const void *ctx, struct connection *conn,
  const char **vec, int num)
 {
@@ -222,13 +209,26 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
 
close_log();
talloc_free(tracefile);
-   tracefile = talloc_strdup(NULL, vec[0]);
+   tracefile = absolute_filename(NULL, vec[0]);
reopen_log();
 
send_ack(conn, XS_CONTROL);
return 0;
 }
 
+#ifdef __MINIOS__
+static int do_control_memreport(const void *ctx, struct connection *conn,
+   const char **vec, int num)
+{
+   if (num)
+   return EINVAL;
+
+   talloc_report_full(NULL, stdout);
+
+   send_ack(conn, XS_CONTROL);
+   return 0;
+}
+#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
@@ -309,10 +309,10 @@ static struct cmd_s cmds[] = {
"[-c ] [-F] [-t ] \n"
"Default timeout is 60 seconds.", 5 },
 #endif
+   { "logfile", do_control_logfile, "" },
 #ifdef __MINIOS__
{ "memreport", do_control_memreport, "" },
 #else
-   { "logfile", do_control_logfile, "" },
{ "memreport", do_control_memreport, "[]" },
 #endif
{ "print", do_control_print, "" },
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index e70386f8c7..a229954cf4 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -122,6 +122,9 @@ static void mount_thread(void *p)
}
 
p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+
+   /* Start logging if selected. */
+   reopen_log();
 }
 
 void mount_9pfs(void)
-- 
2.35.3




[PATCH v7 14/21] tools/libs/light: add backend type for 9pfs PV devices

2024-02-15 Thread Juergen Gross
Make the backend type of 9pfs PV devices configurable. The default is
"qemu" with the related Xenstore backend-side directory being "9pfs".

Add another type "xen_9pfsd" with the related Xenstore backend-side
directory "xen_9pfs".

As additional security features it is possible to specify:
- "max-space" for limiting the maximum space consumed on the filesystem
  in MBs
- "max-files" for limiting the maximum number of files in the
  filesystem
- "max-open-files" for limiting the maximum number of concurrent open
  files

For convenience "auto-delete" is available to let the backend delete the
oldest file of the guest in case otherwise "max-space" or "max-files"
would be violated.

The xen-9pfsd daemon will be started by libxenlight automatically when
the first "xen_9pfs" device is being created.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: George Dunlap  # Golang bits
Reviewed-by: Anthony PERARD 
---
V3:
- regenerate go bindings
V4:
- rename libxl_device_p9_dm_needed() to libxl__device_p9_dm_needed()
  (Anthony Perard)
- reorder span related functions (Anthony Perard)
- add comment for xen9pfsd_spawn() return values (Anthony Perard)
- add LIBXL_HAVE_XEN_9PFS to libxl.h (Anthony Perard)
- use a copy of 'p9' in xen9pfsd_spawn() (Anthony Perard)
V6:
- rebase (Anthony Perard)
- drop callback from struct libxl__aop9_state (Anthony Perard)
---
 tools/golang/xenlight/helpers.gen.go  |  10 ++
 tools/golang/xenlight/types.gen.go|  12 ++
 tools/include/libxl.h |   7 +
 tools/libs/light/libxl_9pfs.c | 157 +-
 tools/libs/light/libxl_create.c   |   4 +-
 tools/libs/light/libxl_dm.c   |   2 +-
 tools/libs/light/libxl_types.idl  |  11 ++
 tools/libs/light/libxl_types_internal.idl |   1 +
 8 files changed, 197 insertions(+), 7 deletions(-)

diff --git a/tools/golang/xenlight/helpers.gen.go 
b/tools/golang/xenlight/helpers.gen.go
index 0f8e23773c..8f44397a4e 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -2440,6 +2440,11 @@ x.Tag = C.GoString(xc.tag)
 x.Path = C.GoString(xc.path)
 x.SecurityModel = C.GoString(xc.security_model)
 x.Devid = Devid(xc.devid)
+x.Type = P9Type(xc._type)
+x.MaxSpace = int(xc.max_space)
+x.MaxFiles = int(xc.max_files)
+x.MaxOpenFiles = int(xc.max_open_files)
+x.AutoDelete = bool(xc.auto_delete)
 
  return nil}
 
@@ -2458,6 +2463,11 @@ xc.path = C.CString(x.Path)}
 if x.SecurityModel != "" {
 xc.security_model = C.CString(x.SecurityModel)}
 xc.devid = C.libxl_devid(x.Devid)
+xc._type = C.libxl_p9_type(x.Type)
+xc.max_space = C.int(x.MaxSpace)
+xc.max_files = C.int(x.MaxFiles)
+xc.max_open_files = C.int(x.MaxOpenFiles)
+xc.auto_delete = C.bool(x.AutoDelete)
 
  return nil
  }
diff --git a/tools/golang/xenlight/types.gen.go 
b/tools/golang/xenlight/types.gen.go
index 9c8b7b81f6..d31722407a 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -122,6 +122,13 @@ NicTypeVifIoemu NicType = 1
 NicTypeVif NicType = 2
 )
 
+type P9Type int
+const(
+P9TypeUnknown P9Type = 0
+P9TypeQemu P9Type = 1
+P9TypeXen9Pfsd P9Type = 2
+)
+
 type ActionOnShutdown int
 const(
 ActionOnShutdownDestroy ActionOnShutdown = 1
@@ -889,6 +896,11 @@ Tag string
 Path string
 SecurityModel string
 Devid Devid
+Type P9Type
+MaxSpace int
+MaxFiles int
+MaxOpenFiles int
+AutoDelete bool
 }
 
 type DevicePvcallsif struct {
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 46bc774126..9a3e702557 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -615,6 +615,13 @@
  */
 #define LIBXL_HAVE_HVM_PIRQ 1
 
+/*
+ * LIBXL_HAVE_XEN_9PFS indicates the presence of the xen-9pfsd related
+ * fields in libxl_device_p9: type, max_space, max_files, max_open_files and
+ * auto_delete.
+ */
+#define LIBXL_HAVE_XEN_9PFS 1
+
 /*
  * libxl memory management
  *
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 5ab0d3aa21..900c0d46a0 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -33,20 +33,171 @@ static int libxl__set_xenstore_p9(libxl__gc *gc, uint32_t 
domid,
 
 flexarray_append_pair(front, "tag", p9->tag);
 
+if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD) {
+flexarray_append_pair(back, "max-space",
+  GCSPRINTF("%u", p9->max_space));
+flexarray_append_pair(back, "max-files",
+  GCSPRINTF("%u", p9->max_files));
+flexarray_append_pair(back, "max-open-files",
+  GCSPRINTF("%u", p9->max_open_files));
+flexarray_append_pair(back, "auto-delete",
+  p9->auto_delete ? "1" : "0");
+}
+

[PATCH v7 09/21] tools/xen-9pfsd: add 9pfs clunk request support

2024-02-15 Thread Juergen Gross
Add the clunk request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V3:
- use unlinkat() (Jason Andryuk)
---
 tools/9pfsd/io.c | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 5ec780af14..6af14e5ee9 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -32,6 +32,7 @@
 #define P9_CMD_ERROR  107
 #define P9_CMD_WALK   110
 #define P9_CMD_OPEN   112
+#define P9_CMD_CLUNK  120
 
 /* P9 protocol open flags. */
 #define P9_OREAD0   /* read */
@@ -960,6 +961,44 @@ static void p9_open(struct ring *ring, struct p9_header 
*hdr)
 p9_error(ring, hdr->tag, errno);
 }
 
+static void p9_clunk(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+struct p9_fid *fidp;
+int ret;
+
+ret = fill_data(ring, "U", );
+if ( ret != 1 )
+{
+p9_error(ring, hdr->tag, EINVAL);
+return;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp )
+{
+p9_error(ring, hdr->tag, ENOENT);
+return;
+}
+
+if ( fidp->opened )
+{
+fidp->opened = false;
+free_fid(device, fidp);
+close(fidp->fd);
+if ( fidp->mode & P9_OREMOVE )
+unlinkat(device->root_fd, fidp->path,
+ fidp->isdir ? AT_REMOVEDIR : 0);
+}
+
+/* 2 calls of free_fid(): one for our reference, and one to free it. */
+free_fid(device, fidp);
+free_fid(device, fidp);
+
+fill_buffer(ring, hdr->cmd + 1, hdr->tag, "");
+}
+
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
@@ -1031,6 +1070,10 @@ void *io_thread(void *arg)
 p9_open(ring, );
 break;
 
+case P9_CMD_CLUNK:
+p9_clunk(ring, );
+break;
+
 default:
 syslog(LOG_DEBUG, "%u.%u sent unhandled command %u\n",
ring->device->domid, ring->device->devid, hdr.cmd);
-- 
2.35.3




[PATCH v7 08/21] tools/xen-9pfsd: add 9pfs open request support

2024-02-15 Thread Juergen Gross
Add the open request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- don't allow to open symbolic link
V3:
- use openat() (Jason Andryuk)
- use common error handling in p9_open()
---
 tools/9pfsd/io.c| 137 
 tools/9pfsd/xen-9pfsd.h |   4 ++
 2 files changed, 141 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 731fbd1ad7..5ec780af14 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -19,6 +19,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include/* For cpu barriers. */
 #include 
 
@@ -29,6 +31,15 @@
 #define P9_CMD_ATTACH 104
 #define P9_CMD_ERROR  107
 #define P9_CMD_WALK   110
+#define P9_CMD_OPEN   112
+
+/* P9 protocol open flags. */
+#define P9_OREAD0   /* read */
+#define P9_OWRITE   1   /* write */
+#define P9_ORDWR2   /* read and write */
+#define P9_OMODEMASK 0x03
+#define P9_OTRUNC0x10   /* or'ed in, truncate file first */
+#define P9_OREMOVE   0x40   /* or'ed in, remove file after clunk */
 
 #define P9_MIN_MSIZE  2048
 #define P9_VERSION"9P2000.u"
@@ -827,6 +838,128 @@ static void p9_walk(struct ring *ring, struct p9_header 
*hdr)
 free(names);
 }
 
+/*
+ * Translate the mode byte of a 9P open request into open(2) flags.
+ *
+ * The access mode is taken from the bits covered by P9_OMODEMASK, and
+ * P9_OTRUNC adds O_TRUNC. Other bits of mode are ignored here.
+ *
+ * Returns the flags, or -1 with errno set to EINVAL if the access mode is
+ * none of P9_OREAD, P9_OWRITE or P9_ORDWR.
+ */
+static int open_flags_from_mode(uint8_t mode)
+{
+    int trunc = (mode & P9_OTRUNC) ? O_TRUNC : 0;
+
+    switch ( mode & P9_OMODEMASK )
+    {
+    case P9_OREAD:
+        return O_RDONLY | trunc;
+
+    case P9_OWRITE:
+        return O_WRONLY | trunc;
+
+    case P9_ORDWR:
+        return O_RDWR | trunc;
+    }
+
+    /* Any other access mode is not supported. */
+    errno = EINVAL;
+    return -1;
+}
+
+static unsigned int get_iounit(struct ring *ring, struct stat *st)   /* Compute the iounit value p9_open() returns to the client. */
+{
+    return (ring->max_size - st->st_blksize) & ~(st->st_blksize - 1);   /* max_size less one block, low bits cleared; NOTE(review): assumes st_blksize is a power of two not larger than max_size - confirm. */
+}
+
+/*
+ * Handle a 9P open request.
+ *
+ * The request carries a fid (4 bytes) and a mode byte. The fid must be known
+ * and not yet opened, and must not refer to a symbolic link. Directories can
+ * be opened with mode P9_OREAD only; for them a directory stream is stored
+ * in fidp->data. For all other files the open(2) flags are derived via
+ * open_flags_from_mode().
+ *
+ * On success the response contains the qid and the iounit, and the
+ * reference obtained via get_fid_ref() is not dropped here. On failure the
+ * reference is dropped again and an error response is sent.
+ */
+static void p9_open(struct ring *ring, struct p9_header *hdr)
+{
+    device *device = ring->device;
+    uint32_t fid;
+    uint8_t mode;
+    struct p9_fid *fidp;
+    struct stat st;
+    struct p9_qid qid;
+    uint32_t iounit;
+    int flags;
+    int ret;
+    int saved_errno;
+
+    ret = fill_data(ring, "Ub", &fid, &mode);
+    if ( ret != 2 )
+    {
+        p9_error(ring, hdr->tag, EINVAL);
+        return;
+    }
+    /* Reject any mode bits not known to this implementation. */
+    if ( mode & ~(P9_OMODEMASK | P9_OTRUNC | P9_OREMOVE) )
+    {
+        p9_error(ring, hdr->tag, EINVAL);
+        return;
+    }
+
+    fidp = get_fid_ref(device, fid);
+    if ( !fidp )
+    {
+        p9_error(ring, hdr->tag, ENOENT);
+        return;
+    }
+    if ( fidp->opened )
+    {
+        errno = EINVAL;
+        goto err;
+    }
+
+    /*
+     * AT_SYMLINK_NOFOLLOW is needed for the S_ISLNK() test below to be able
+     * to trigger: without it fstatat() reports the link's target.
+     */
+    if ( fstatat(device->root_fd, fidp->path, &st, AT_SYMLINK_NOFOLLOW) < 0 )
+    {
+        errno = ENOENT;
+        goto err;
+    }
+
+    /* Don't allow to open symbolic links. */
+    if ( S_ISLNK(st.st_mode) )
+    {
+        errno = EMLINK;
+        goto err;
+    }
+
+    fidp->isdir = S_ISDIR(st.st_mode);
+    fidp->mode = mode;
+    if ( fidp->isdir )
+    {
+        if ( mode != P9_OREAD )
+        {
+            errno = EINVAL;
+            goto err;
+        }
+        fidp->fd = openat(device->root_fd, fidp->path, O_RDONLY);
+        if ( fidp->fd < 0 )
+            goto err;
+        /*
+         * NOTE(review): if fdopendir() fails, fidp->fd stays open while
+         * fidp->opened is false - looks like a descriptor leak, confirm.
+         */
+        fidp->data = fdopendir(fidp->fd);
+        if ( !fidp->data )
+            goto err;
+    }
+    else
+    {
+        flags = open_flags_from_mode(mode);
+        if ( flags < 0 )
+            goto err;
+
+        fidp->fd = openat(device->root_fd, fidp->path, flags);
+        if ( fidp->fd < 0 )
+            goto err;
+    }
+
+    fill_qid(device, fidp->path, &qid, &st);
+    iounit = get_iounit(ring, &st);
+    fidp->opened = true;
+
+    fill_buffer(ring, hdr->cmd + 1, hdr->tag, "QU", &qid, &iounit);
+
+    return;
+
+ err:
+    /* Save errno, as dropping the reference might modify it. */
+    saved_errno = errno;
+    free_fid(device, fidp);
+    p9_error(ring, hdr->tag, saved_errno);
+}
+
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
@@ -894,6 +1027,10 @@ void *io_thread(void *arg)
 p9_walk(ring, );
 break;
 
+case P9_CMD_OPEN:
+p9_open(ring, );
+break;
+
 default:
 syslog(LOG_DEBUG, "%u.%u sent unhandled command %u\n",
ring->device->domid, ring->device->devid, hdr.cmd);
diff --git a/tools/9pfsd/xen-9pfsd.h b/tools/9pfsd/xen-9pfsd.h
index f01fffb0bb..757be2da4b 100644
--- a/tools/9pfsd/xen-9pfsd.h
+++ b/tools/9pfsd/xen-9pfsd.h
@@ -25,7 +25,11 @@ struct p9_fid {
 XEN_TAILQ_ENTRY(struct p9_fid) list;
 unsigned int fid;
 unsigned int ref;
+int fd;
+uint8_t mode;
 bool opened;
+bool isdir;
+void *data;/* File type specific. */
 char path[];
 };
 
-- 
2.35.3




[PATCH v7 06/21] tools/xen-9pfsd: add 9pfs attach request support

2024-02-15 Thread Juergen Gross
Add the attach request of the 9pfs protocol. This introduces the "fid"
scheme of the 9pfs protocol.

As this will be needed later, use a dedicated memory allocation
function in alloc_fid() and prepare a fid reference count.

For filling the qid data take the approach from the qemu 9pfs backend
implementation.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- make fill_qid() parameter stbuf const (Jason Andryuk)
- free fids after disconnecting guest (Jason Andryuk)
V3:
- only store relative path in fid (Jason Andryuk)
V4:
- store a path directly usable by *at() functions in fid (Jason Andryuk)
V7:
- use EBADF instead of EBADFD (Andrew Cooper)
- use strcpy() instead of strncpy() (Andrew Cooper)
---
 tools/9pfsd/io.c| 162 
 tools/9pfsd/xen-9pfsd.c |   6 ++
 tools/9pfsd/xen-9pfsd.h |  14 
 3 files changed, 182 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 839dd1112c..4c422b06ac 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -11,11 +11,14 @@
  * before looking for the next request.
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
 #include/* For cpu barriers. */
 #include 
 
@@ -23,6 +26,7 @@
 
 /* P9 protocol commands (response is either cmd+1 or P9_CMD_ERROR). */
 #define P9_CMD_VERSION100
+#define P9_CMD_ATTACH 104
 #define P9_CMD_ERROR  107
 
 #define P9_MIN_MSIZE  2048
@@ -461,6 +465,124 @@ static int fill_data(struct ring *ring, const char *fmt, 
...)
 return pars;
 }
 
+/*
+ * Look up a fid number in the fid list of a device.
+ *
+ * Returns the matching entry, or NULL if there is none. Neither locking nor
+ * reference counting is done here.
+ */
+static struct p9_fid *find_fid(device *device, unsigned int fid)
+{
+    struct p9_fid *fidp;
+
+    XEN_TAILQ_FOREACH(fidp, &device->fids, list)
+    {
+        if ( fidp->fid == fid )
+            return fidp;
+    }
+
+    return NULL;
+}
+
+/*
+ * Allocate the memory for a fid.
+ *
+ * The zeroed allocation covers struct p9_fid plus a copy of path (including
+ * the terminating 0 byte) in the trailing path[] member. The fid number and
+ * the path are stored in the new entry; the device parameter is not used.
+ *
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static struct p9_fid *alloc_fid_mem(device *device, unsigned int fid,
+                                    const char *path)
+{
+    size_t size = sizeof(struct p9_fid) + strlen(path) + 1;
+    struct p9_fid *new_fid = calloc(size, 1);
+
+    if ( new_fid )
+    {
+        new_fid->fid = fid;
+        strcpy(new_fid->path, path);
+    }
+
+    return new_fid;
+}
+
+/*
+ * Allocate a new fid for path and add it to the fid list of the device.
+ *
+ * The work is done with device->fid_mutex held. The new fid starts with a
+ * reference count of 1.
+ *
+ * Returns the new entry, or NULL on failure: errno is set to EBADF if the
+ * fid number is already in use and to EMFILE if the device already has
+ * max_open_files fids; otherwise the memory allocation failed.
+ */
+static struct p9_fid *alloc_fid(device *device, unsigned int fid,
+                                const char *path)
+{
+    struct p9_fid *fidp = NULL;
+
+    pthread_mutex_lock(&device->fid_mutex);
+
+    if ( find_fid(device, fid) )
+    {
+        errno = EBADF;
+        goto out;
+    }
+
+    if ( device->n_fids >= device->max_open_files )
+    {
+        errno = EMFILE;
+        goto out;
+    }
+
+    fidp = alloc_fid_mem(device, fid, path);
+    if ( !fidp )
+        goto out;
+
+    fidp->ref = 1;
+    XEN_TAILQ_INSERT_HEAD(&device->fids, fidp, list);
+    device->n_fids++;
+
+ out:
+    pthread_mutex_unlock(&device->fid_mutex);
+
+    return fidp;
+}
+
+/*
+ * Drop one reference of a fid.
+ *
+ * When the last reference is gone, the fid is removed from the fid list of
+ * the device and its memory is freed. The reference count and the list are
+ * modified with device->fid_mutex held. A NULL fidp is ignored.
+ */
+static void free_fid(device *device, struct p9_fid *fidp)
+{
+    if ( !fidp )
+        return;
+
+    pthread_mutex_lock(&device->fid_mutex);
+
+    fidp->ref--;
+    if ( !fidp->ref )
+    {
+        device->n_fids--;
+        XEN_TAILQ_REMOVE(&device->fids, fidp, list);
+        free(fidp);
+    }
+
+    pthread_mutex_unlock(&device->fid_mutex);
+}
+
+/*
+ * Free all fids of a device.
+ *
+ * Each entry is removed from the fid list and freed regardless of its
+ * reference count. device->fid_mutex is not taken and device->n_fids is
+ * not modified here.
+ */
+void free_fids(device *device)
+{
+    struct p9_fid *fidp;
+
+    while ( (fidp = XEN_TAILQ_FIRST(&device->fids)) != NULL )
+    {
+        XEN_TAILQ_REMOVE(&device->fids, fidp, list);
+        free(fidp);
+    }
+}
+
+/*
+ * Turn a path into a relative one.
+ *
+ * "/" is mapped to ".", any other path with a leading "/" has that first
+ * character skipped, and all other paths are returned unchanged. The result
+ * is either a string literal or points into path.
+ */
+static const char *relpath_from_path(const char *path)
+{
+    if ( path[0] != '/' )
+        return path;
+
+    /* A lone "/" has nothing after the slash. */
+    return path[1] ? path + 1 : ".";
+}
+
+/*
+ * Fill a qid from the stat data of a file.
+ *
+ * If stbuf is NULL, the stat data is obtained via fstatat() for path
+ * relative to device->root_fd; otherwise path is not used.
+ *
+ * Returns 0 on success, or the errno value of a failed fstatat().
+ */
+static int fill_qid(device *device, const char *path, struct p9_qid *qid,
+                    const struct stat *stbuf)
+{
+    struct stat st;
+
+    if ( !stbuf )
+    {
+        if ( fstatat(device->root_fd, path, &st, 0) )
+            return errno;
+
+        stbuf = &st;
+    }
+
+    qid->type = S_ISDIR(stbuf->st_mode) ? QID_TYPE_DIR : 0;
+    /* The version is derived from modification time and file size. */
+    qid->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
+    qid->path = stbuf->st_ino;
+
+    return 0;
+}
+
 static void p9_error(struct ring *ring, uint16_t tag, uint32_t err)
 {
 unsigned int erroff;
@@ -502,6 +624,42 @@ static void p9_version(struct ring *ring, struct p9_header 
*hdr)
 fill_buffer(ring, hdr->cmd + 1, hdr->tag, "US", >max_size, version);
 }
 
+static void p9_attach(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+uint32_t dummy_u32;
+unsigned int dummy_uint;
+struct p9_qid qid;
+int ret;
+
+ret = fill_data(ring, "UUSSU", , _u32, _uint, _uint,
+_u32);
+if ( ret != 5 )
+{
+p9_error(ring, hdr->tag, errno);
+return;
+}
+
+device->root_fid = alloc_fid(device, fid, relpath_from_path("/"));
+if ( !device->root_fid )
+{
+p9_error(ring, hdr->tag, errno);
+retu

[PATCH v7 07/21] tools/xen-9pfsd: add 9pfs walk request support

2024-02-15 Thread Juergen Gross
Add the walk request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- don't allow walking across symbolic links
V6:
- use EBADF instead of EBADFD (Andrew Cooper)
- use strncpy() with strlen() + 1 (Andrew Cooper)
V7:
- undo V6 changes as they were in wrong patch
---
 tools/9pfsd/io.c| 171 
 tools/9pfsd/xen-9pfsd.h |   1 +
 2 files changed, 172 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 4c422b06ac..731fbd1ad7 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -28,9 +28,11 @@
 #define P9_CMD_VERSION100
 #define P9_CMD_ATTACH 104
 #define P9_CMD_ERROR  107
+#define P9_CMD_WALK   110
 
 #define P9_MIN_MSIZE  2048
 #define P9_VERSION"9P2000.u"
+#define P9_WALK_MAXELEM   16
 
 struct p9_qid {
 uint8_t type;
@@ -478,6 +480,21 @@ static struct p9_fid *find_fid(device *device, unsigned 
int fid)
 return NULL;
 }
 
+/*
+ * Look up a fid and take a reference on it.
+ *
+ * Lookup and increment of the reference count are done with
+ * device->fid_mutex held.
+ *
+ * Returns the fid entry, or NULL if the fid number is not known. The
+ * reference is dropped again via free_fid().
+ */
+static struct p9_fid *get_fid_ref(device *device, unsigned int fid)
+{
+    struct p9_fid *fidp;
+
+    pthread_mutex_lock(&device->fid_mutex);
+
+    fidp = find_fid(device, fid);
+    if ( fidp )
+        fidp->ref++;
+
+    pthread_mutex_unlock(&device->fid_mutex);
+
+    return fidp;
+}
+
 static struct p9_fid *alloc_fid_mem(device *device, unsigned int fid,
 const char *path)
 {
@@ -576,6 +593,10 @@ static int fill_qid(device *device, const char *path, 
struct p9_qid *qid,
 stbuf = 
 }
 
+/* Don't allow symbolic links. */
+if ( S_ISLNK(stbuf->st_mode) )
+return EMLINK;
+
 qid->type = S_ISDIR(stbuf->st_mode) ? QID_TYPE_DIR : 0;
 qid->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 qid->path = stbuf->st_ino;
@@ -583,6 +604,20 @@ static int fill_qid(device *device, const char *path, 
struct p9_qid *qid,
 return 0;
 }
 
+/*
+ * Check a single path element for being acceptable.
+ *
+ * A name is refused if it is empty, contains a "/", or is "." or "..".
+ */
+static bool name_ok(const char *str)
+{
+    bool is_dot_entry = !strcmp(str, ".") || !strcmp(str, "..");
+
+    return *str && !strchr(str, '/') && !is_dot_entry;
+}
+
 static void p9_error(struct ring *ring, uint16_t tag, uint32_t err)
 {
 unsigned int erroff;
@@ -660,6 +695,138 @@ static void p9_attach(struct ring *ring, struct p9_header 
*hdr)
 fill_buffer(ring, hdr->cmd + 1, hdr->tag, "Q", );
 }
 
+static void p9_walk(struct ring *ring, struct p9_header *hdr)
+{
+device *device = ring->device;
+uint32_t fid;
+uint32_t newfid;
+struct p9_fid *fidp = NULL;
+struct p9_qid *qids = NULL;
+unsigned int n_names = 0;
+unsigned int *names = NULL;
+unsigned int walked = 0;
+unsigned int i;
+char *path = NULL;
+unsigned int path_len;
+int ret;
+
+ret = fill_data(ring, "UUaS", , , _names, );
+if ( n_names > P9_WALK_MAXELEM )
+{
+p9_error(ring, hdr->tag, EINVAL);
+goto out;
+}
+if ( ret != 3 + n_names )
+{
+p9_error(ring, hdr->tag, errno);
+goto out;
+}
+
+fidp = get_fid_ref(device, fid);
+if ( !fidp )
+{
+p9_error(ring, hdr->tag, ENOENT);
+goto out;
+}
+if ( fidp->opened )
+{
+p9_error(ring, hdr->tag, EINVAL);
+goto out;
+}
+
+path_len = strlen(fidp->path) + 1;
+for ( i = 0; i < n_names; i++ )
+{
+if ( !name_ok(ring->str + names[i]) )
+{
+p9_error(ring, hdr->tag, ENOENT);
+goto out;
+}
+path_len += strlen(ring->str + names[i]) + 1;
+}
+path = calloc(path_len + 1, 1);
+if ( !path )
+{
+p9_error(ring, hdr->tag, ENOMEM);
+goto out;
+}
+strcpy(path, fidp->path);
+
+if ( n_names )
+{
+qids = calloc(n_names, sizeof(*qids));
+if ( !qids )
+{
+p9_error(ring, hdr->tag, ENOMEM);
+goto out;
+}
+for ( i = 0; i < n_names; i++ )
+{
+strcat(path, "/");
+strcat(path, ring->str + names[i]);
+ret = fill_qid(device, path, qids + i, NULL);
+if ( ret )
+{
+if ( !walked )
+{
+p9_error(ring, hdr->tag, errno);
+goto out;
+}
+break;
+}
+walked++;
+}
+}
+
+if ( walked == n_names )
+{
+bool ok = false;
+
+if ( fid == newfid )
+{
+struct p9_fid *new_fidp;
+
+pthread_mutex_lock(>fid_mutex);
+
+if ( fidp->ref != 2 )
+{
+errno = EBUSY;
+}
+else
+{
+new_fidp = alloc_fid_mem(device, fid, path);
+if ( new_fidp )
+   

[PATCH v7 04/21] tools/xen-9pfsd: add 9pfs response generation support

2024-02-15 Thread Juergen Gross
Add support for generating a 9pfs protocol response via a format-based
approach.

Strings are stored in a per-device string buffer and are
referenced via their offset in this buffer. This avoids
having to dynamically allocate memory for each individual string.

As a first user of the response handling add a generic p9_error()
function which will be used to return any error to the client.

Add all format parsing variants in order to avoid additional code churn
later when adding the users of those variants. Prepare a special case
for the "read" case already (format character 'D'): in order to avoid
adding another buffer for read data, support doing the read I/O directly
into the response buffer.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- check parameter size limits (Jason Andryuk)
V3:
- use new unaligned access macros (Jason Andryuk)
V4:
- use recursion in fill_buffer() as a preparation for reading dirs
---
 tools/9pfsd/io.c| 217 +++-
 tools/9pfsd/xen-9pfsd.h |   3 +
 2 files changed, 219 insertions(+), 1 deletion(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 4312a62dfe..4a44c70c4d 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -11,6 +11,7 @@
  * before looking for the next request.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -20,6 +21,16 @@
 
 #include "xen-9pfsd.h"
 
+/* P9 protocol commands (response is either cmd+1 or P9_CMD_ERROR). */
+#define P9_CMD_ERROR  107
+
+struct p9_qid {   /* Qid as filled in by fill_qid() and sent via format character 'Q'. */
+    uint8_t type;   /* 0 or QID_TYPE_DIR. */
+#define QID_TYPE_DIR  0x80   /* Set in type for directories. */
+    uint32_t version;
+    uint64_t path;
+};
+
 /*
  * Note that the ring names "in" and "out" are from the frontend's
  * perspective, so the "in" ring will be used for responses to the frontend,
@@ -100,6 +111,200 @@ static bool io_work_pending(struct ring *ring)
 return ring->handle_response ? ring_in_free(ring) : ring_out_data(ring);
 }
 
+static void fmt_err(const char *fmt)   /* Log an illegal format string and terminate the process. */
+{
+    syslog(LOG_CRIT, "illegal format %s passed to fill_buffer()", fmt);
+    exit(1);   /* Does not return to the caller. */
+}
+
+/*
+ * Fill buffer with response data.
+ * fmt is a sequence of format characters. Supported characters are:
+ * a: an array (2 bytes number of elements + the following format as elements)
+ *The number of elements is passed in the first unsigned int parameter, the
+ *next parameter is a pointer to an array of elements as denoted by the 
next
+ *format character.
+ * b: 1 byte unsigned integer
+ * u: 2 byte unsigned integer
+ *The parameter is a pointer to a uint16_t value
+ * D: Data blob (4 byte length + <length> bytes)
+ *2 parameters are consumed, first an unsigned int for the length, then a
+ *pointer to the first uint8_t value.
+ *No array support.
+ * L: 8 byte unsigned integer
+ *The parameter is a pointer to a uint64_t value
+ * Q: Qid (struct p9_qid)
+ * S: String (2 byte length + <length> characters)
+ *The length is obtained via strlen() of the parameter, being a pointer
+ *to the first character of the string
+ * U: 4 byte unsigned integer
+ *The parameter is a pointer to a uint32_t value
+ */
+static void fill_buffer_at(void **data, const char *fmt, ...);
+static void vfill_buffer_at(void **data, const char *fmt, va_list ap)
+{
+const char *f;
+const void *par;
+const char *str_val;
+const struct p9_qid *qid;
+unsigned int len;
+unsigned int array_sz = 0;
+unsigned int elem_sz = 0;
+
+for ( f = fmt; *f; f++ )
+{
+if ( !array_sz )
+par = va_arg(ap, const void *);
+else
+{
+par += elem_sz;
+array_sz--;
+}
+
+switch ( *f )
+{
+case 'a':
+f++;
+if ( !*f || array_sz )
+fmt_err(fmt);
+array_sz = *(const unsigned int *)par;
+if ( array_sz > 0x )
+{
+syslog(LOG_CRIT, "array size %u in fill_buffer()", array_sz);
+exit(1);
+}
+put_unaligned(array_sz, (uint16_t *)*data);
+*data += sizeof(uint16_t);
+par = va_arg(ap, const void *);
+elem_sz = 0;
+break;
+
+case 'b':
+put_unaligned(*(const uint8_t *)par, (uint8_t *)*data);
+elem_sz = sizeof(uint8_t);
+*data += sizeof(uint8_t);
+break;
+
+case 'u':
+put_unaligned(*(const uint16_t *)par, (uint16_t *)*data);
+elem_sz = sizeof(uint16_t);
+*data += sizeof(uint16_t);
+break;
+
+case 'D':
+if ( array_sz )
+fmt_err(fmt);
+len = *(const unsigned int *)par;
+put_unaligned(len, (uint32_t *)*data);
+*data += sizeof(uint32_t);
+par = va_arg(ap, const void *);
+if ( *data != par )
+memcpy(*data, 

[PATCH v7 05/21] tools/xen-9pfsd: add 9pfs version request support

2024-02-15 Thread Juergen Gross
Add the version request of the 9pfs protocol. For the version use the
"9P2000.u" variant, as it is supported by Mini-OS and Linux.

For the request parsing add all format items needed even in future in
order to avoid code churn for those additions later.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V3:
- use unaligned helper macros (Jason Andryuk)
---
 tools/9pfsd/io.c | 201 +++
 1 file changed, 201 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 4a44c70c4d..839dd1112c 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -22,8 +22,12 @@
 #include "xen-9pfsd.h"
 
 /* P9 protocol commands (response is either cmd+1 or P9_CMD_ERROR). */
+#define P9_CMD_VERSION100
 #define P9_CMD_ERROR  107
 
+#define P9_MIN_MSIZE  2048
+#define P9_VERSION"9P2000.u"
+
 struct p9_qid {
 uint8_t type;
 #define QID_TYPE_DIR  0x80
@@ -294,6 +298,169 @@ static unsigned int add_string(struct ring *ring, const 
char *str,
 return ret;
 }
 
+/*
+ * Check that len bytes starting at data are inside the current request.
+ *
+ * The request occupies hdr->size bytes from the start of ring->buffer. The
+ * test is done on offsets instead of on "data + len", as that sum could
+ * wrap around for a huge len taken from the request.
+ *
+ * Returns true if the range is covered by the request, otherwise false with
+ * errno set to E2BIG.
+ */
+static bool chk_data(struct ring *ring, void *data, unsigned int len)
+{
+    struct p9_header *hdr = ring->buffer;
+    size_t off = (char *)data - (char *)ring->buffer;
+
+    if ( off <= hdr->size && len <= hdr->size - off )
+        return true;
+
+    errno = E2BIG;
+
+    return false;
+}
+
+/*
+ * Store one element of request data into the current parameter.
+ *
+ * Without a pending array (*array_sz is 0) the element is copied to *par.
+ * With a pending array the array memory is allocated on first use (sized
+ * for the *array_sz elements pending at that time), the element is copied
+ * to *par, and *par and *array_sz are advanced to the next element.
+ *
+ * Returns false if the array allocation failed, true otherwise.
+ */
+static bool fill_data_elem(void **par, void **array, unsigned int *array_sz,
+                           unsigned int elem_sz, void *data)
+{
+    /* Scalar parameter: just copy the value. */
+    if ( !*array_sz )
+    {
+        memcpy(*par, data, elem_sz);
+        return true;
+    }
+
+    /* First element of an array: allocate the array. */
+    if ( !*array )
+    {
+        *array = calloc(*array_sz, elem_sz);
+        if ( !*array )
+            return false;
+        *par = *array;
+    }
+
+    memcpy(*par, data, elem_sz);
+    *par = (char *)*par + elem_sz;
+    (*array_sz)--;
+
+    return true;
+}
+
+/*
+ * Fill variables with request data.
+ * fmt is a sequence of format characters. Supported characters are:
+ * a: an array (2 bytes number of elements + the following format as elements)
+ *The number of elements is stored in the first unsigned int parameter, the
+ *next parameter is a pointer to an array of elements as denoted by the 
next
+ *format character. The array is allocated dynamically.
+ * b: 1 byte unsigned integer
+ *The value is stored in the next parameter with type uint8_t.
+ * D: Data blob (4 byte length + <length> bytes)
+ *2 parameters are consumed, first an unsigned int for the length, then a
+ *pointer to the first uint8_t value.
+ *No array support.
+ * L: 8 byte unsigned integer
+ *The value is stored in the next parameter with type uint64_t.
+ * S: String (2 byte length + <length> characters)
+ *The 0-terminated string is stored in device->str + off, off is stored in
+ *the next parameter with type unsigned int.
+ * U: 4 byte unsigned integer
+ *The value is stored in the next parameter with type uint32_t.
+ *
+ * Return value: number of filled variables, errno will be set in case of
+ *   error.
+ */
+static int fill_data(struct ring *ring, const char *fmt, ...)
+{
+struct p9_header *hdr = ring->buffer;
+void *data = hdr + 1;
+void *par;
+unsigned int pars = 0;
+const char *f;
+va_list ap;
+unsigned int len;
+unsigned int str_off;
+unsigned int array_sz = 0;
+void **array = NULL;
+
+va_start(ap, fmt);
+
+for ( f = fmt; *f; f++ )
+{
+if ( !array_sz )
+par = va_arg(ap, void *);
+
+switch ( *f )
+{
+case 'a':
+f++;
+if ( !*f || array_sz )
+fmt_err(fmt);
+if ( !chk_data(ring, data, sizeof(uint16_t)) )
+return pars;
+array_sz = get_unaligned((uint16_t *)data);
+data += sizeof(uint16_t);
+*(unsigned int *)par = array_sz;
+array = va_arg(ap, void **);
+*array = NULL;
+break;
+
+case 'b':
+if ( !chk_data(ring, data, sizeof(uint8_t)) )
+return pars;
+if ( !fill_data_elem(, array, _sz, sizeof(uint8_t),
+ data) )
+return pars;
+data += sizeof(uint8_t);
+break;
+
+case 'D':
+if ( array_sz )
+fmt_err(fmt);
+if ( !chk_data(ring, data, sizeof(uint32_t)) )
+return pars;
+len = get_unaligned((uint32_t *)data);
+data += sizeof(uint32_t);
+*(unsigned int *)par = len;
+par = va_arg(ap, void *);
+if ( !chk_data(ring, data, len) )
+return pars;
+memcpy(par, data, len);
+data += len;
+break;
+
+case 'L':
+if ( !chk_data(ring, data, sizeof(uint64_t)) )
+return pars;
+if ( !fill_data_elem(, array, _sz, sizeof(uint64_t),
+ data) )
+return pars;
+data += 

[PATCH v7 02/21] tools/xen-9pfsd: connect to frontend

2024-02-15 Thread Juergen Gross
Add the code for connecting to frontends to xen-9pfsd.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V2:
- support multiple rings per device (Jason Andryuk)
- don't set .revents initially (Jason Andryuk)
- call poll() with infinite timeout (Jason Andryuk)
- take mutex before calling pthread_cond_signal()
V3:
- fix SPDX identifier (Andrew Cooper)
- better validation of host path (Jason Andryuk)
- don't hard-code dom0 in backend nodes (Jason Andryuk)
- use bool instead of int for some functions' return types
- open root directory (Jason Andryuk)
---
 tools/9pfsd/Makefile|   2 +-
 tools/9pfsd/io.c|  45 +++
 tools/9pfsd/xen-9pfsd.c | 653 +++-
 tools/9pfsd/xen-9pfsd.h |  61 
 4 files changed, 757 insertions(+), 4 deletions(-)
 create mode 100644 tools/9pfsd/io.c
 create mode 100644 tools/9pfsd/xen-9pfsd.h

diff --git a/tools/9pfsd/Makefile b/tools/9pfsd/Makefile
index 089cf5ae24..50573121ed 100644
--- a/tools/9pfsd/Makefile
+++ b/tools/9pfsd/Makefile
@@ -10,7 +10,7 @@ LDFLAGS += $(PTHREAD_LDFLAGS)
 
 TARGETS := xen-9pfsd
 
-XEN-9PFSD_OBJS = xen-9pfsd.o
+XEN-9PFSD_OBJS = xen-9pfsd.o io.o
 $(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxenstore)
 $(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxenevtchn)
 $(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxengnttab)
diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
new file mode 100644
index 00..eb7c136e09
--- /dev/null
+++ b/tools/9pfsd/io.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * xen-9pfsd - Xen 9pfs daemon
+ *
+ * Copyright (C) 2024 Juergen Gross 
+ *
+ * I/O thread handling.
+ */
+
+#include 
+#include 
+#include 
+
+#include "xen-9pfsd.h"
+
+/*
+ * Tell whether the I/O thread has something to do.
+ * For now the only reason is a request to stop the thread.
+ */
+static bool io_work_pending(struct ring *ring)
+{
+    return ring->stop_thread ? true : false;
+}
+
+/*
+ * Main function of the I/O thread of a ring.
+ *
+ * arg is the struct ring to handle. The thread loops until
+ * ring->stop_thread is set. With no work pending it unmasks the ring's
+ * event channel and waits on ring->cond, with ring->mutex held around the
+ * check and the wait.
+ *
+ * ring->thread_active is cleared before returning. Always returns NULL.
+ */
+void *io_thread(void *arg)
+{
+    struct ring *ring = arg;
+
+    while ( !ring->stop_thread )
+    {
+        pthread_mutex_lock(&ring->mutex);
+        if ( !io_work_pending(ring) )
+        {
+            /* A failing unmask is logged only, the thread still waits. */
+            if ( xenevtchn_unmask(xe, ring->evtchn) < 0 )
+                syslog(LOG_WARNING, "xenevtchn_unmask() failed");
+            pthread_cond_wait(&ring->cond, &ring->mutex);
+        }
+        pthread_mutex_unlock(&ring->mutex);
+
+        /* TODO: I/O handling. */
+    }
+
+    ring->thread_active = false;
+
+    return NULL;
+}
diff --git a/tools/9pfsd/xen-9pfsd.c b/tools/9pfsd/xen-9pfsd.c
index 6939d01574..73b6c3a30e 100644
--- a/tools/9pfsd/xen-9pfsd.c
+++ b/tools/9pfsd/xen-9pfsd.c
@@ -24,34 +24,632 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
+#include "xen-9pfsd.h"
+
+/*
+ * List of currently known devices.
+ * The list itself is modified only in the main thread. When a device is being
+ * removed its memory needs to be freed after the I/O thread (if existing)
+ * has stopped.
+ */
+static XEN_TAILQ_HEAD(devhead, device) devs = XEN_TAILQ_HEAD_INITIALIZER(devs);
+
+struct path {   /* Fixed size buffer for a path string, filled e.g. by construct_frontend_path(). */
+    char path[100];
+};
+
 static volatile bool stop_me;
 static bool daemon_running;
 static struct xs_handle *xs;
 static xengnttab_handle *xg;
-static xenevtchn_handle *xe;
+static unsigned int now;
+
+xenevtchn_handle *xe;
 
 static void handle_stop(int sig)
 {
 stop_me = true;
 }
 
+/*
+ * Validate device->host_path and make sure it exists as a directory.
+ *
+ * The path must be absolute, must not contain "//" and must not end with
+ * "/". The path is checked component by component: an existing component
+ * must be a directory, a missing one is created via mkdir() with mode 0777.
+ *
+ * Returns 0 if the complete path is usable, 1 otherwise.
+ */
+static int check_host_path(device *device)
+{
+    struct stat statbuf;
+    char *path, *p;
+    int ret = 1;
+
+    if ( !device->host_path )
+        return 1;
+
+    /* Path must be absolute. */
+    if ( device->host_path[0] != '/' )
+        return 1;
+
+    /* No double "/". */
+    if ( strstr(device->host_path, "//") )
+        return 1;
+
+    /* No trailing "/" (includes refusing to share "/"). */
+    if ( device->host_path[strlen(device->host_path) - 1] == '/' )
+        return 1;
+
+    /* Work on a copy, as the path is cut at each "/" while walking it. */
+    path = strdup(device->host_path);
+    if ( !path )
+    {
+        syslog(LOG_CRIT, "memory allocation failure!");
+        return 1;
+    }
+
+    for ( p = path; p; )
+    {
+        /* Terminate the string after the next component. */
+        p = strchr(p + 1, '/');
+        if ( p )
+            *p = 0;
+
+        if ( !stat(path, &statbuf) )
+        {
+            /* S_ISDIR(), as testing the S_IFDIR bit matches other types. */
+            if ( !S_ISDIR(statbuf.st_mode) )
+                break;
+        }
+        else if ( mkdir(path, 0777) )
+            break;
+
+        /* Last component is fine, whether it existed or was just created. */
+        if ( !p )
+        {
+            ret = 0;
+            break;
+        }
+
+        *p = '/';
+    }
+
+    free(path);
+    return ret;
+}
+
+static void construct_frontend_path(device *device, const char *node,
+                                    struct path *p)   /* Build the frontend path of node for this device into p->path. */
+{
+    snprintf(p->path, sizeof(p->path), "/local/domain/%u/device/9pfs/%u/%s",
+             device->domid, device->devid, node);   /* snprintf() truncates a result not fitting into p->path. */
+}
+
+static void construct

[PATCH v7 00/21] tools: enable xenstore-stubdom to use 9pfs

2024-02-15 Thread Juergen Gross
This series is adding 9pfs support to Xenstore-stubdom, enabling it
to do logging to a dom0 directory.

This is a prerequisite for the final goal to add live update support
to Xenstore-stubdom, as it enables the stubdom to store its state in
a dom0 file.

The 9pfs backend is a new daemon written from scratch. Using a
dedicated 9pfs daemon has several advantages:

- it uses far fewer resources than a full-blown qemu process
- it can serve multiple guests (the idea is to use it for other
  infrastructure domains, like qemu-stubdom or driver domains, too)
- it is designed to support several security enhancements, like
  limiting the number of files for a guest, or limiting the allocated
  file system space
- it doesn't support file links (neither hard nor soft links) or
  referencing parent directories via "..", minimizing the risk that
  a guest can "escape" from its home directory

Note that for now the daemon only contains the minimal needed
functionality to do logging from Xenstore-stubdom. I didn't want to
add all the 9pfs commands and security add-ons in the beginning, in
order to avoid needless efforts in case the idea of the daemon is
being rejected.

Please note that the pending patch for updating the Mini-OS commit
in Config.mk needs to be applied for patch "stubdom: extend xenstore
stubdom configs" and the following ones.

Changes in V7:
- fixed V6 bugs

Changes in V6:
- patch 1 of V5 has been applied
- rebase
- addressed comments

Changes in V5:
- 10 patches have been applied already
- rename source directory to tools/9pfsd
- addressed comments

Changes in V4:
- patch 2 of V3 was applied
- added support of reading directories
- addressed review comments

Changes in V3:
- new patches 1, 23-25
- addressed review comments

Changes in V2:
- support of multiple rings per device
- xenlogd->xen-9pfsd rename
- addressed review comments
- fixed some bugs

Juergen Gross (21):
  tools: add a new xen 9pfs daemon
  tools/xen-9pfsd: connect to frontend
  tools/xen-9pfsd: add transport layer
  tools/xen-9pfsd: add 9pfs response generation support
  tools/xen-9pfsd: add 9pfs version request support
  tools/xen-9pfsd: add 9pfs attach request support
  tools/xen-9pfsd: add 9pfs walk request support
  tools/xen-9pfsd: add 9pfs open request support
  tools/xen-9pfsd: add 9pfs clunk request support
  tools/xen-9pfsd: add 9pfs create request support
  tools/xen-9pfsd: add 9pfs stat request support
  tools/xen-9pfsd: add 9pfs write request support
  tools/xen-9pfsd: add 9pfs read request support
  tools/libs/light: add backend type for 9pfs PV devices
  tools/xl: support new 9pfs backend xen_9pfsd
  stubdom: extend xenstore stubdom configs
  tools: add 9pfs device to xenstore-stubdom
  tools/xenstored: mount 9pfs device in stubdom
  tools/xenstored: add helpers for filename handling
  tools/xenstored: support complete log capabilities in stubdom
  tools/xenstored: have a single do_control_memreport()

 docs/man/xl.cfg.5.pod.in  |   36 +-
 stubdom/xenstore-minios.cfg   |2 +-
 stubdom/xenstorepvh-minios.cfg|2 +-
 tools/9pfsd/.gitignore|1 +
 tools/9pfsd/Makefile  |   38 +
 tools/9pfsd/io.c  | 1511 +
 tools/9pfsd/xen-9pfsd.c   |  800 +
 tools/9pfsd/xen-9pfsd.h   |   99 ++
 tools/Makefile|1 +
 tools/golang/xenlight/helpers.gen.go  |   10 +
 tools/golang/xenlight/types.gen.go|   12 +
 tools/helpers/init-xenstore-domain.c  |7 +
 .../Linux/init.d/sysconfig.xencommons.in  |1 -
 tools/hotplug/Linux/launch-xenstore.in|1 +
 tools/include/libxl.h |   22 +
 tools/libs/light/libxl_9pfs.c |  191 ++-
 tools/libs/light/libxl_create.c   |4 +-
 tools/libs/light/libxl_dm.c   |2 +-
 tools/libs/light/libxl_types.idl  |   11 +
 tools/libs/light/libxl_types_internal.idl |1 +
 tools/xenstored/control.c |   29 +-
 tools/xenstored/core.c|   15 +-
 tools/xenstored/core.h|   11 +-
 tools/xenstored/domain.c  |2 +
 tools/xenstored/lu_daemon.c   |4 +-
 tools/xenstored/minios.c  |   62 +
 tools/xenstored/posix.c   |8 +-
 tools/xl/xl_parse.c   |   23 +-
 28 files changed, 2864 insertions(+), 42 deletions(-)
 create mode 100644 tools/9pfsd/.gitignore
 create mode 100644 tools/9pfsd/Makefile
 create mode 100644 tools/9pfsd/io.c
 create mode 100644 tools/9pfsd/xen-9pfsd.c
 create mode 100644 tools/9pfsd/xen-9pfsd.h

-- 
2.35.3




[PATCH v7 03/21] tools/xen-9pfsd: add transport layer

2024-02-15 Thread Juergen Gross
Add the transport layer of 9pfs. This is basically the infrastructure
to receive requests from the frontend and to send the related answers
via the rings.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Anthony PERARD 
---
V2:
- rename put_request_bytes() (Jason Andryuk)
- rename get_request_bytes() and put_response_bytes() len parameter
  (Jason Andryuk)
- don't unmask event channel if error indicator is set (Jason Andryuk)
---
 tools/9pfsd/io.c| 143 +++-
 tools/9pfsd/xen-9pfsd.h |  16 +
 2 files changed, 156 insertions(+), 3 deletions(-)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index eb7c136e09..4312a62dfe 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -6,39 +6,176 @@
  * Copyright (C) 2024 Juergen Gross 
  *
  * I/O thread handling.
+ *
+ * Only handle one request at a time, pushing out the complete response
+ * before looking for the next request.
  */
 
 #include 
+#include 
 #include 
 #include 
+#include/* For cpu barriers. */
+#include 
 
 #include "xen-9pfsd.h"
 
+/*
+ * Note that the ring names "in" and "out" are from the frontend's
+ * perspective, so the "in" ring will be used for responses to the frontend,
+ * while the "out" ring is used for requests from the frontend to the
+ * backend.
+ */
+static unsigned int ring_in_free(struct ring *ring)   /* Number of free bytes in the "in" (response) ring. */
+{
+    unsigned int queued;
+
+    queued = xen_9pfs_queued(ring->prod_pvt_in, ring->intf->in_cons,
+                             ring->ring_size);   /* Bytes between the shared consumer index and our private producer index. */
+    xen_rmb();   /* Read barrier after sampling the shared index. */
+
+    return ring->ring_size - queued;
+}
+
+static unsigned int ring_out_data(struct ring *ring)   /* Number of bytes queued in the "out" (request) ring. */
+{
+    unsigned int queued;
+
+    queued = xen_9pfs_queued(ring->intf->out_prod, ring->cons_pvt_out,
+                             ring->ring_size);   /* Bytes between our private consumer index and the shared producer index. */
+    xen_rmb();   /* Read barrier after sampling the shared index. */
+
+    return queued;
+}
+
+/*
+ * Read request data from the "out" ring into ring->buffer.
+ *
+ * off is the number of bytes already read and total_len the number of
+ * bytes wanted in total. The data is stored at ring->buffer + off, limited
+ * to the amount currently available in the ring. Afterwards the consumer
+ * index is advanced and the event channel is notified.
+ *
+ * Returns the number of bytes read by this call.
+ */
+static unsigned int get_request_bytes(struct ring *ring, unsigned int off,
+                                      unsigned int total_len)
+{
+    unsigned int size;
+    unsigned int out_data = ring_out_data(ring);
+    RING_IDX prod, cons;
+
+    size = min(total_len - off, out_data);
+    prod = xen_9pfs_mask(ring->intf->out_prod, ring->ring_size);
+    cons = xen_9pfs_mask(ring->cons_pvt_out, ring->ring_size);
+    xen_9pfs_read_packet(ring->buffer + off, ring->data.out, size,
+                         prod, &cons, ring->ring_size);
+
+    xen_rmb();           /* Read data out before setting visible consumer. */
+    ring->cons_pvt_out += size;
+    ring->intf->out_cons = ring->cons_pvt_out;
+
+    /* Signal that more space is available now. */
+    xenevtchn_notify(xe, ring->evtchn);
+
+    return size;
+}
+
+/*
+ * Write response data from ring->buffer into the "in" ring.
+ *
+ * off is the number of bytes already written and total_len the number of
+ * bytes to write in total. The data is taken from ring->buffer + off,
+ * limited to the free space currently available in the ring. Afterwards
+ * the producer index is advanced; no event is sent here.
+ *
+ * Returns the number of bytes written by this call.
+ */
+static unsigned int put_response_bytes(struct ring *ring, unsigned int off,
+                                       unsigned int total_len)
+{
+    unsigned int size;
+    unsigned int in_data = ring_in_free(ring);
+    RING_IDX prod, cons;
+
+    size = min(total_len - off, in_data);
+    prod = xen_9pfs_mask(ring->prod_pvt_in, ring->ring_size);
+    cons = xen_9pfs_mask(ring->intf->in_cons, ring->ring_size);
+    xen_9pfs_write_packet(ring->data.in, ring->buffer + off, size,
+                          &prod, cons, ring->ring_size);
+
+    xen_wmb();           /* Write data out before setting visible producer. */
+    ring->prod_pvt_in += size;
+    ring->intf->in_prod = ring->prod_pvt_in;
+
+    return size;
+}
+
 static bool io_work_pending(struct ring *ring)
 {
 if ( ring->stop_thread )
 return true;
-return false;
+if ( ring->error )
+return false;
+return ring->handle_response ? ring_in_free(ring) : ring_out_data(ring);
 }
 
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
+unsigned int count = 0;
+struct p9_header hdr;
+bool in_hdr = true;
+
+ring->max_size = ring->ring_size;
+ring->buffer = malloc(ring->max_size);
+if ( !ring->buffer )
+{
+syslog(LOG_CRIT, "memory allocation failure!");
+return NULL;
+}
 
 while ( !ring->stop_thread )
 {
 pthread_mutex_lock(>mutex);
 if ( !io_work_pending(ring) )
 {
-if ( xenevtchn_unmask(xe, ring->evtchn) < 0 )
+if ( !ring->error && xenevtchn_unmask(xe, ring->evtchn) < 0 )
 syslog(LOG_WARNING, "xenevtchn_unmask() failed");
 pthread_cond_wait(>cond, >mutex);
 }
 pthread_mutex_unlock(>mutex);
 
-/* TODO: I/O handling. */
+if ( ring->stop_thread || ring->error )
+continue;
+
+if ( !ring->handle_response )
+{
+if ( in_hdr )
+{
+count += get_request_bytes(ring, count, sizeof(hdr));
+if ( count != sizeof(hdr) )
+

[PATCH v7 01/21] tools: add a new xen 9pfs daemon

2024-02-15 Thread Juergen Gross
Add "xen-9pfsd", a new 9pfs daemon meant to support infrastructure
domains (e.g. xenstore-stubdom) to access files in dom0.

For now only add the code needed for starting the daemon and
registering it with Xenstore via a new "libxl/xen-9pfs/state" node by
writing the "running" state to it.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Andrew Cooper 
Acked-by: Anthony PERARD 
---
V2:
- rename from xenlogd to xen-9pfsd (Andrew Cooper)
- use a backend domain local Xenstore node (Jason Andryuk)
- use "volatile" for stop_me (Andrew Cooper)
V3:
- fix SPDX Identifier (Andrew Cooper)
V4:
- add strerror() test to error logging (Andrew Cooper)
- don't handle "daemon already running" as error (Anthony Perard)
V5:
- rename source directory to tools/9pfsd (Andrew Cooper)
---
 tools/9pfsd/.gitignore  |   1 +
 tools/9pfsd/Makefile|  38 +++
 tools/9pfsd/xen-9pfsd.c | 147 
 tools/Makefile  |   1 +
 4 files changed, 187 insertions(+)
 create mode 100644 tools/9pfsd/.gitignore
 create mode 100644 tools/9pfsd/Makefile
 create mode 100644 tools/9pfsd/xen-9pfsd.c

diff --git a/tools/9pfsd/.gitignore b/tools/9pfsd/.gitignore
new file mode 100644
index 00..d0c2d223ef
--- /dev/null
+++ b/tools/9pfsd/.gitignore
@@ -0,0 +1 @@
+/xen-9pfsd
diff --git a/tools/9pfsd/Makefile b/tools/9pfsd/Makefile
new file mode 100644
index 00..089cf5ae24
--- /dev/null
+++ b/tools/9pfsd/Makefile
@@ -0,0 +1,38 @@
+#
+# tools/9pfsd/Makefile
+#
+
+XEN_ROOT = $(CURDIR)/../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS += $(PTHREAD_CFLAGS)
+LDFLAGS += $(PTHREAD_LDFLAGS)
+
+TARGETS := xen-9pfsd
+
+XEN-9PFSD_OBJS = xen-9pfsd.o
+$(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxenstore)
+$(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxenevtchn)
+$(XEN-9PFSD_OBJS): CFLAGS += $(CFLAGS_libxengnttab)
+xen-9pfsd: LDLIBS += $(call xenlibs-ldlibs,store evtchn gnttab)
+
+.PHONY: all
+all: $(TARGETS)
+
+xen-9pfsd: $(XEN-9PFSD_OBJS)
+   $(CC) $(LDFLAGS) -o $@ $(XEN-9PFSD_OBJS) $(LDLIBS) $(APPEND_LDFLAGS)
+
+.PHONY: install
+install: all
+   $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC_BIN)
+   for i in $(TARGETS); do $(INSTALL_PROG) $$i $(DESTDIR)$(LIBEXEC_BIN); done
+
+.PHONY: uninstall
+uninstall:
+   for i in $(TARGETS); do rm -f $(DESTDIR)$(LIBEXEC_BIN)/$$i; done
+
+.PHONY: clean
+clean:
+   $(RM) *.o $(TARGETS) $(DEPS_RM)
+
+distclean: clean
diff --git a/tools/9pfsd/xen-9pfsd.c b/tools/9pfsd/xen-9pfsd.c
new file mode 100644
index 00..6939d01574
--- /dev/null
+++ b/tools/9pfsd/xen-9pfsd.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * xen-9pfsd - Xen 9pfs daemon
+ *
+ * Copyright (C) 2024 Juergen Gross 
+ *
+ * Daemon to enable guests to access a directory of the dom0 file system.
+ * Access is made via the 9pfs protocol (xen-9pfsd acts as a PV 9pfs backend).
+ *
+ * Usage: xen-9pfsd
+ *
+ * xen-9pfsd does NOT support writing any links (neither soft links nor hard
+ * links), and it is accepting only canonicalized file paths in order to
+ * avoid the possibility to "escape" from the guest specific directory.
+ *
+ * The backend device string is "xen_9pfs", the tag used for mounting the
+ * 9pfs device is "Xen".
+ *
+ * As an additional security measure the maximum file space used by the guest
+ * can be limited by the backend Xenstore node "max-size" specifying the size
+ * in MBytes. This size includes the size of the root directory of the guest.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static volatile bool stop_me;
+static bool daemon_running;
+static struct xs_handle *xs;
+static xengnttab_handle *xg;
+static xenevtchn_handle *xe;
+
+/*
+ * Record a stop request by setting the volatile flag stop_me.
+ * The sig argument is not used.
+ * NOTE(review): presumably installed as a signal handler - the registration
+ * is not part of this excerpt.
+ */
+static void handle_stop(int sig)
+{
+stop_me = true;
+}
+
+/*
+ * Release all global resources of the daemon.
+ *
+ * The Xenstore directory "libxl/xen-9pfs" is removed only if daemon_running
+ * is set. The event channel, grant table and Xenstore handles are closed if
+ * they are non-NULL, so this can be called at any stage of initialization.
+ * The syslog connection is closed last.
+ */
+static void close_all(void)
+{
+if ( daemon_running )
+xs_rm(xs, XBT_NULL, "libxl/xen-9pfs");
+if ( xe )
+xenevtchn_close(xe);
+if ( xg )
+xengnttab_close(xg);
+/* Close Xenstore only after the node removal above, which uses xs. */
+if ( xs )
+xs_close(xs);
+closelog();
+}
+
+/*
+ * Fatal error exit: log msg together with the current errno value and its
+ * strerror() text at LOG_ALERT level, release all resources via close_all()
+ * and terminate the process with exit status 1. Does not return.
+ */
+static void do_err(const char *msg)
+{
+/* Log first: errno is read here, before any cleanup call is made. */
+syslog(LOG_ALERT, "%s, errno = %d, %s", msg, errno, strerror(errno));
+close_all();
+exit(1);
+}
+
+static void xen_connect(void)
+{
+xs_transaction_t t;
+char *val;
+unsigned int len;
+
+xs = xs_open(0);
+if ( xs == NULL )
+do_err("xs_open() failed");
+
+xg = xengnttab_open(NULL, 0);
+if ( xg == NULL )
+do_err("xengnttab_open() failed");
+
+xe = xenevtchn_open(NULL, 0);
+if ( xe == NULL )
+do_err("xenevtchn_open() failed");
+
+while ( true )
+{
+t = xs_transaction_start(xs);
+if ( t == XBT_NULL )
+do_err("xs_transaction_start() failed");
+
+val = xs_read(xs, t, "

Re: [PATCH v6 15/21] tools/xl: support new 9pfs backend xen_9pfsd

2024-02-15 Thread Juergen Gross

On 15.02.24 07:55, Juergen Gross wrote:

Add support for the new 9pfs backend "xen_9pfsd". For this backend type
the tag defaults to "Xen" and the host side path to
"/var/log/xen/guests/<dom-name>".

Do most of the default settings in libxl. Unfortunately the default
path can't easily be set in libxl, as the domain name isn't available
in the related 9pfs specific function.

Setting the defaults in libxl requires moving the sanity checking
of 9pfs parameters from xl to libxl, too.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 


Sorry Anthony, just found your R-b: in my spam folder :-(


Juergen


---
V2:
- test max_files and max_open_files, too (Jason Andryuk)
V4:
- fix man page to use the "xen_9pfsd" type due to idl limitation
   (Jason Andryuk)
- set (most of) the defaults in libxl (Anthony Perard)
---
  docs/man/xl.cfg.5.pod.in  | 36 +--
  tools/libs/light/libxl_9pfs.c | 18 ++
  tools/xl/xl_parse.c   | 23 +++---
  3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index ea8d41727d..039e057318 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -772,10 +772,16 @@ settings, from the following list:
  
  =over 4
  
+=item B<type=TYPE>

+
+The backendtype for the PV device. Supported values are B<qemu> and
+B<xen_9pfsd>.  The default is B<qemu>.
+
  =item B<tag=STRING>
  
  9pfs tag to identify the filesystem share. The tag is needed on the

-guest side to mount it.
+guest side to mount it. For the backendtype of B<xen_9pfsd> the tag defaults to
+"Xen".
  
  =item B<security_model="none">
  
@@ -785,12 +791,38 @@ squash or remap).
  
  =item B<path=STRING>
  
-Filesystem path on the backend to export.

+Filesystem path on the backend to export. For the backendtype of B<xen_9pfsd>
+the path defaults to "@XEN_LOG_DIR@/guests/<guest-name>".
  
  =item B<backend=domain-id>
  
  Specify the backend domain name or id, defaults to dom0.
  
+=item B<max-files=NUMBER>

+
+Specify the maximum number of files below B<path>. A value of 0 (which
+is the default) doesn't limit the number of files. Only valid for
+B<type=xen_9pfsd>.
+
+=item B<max-open-files=NUMBER>
+
+Specify the maximum number of concurrently opened files below B<path>.
+Multiple opens of the same file are counted individually. Only valid for
+B<type=xen_9pfsd>, which has a default of B<max-open-files=5>.
+
+=item B<max-space=NUMBER>
+
+Specify the maximum used disk space in MiB below B<path>. A value of 0 (which
+is the default) doesn't limit the usable disk space. Only valid for
+B<type=xen_9pfsd>.
+
+=item B<auto-delete=BOOLEAN>
+
+When set the backend will delete the oldest file which is currently not
+opened by the guest in case the disk space limit set via B<max-space> or the
+file limit set via B<max-files> is being reached. Only valid for
+B<type=xen_9pfsd>.
+
  =back
  
  =item B

diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 900c0d46a0..ddeb4f20a7 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -20,6 +20,24 @@
  static int libxl__device_p9_setdefault(libxl__gc *gc, uint32_t domid,
                 libxl_device_p9 *p9, bool hotplug)
  {
+    /* An unspecified backend type selects the qemu backend. */
+    if (p9->type == LIBXL_P9_TYPE_UNKNOWN) {
+        p9->type = LIBXL_P9_TYPE_QEMU;
+    }
+    /* The limits and auto-delete are rejected for the qemu backend. */
+    if (p9->type == LIBXL_P9_TYPE_QEMU &&
+        (p9->max_files || p9->max_open_files || p9->max_space ||
+         p9->auto_delete)) {
+        LOGD(ERROR, domid, "Illegal 9pfs parameter combination");
+        return ERROR_INVAL;
+    }
+    /* For the xen_9pfsd backend a missing tag defaults to "Xen". */
+    if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD && !p9->tag) {
+        p9->tag = libxl__strdup(NOGC, "Xen");
+    }
+
+    /* path, security_model and tag must all be set at this point. */
+    if (!p9->path || !p9->security_model || !p9->tag) {
+        LOGD(ERROR, domid, "9pfs spec missing required field!");
+        return ERROR_INVAL;
+    }
+
      return libxl__resolve_domid(gc, p9->backend_domname, &p9->backend_domid);
  }
  
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c

index 9b358f11b8..80ffe85f5e 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2233,6 +2233,20 @@ void parse_config_data(const char *config_source,
  replace_string(>tag, value);
  } else if (!strcmp(key, "backend")) {
  replace_string(>backend_domname, value);
+} else if (!strcmp(key, "type")) {
+if (libxl_p9_type_from_string(value, >type)) {
+fprintf(stderr, "failed to parse 9pfs type: %s\n",
+value);
+exit(1);
+}
+} else if (!strcmp(key, "max-files")) {
+p9->max_files = parse_ulong(value);
+} else if (!strcmp(key, "max-open-files")) {
+p9->max_open_files = parse_ulong(value);
+} else if (!strcmp(key, "max-space")) {
+p9->max_space = parse_ulong(value);
+} else if (!strcmp(key, "auto-dele

[PATCH v6 11/21] tools/xen-9pfsd: add 9pfs stat request support

2024-02-14 Thread Juergen Gross
Add the stat request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Acked-by: Anthony PERARD 
Reviewed-by: Jason Andryuk 
---
V3:
- use fstatat() (Jason Andryuk)
V4:
- add "s" format to fill_buffer() as a preparation for reading dirs
---
 tools/9pfsd/io.c | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 65ff4dab73..d08c4b1283 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -34,6 +34,7 @@
 #define P9_CMD_OPEN   112
 #define P9_CMD_CREATE 114
 #define P9_CMD_CLUNK  120
+#define P9_CMD_STAT   124
 
 /* P9 protocol open flags. */
 #define P9_OREAD0   /* read */
@@ -60,6 +61,25 @@ struct p9_qid {
 uint64_t path;
 };
 
+struct p9_stat {
+uint16_t size;
+uint16_t type;
+uint32_t dev;
+struct p9_qid qid;
+uint32_t mode;
+uint32_t atime;
+uint32_t mtime;
+uint64_t length;
+const char *name;
+const char *uid;
+const char *gid;
+const char *muid;
+const char *extension;
+uint32_t n_uid;
+uint32_t n_gid;
+uint32_t n_muid;
+};
+
 /*
  * Note that the ring names "in" and "out" are from the frontend's
  * perspective, so the "in" ring will be used for responses to the frontend,
@@ -166,6 +186,7 @@ static void fmt_err(const char *fmt)
  * S: String (2 byte length +  characters)
  *The length is obtained via strlen() of the parameter, being a pointer
  *to the first character of the string
+ * s: stat (struct p9_stat)
  * U: 4 byte unsigned integer
  *The parameter is a pointer to a uint32_t value
  */
@@ -176,6 +197,8 @@ static void vfill_buffer_at(void **data, const char *fmt, 
va_list ap)
 const void *par;
 const char *str_val;
 const struct p9_qid *qid;
+const struct p9_stat *stat;
+uint16_t tlen;
 unsigned int len;
 unsigned int array_sz = 0;
 unsigned int elem_sz = 0;
@@ -259,6 +282,18 @@ static void vfill_buffer_at(void **data, const char *fmt, 
va_list ap)
 *data += len;
 break;
 
+case 's':
+stat = par;
+elem_sz = sizeof(*stat);
+tlen = stat->size + sizeof(stat->size);
+fill_buffer_at(data, "uuuUQUUULSUUU", , >size,
+   >type, >dev, >qid, >mode,
+   >atime, >mtime, >length,
+   stat->name, stat->uid, stat->gid, stat->muid,
+   stat->extension, >n_uid, >n_gid,
+   >n_muid);
+break;
+
 case 'U':
 put_unaligned(*(const uint32_t *)par, (uint32_t *)*data);
 elem_sz = sizeof(uint32_t);
@@ -1148,6 +1183,69 @@ static void p9_clunk(struct ring *ring, struct p9_header 
*hdr)
 fill_buffer(ring, hdr->cmd + 1, hdr->tag, "");
 }
 
+/*
+ * Fill a struct p9_stat from the result of a stat call.
+ *
+ * device: device the file belongs to (passed on to fill_qid())
+ * p9s:    stat structure to fill, it is zeroed first
+ * st:     file status data to convert
+ * name:   file name to report; only the pointer is stored, so the string
+ *         must stay valid as long as p9s is in use
+ *
+ * Only the permission bits (0777) of st_mode are reported, plus
+ * P9_CREATE_PERM_DIR for directories. All owner related strings are empty
+ * and the numeric ids are 0.
+ */
+static void fill_p9_stat(device *device, struct p9_stat *p9s, struct stat *st,
+                         const char *name)
+{
+    memset(p9s, 0, sizeof(*p9s));
+    fill_qid(device, NULL, &p9s->qid, st);
+    p9s->mode = st->st_mode & 0777;
+    if ( S_ISDIR(st->st_mode) )
+        p9s->mode |= P9_CREATE_PERM_DIR;
+    p9s->atime = st->st_atime;
+    p9s->mtime = st->st_mtime;
+    p9s->length = st->st_size;
+    p9s->name = name;
+    p9s->uid = "";
+    p9s->gid = "";
+    p9s->muid = "";
+    p9s->extension = "";
+    p9s->n_uid = 0;
+    p9s->n_gid = 0;
+    p9s->n_muid = 0;
+
+    /*
+     * Size of individual fields without the size field, including 5 2-byte
+     * string length fields.
+     */
+    p9s->size = 71 + strlen(p9s->name);
+}
+
+/*
+ * Handle a 9pfs stat request.
+ *
+ * The request carries a single fid (format "U"). The file referenced by the
+ * fid is examined via fstatat() relative to the device's root directory and
+ * the result is sent back as a stat structure (format "s").
+ * Failures are reported to the frontend via p9_error(): EINVAL for a
+ * malformed request, ENOENT for an unknown fid, errno if fstatat() fails.
+ */
+static void p9_stat(struct ring *ring, struct p9_header *hdr)
+{
+    device *device = ring->device;
+    uint32_t fid;
+    struct p9_fid *fidp;
+    struct p9_stat p9s;
+    struct stat st;
+    const char *name;
+    int ret;
+
+    ret = fill_data(ring, "U", &fid);
+    if ( ret != 1 )
+    {
+        p9_error(ring, hdr->tag, EINVAL);
+        return;
+    }
+
+    fidp = get_fid_ref(device, fid);
+    if ( !fidp )
+    {
+        p9_error(ring, hdr->tag, ENOENT);
+        return;
+    }
+
+    if ( fstatat(device->root_fd, fidp->path, &st, 0) < 0 )
+    {
+        p9_error(ring, hdr->tag, errno);
+        goto out;
+    }
+
+    /*
+     * Report the last path component only. Fall back to the whole path if it
+     * contains no '/', as strrchr() returns NULL in that case.
+     */
+    name = strrchr(fidp->path, '/');
+    name = name ? name + 1 : fidp->path;
+    fill_p9_stat(device, &p9s, &st, name);
+
+    fill_buffer(ring, hdr->cmd + 1, hdr->tag, "s", &p9s);
+
+ out:
+    /* Drop the reference obtained via get_fid_ref(). */
+    free_fid(device, fidp);
+}
+
 void *io_thread(void *arg)
 {
 struct ring *ring = arg;
@@ -1227,6 +1325,10 @@ void *io_thread(void *arg)
 p9_clunk(ring, );
 break;
 
+case P9_CMD_STAT:
+p9_stat(ring, );
+break;
+
 default:
 syslog(LOG_DEBUG, "%u.%u sent unhandled command %u\n",
ring->device->domid, ring->device->devid, hdr.cmd);
-- 
2.35.3




[PATCH v6 21/21] tools/xenstored: have a single do_control_memreport()

2024-02-14 Thread Juergen Gross
With 9pfs now available in Xenstore-stubdom, there is no reason to
have distinct do_control_memreport() variants for the daemon and the
stubdom implementations.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
---
 tools/xenstored/control.c | 27 +++
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/tools/xenstored/control.c b/tools/xenstored/control.c
index dae23a5ac0..9561289179 100644
--- a/tools/xenstored/control.c
+++ b/tools/xenstored/control.c
@@ -216,23 +216,11 @@ static int do_control_logfile(const void *ctx, struct 
connection *conn,
return 0;
 }
 
-#ifdef __MINIOS__
-static int do_control_memreport(const void *ctx, struct connection *conn,
-   const char **vec, int num)
-{
-   if (num)
-   return EINVAL;
-
-   talloc_report_full(NULL, stdout);
-
-   send_ack(conn, XS_CONTROL);
-   return 0;
-}
-#else
 static int do_control_memreport(const void *ctx, struct connection *conn,
const char **vec, int num)
 {
FILE *fp;
+   const char *filename;
int fd;
 
if (num > 1)
@@ -255,8 +243,12 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
if (!fp)
close(fd);
}
-   } else
-   fp = fopen(vec[0], "a");
+   } else {
+   filename = absolute_filename(ctx, vec[0]);
+   if (!filename)
+   return ENOMEM;
+   fp = fopen(filename, "a");
+   }
 
if (!fp)
return EBADF;
@@ -267,7 +259,6 @@ static int do_control_memreport(const void *ctx, struct 
connection *conn,
send_ack(conn, XS_CONTROL);
return 0;
 }
-#endif
 
 static int do_control_print(const void *ctx, struct connection *conn,
const char **vec, int num)
@@ -310,11 +301,7 @@ static struct cmd_s cmds[] = {
"Default timeout is 60 seconds.", 5 },
 #endif
{ "logfile", do_control_logfile, "" },
-#ifdef __MINIOS__
-   { "memreport", do_control_memreport, "" },
-#else
{ "memreport", do_control_memreport, "[]" },
-#endif
{ "print", do_control_print, "" },
{ "quota", do_control_quota,
"[set  ||max [-r]]" },
-- 
2.35.3




[PATCH v6 18/21] tools/xenstored: mount 9pfs device in stubdom

2024-02-14 Thread Juergen Gross
Mount the 9pfs device in stubdom enabling it to use files.

This has to happen in a worker thread in order to allow the main thread
handling the required Xenstore accesses in parallel.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- add logging in case of errors (Julien Grall)
---
 tools/xenstored/core.h   |  6 +
 tools/xenstored/domain.c |  2 ++
 tools/xenstored/minios.c | 54 
 3 files changed, 62 insertions(+)

diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index f6af086f01..fe0ee90581 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -36,6 +36,8 @@
 #include "list.h"
 #include "hashtable.h"
 
+#define XENSTORE_LIB_DIR   XEN_LIB_DIR "/xenstore"
+
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
 /* O_CLOEXEC support is needed for Live Update in the daemon case. */
@@ -399,6 +401,10 @@ void handle_special_fds(void);
 int get_socket_fd(void);
 void set_socket_fd(int fd);
 
+#ifdef __MINIOS__
+void mount_9pfs(void);
+#endif
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/domain.c b/tools/xenstored/domain.c
index 1a7d5e9756..64c8fd0cc3 100644
--- a/tools/xenstored/domain.c
+++ b/tools/xenstored/domain.c
@@ -1236,6 +1236,8 @@ void stubdom_init(void)
barf_perror("Failed to initialize stubdom");
 
xenevtchn_notify(xce_handle, stubdom->port);
+
+   mount_9pfs();
 #endif
 }
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 22ac8defbd..562a9b4972 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -17,10 +17,20 @@
 */
 #include 
 #include 
+#include 
+#include "talloc.h"
 #include "core.h"
 #include "utils.h"
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+
+#define P9_STATE_PATH  "device/9pfs/0/state"
+
+static void *p9_device;
 
 void finish_daemonize(void)
 {
@@ -74,3 +84,47 @@ int get_socket_fd(void)
 void set_socket_fd(int fd)
 {
 }
+
+/*
+ * Thread body: wait until the Xenstore node P9_STATE_PATH can be read, then
+ * initialize the 9pfs frontend and remember its handle in p9_device.
+ *
+ * A watch is set on the state node and the node is re-read after each watch
+ * event until the read succeeds. If setting the watch fails, the error is
+ * logged and the thread returns without mounting anything.
+ * The parameter p is not used.
+ */
+static void mount_thread(void *p)
+{
+    xenbus_event_queue events = NULL;
+    char *err;
+    char *dummy;
+
+    err = xenbus_watch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs", &events);
+    if (err) {
+        log("error \"%s\" when setting watch on \"%s\"\n", err,
+            P9_STATE_PATH);
+        free(err);
+        return;
+    }
+
+    for (;;) {
+        xenbus_wait_for_watch(&events);
+
+        /*
+         * We only care for existence of the state node.
+         * State changes are handled in init_9pfront().
+         */
+        err = xenbus_read(XBT_NIL, P9_STATE_PATH, &dummy);
+        if (!err)
+            break;
+        free(err);
+    }
+
+    /* The value read is not needed, only the fact that it could be read. */
+    free(dummy);
+
+    err = xenbus_unwatch_path_token(XBT_NIL, P9_STATE_PATH, "9pfs");
+    if (err) {
+        log("error \"%s\" when unwatching \"%s\", leaking watch\n",
+            err, P9_STATE_PATH);
+        free(err);
+    }
+
+    p9_device = init_9pfront(0, XENSTORE_LIB_DIR);
+}
+
+/*
+ * Start the thread "mount-9pfs" running mount_thread(), so the caller is
+ * not blocked while waiting for the 9pfs device.
+ */
+void mount_9pfs(void)
+{
+   create_thread("mount-9pfs", mount_thread, NULL);
+}
-- 
2.35.3




[PATCH v6 17/21] tools: add 9pfs device to xenstore-stubdom

2024-02-14 Thread Juergen Gross
Add a 9pfs device to Xenstore stubdom in order to allow it to do e.g.
logging into a dom0 file.

Use the following parameters for the new device:

- tag = "Xen"
- type = "xen_9pfsd"
- path = "/var/lib/xen/xenstore"
- security-model = "none"

For now don't limit allowed file space or number of files.

Add a new libxl function for adding it similar to the function for
adding the console device.

Signed-off-by: Juergen Gross 
---
V2:
- add security_model parameter to new libxl function (Jason Andryuk)
V4:
- rename function to libxl_device_9pfs_add() (Anthony Perard)
- use a libxl_device_p9 pointer as parameter (Anthony Perard)
---
 tools/helpers/init-xenstore-domain.c |  7 +++
 tools/include/libxl.h| 15 +++
 tools/libs/light/libxl_9pfs.c| 16 
 3 files changed, 38 insertions(+)

diff --git a/tools/helpers/init-xenstore-domain.c 
b/tools/helpers/init-xenstore-domain.c
index 140ed610ae..1683438c5c 100644
--- a/tools/helpers/init-xenstore-domain.c
+++ b/tools/helpers/init-xenstore-domain.c
@@ -433,6 +433,12 @@ int main(int argc, char** argv)
 int rv, fd;
 char *maxmem_str = NULL;
 libxl_ctx *ctx;
+libxl_device_p9 p9 = { .backend_domid = 0,
+   .tag = "Xen",
+   .path = XEN_LIB_DIR"/xenstore",
+   .security_model = "none",
+   .type = LIBXL_P9_TYPE_XEN_9PFSD,
+};
 
 while ( (opt = getopt_long(argc, argv, "v", options, NULL)) != -1 )
 {
@@ -543,6 +549,7 @@ int main(int argc, char** argv)
 }
 libxl_console_add_xenstore(ctx, domid, 0, console_evtchn, console_gfn,
NULL);
+libxl_device_9pfs_add(ctx, domid, , NULL);
 libxl_ctx_free(ctx);
 
 fd = creat(XEN_RUN_DIR "/xenstored.pid", 0666);
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 9a3e702557..44a2205d2b 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -583,6 +583,13 @@
  * libxl_console_add_xenstore() in libxl.
  */
 #define LIBXL_HAVE_CONSOLE_ADD_XENSTORE 1
+
+/*
+ * LIBXL_HAVE_P9_ADD_XENSTORE indicates presence of the function
+ * libxl_device_9pfs_add() in libxl.
+ */
+#define LIBXL_HAVE_P9_ADD_XENSTORE 1
+
 /*
  * libxl ABI compatibility
  *
@@ -2074,6 +2081,14 @@ int libxl_console_add_xenstore(libxl_ctx *ctx, uint32_t 
domid, uint32_t backend,
const libxl_asyncop_how *ao_how)
LIBXL_EXTERNAL_CALLERS_ONLY;
 
+/* libxl_device_9pfs_add writes the Xenstore entries for a domain's
+ * primary 9pfs device based on domid, and device parameters.
+ * If needed it will start the backend daemon.
+ */
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+  LIBXL_EXTERNAL_CALLERS_ONLY;
+
 /* May be called with info_r == NULL to check for domain's existence.
  * Returns ERROR_DOMAIN_NOTFOUND if domain does not exist (used to return
  * ERROR_INVAL for this scenario). */
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index ddeb4f20a7..48f894f070 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -206,6 +206,22 @@ static void libxl__device_p9_add(libxl__egc *egc, uint32_t 
domid,
 aodev->callback(egc, aodev);
 }
 
+/*
+ * Add the 9pfs device described by p9 to domain domid as an asynchronous
+ * operation controlled by ao_how.
+ *
+ * An ao device is set up for the ADD action with device_addrm_aocomplete as
+ * completion callback and handed to libxl__device_p9_add().
+ * Returns AO_INPROGRESS.
+ */
+int libxl_device_9pfs_add(libxl_ctx *ctx, uint32_t domid, libxl_device_p9 *p9,
+  const libxl_asyncop_how *ao_how)
+{
+AO_CREATE(ctx, domid, ao_how);
+libxl__ao_device *aodev;
+
+GCNEW(aodev);
+libxl__prepare_ao_device(ao, aodev);
+aodev->action = LIBXL__DEVICE_ACTION_ADD;
+aodev->callback = device_addrm_aocomplete;
+
+libxl__device_p9_add(egc, domid, p9, aodev);
+
+return AO_INPROGRESS;
+}
+
 #define libxl_device_p9_list NULL
 #define libxl_device_p9_compare NULL
 
-- 
2.35.3




[PATCH v6 14/21] tools/libs/light: add backend type for 9pfs PV devices

2024-02-14 Thread Juergen Gross
Make the backend type of 9pfs PV devices configurable. The default is
"qemu" with the related Xenstore backend-side directory being "9pfs".

Add another type "xen_9pfsd" with the related Xenstore backend-side
directory "xen_9pfs".

As additional security features it is possible to specify:
- "max-space" for limiting the maximum space consumed on the filesystem
  in MBs
- "max-files" for limiting the maximum number of files in the
  filesystem
- "max-open-files" for limiting the maximum number of concurrent open
  files

For convenience "auto-delete" is available to let the backend delete the
oldest file of the guest in case otherwise "max-space" or "max-files"
would be violated.

The xen-9pfsd daemon will be started by libxenlight automatically when
the first "xen_9pfs" device is being created.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: George Dunlap  # Golang bits
Reviewed-by: Anthony PERARD 
---
V3:
- regenerate go bindings
V4:
- rename libxl_device_p9_dm_needed() to libxl__device_p9_dm_needed()
  (Anthony Perard)
- reorder span related functions (Anthony Perard)
- add comment for xen9pfsd_spawn() return values (Anthony Perard)
- add LIBXL_HAVE_XEN_9PFS to libxl.h (Anthony Perard)
- use a copy of 'p9' in xen9pfsd_spawn() (Anthony Perard)
V6:
- rebase (Anthony Perard)
- drop callback from struct libxl__aop9_state (Anthony Perard)
---
 tools/golang/xenlight/helpers.gen.go  |  10 ++
 tools/golang/xenlight/types.gen.go|  12 ++
 tools/include/libxl.h |   7 +
 tools/libs/light/libxl_9pfs.c | 157 +-
 tools/libs/light/libxl_create.c   |   4 +-
 tools/libs/light/libxl_dm.c   |   2 +-
 tools/libs/light/libxl_types.idl  |  11 ++
 tools/libs/light/libxl_types_internal.idl |   1 +
 8 files changed, 197 insertions(+), 7 deletions(-)

diff --git a/tools/golang/xenlight/helpers.gen.go 
b/tools/golang/xenlight/helpers.gen.go
index 0f8e23773c..8f44397a4e 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -2440,6 +2440,11 @@ x.Tag = C.GoString(xc.tag)
 x.Path = C.GoString(xc.path)
 x.SecurityModel = C.GoString(xc.security_model)
 x.Devid = Devid(xc.devid)
+x.Type = P9Type(xc._type)
+x.MaxSpace = int(xc.max_space)
+x.MaxFiles = int(xc.max_files)
+x.MaxOpenFiles = int(xc.max_open_files)
+x.AutoDelete = bool(xc.auto_delete)
 
  return nil}
 
@@ -2458,6 +2463,11 @@ xc.path = C.CString(x.Path)}
 if x.SecurityModel != "" {
 xc.security_model = C.CString(x.SecurityModel)}
 xc.devid = C.libxl_devid(x.Devid)
+xc._type = C.libxl_p9_type(x.Type)
+xc.max_space = C.int(x.MaxSpace)
+xc.max_files = C.int(x.MaxFiles)
+xc.max_open_files = C.int(x.MaxOpenFiles)
+xc.auto_delete = C.bool(x.AutoDelete)
 
  return nil
  }
diff --git a/tools/golang/xenlight/types.gen.go 
b/tools/golang/xenlight/types.gen.go
index 9c8b7b81f6..d31722407a 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -122,6 +122,13 @@ NicTypeVifIoemu NicType = 1
 NicTypeVif NicType = 2
 )
 
+type P9Type int
+const(
+P9TypeUnknown P9Type = 0
+P9TypeQemu P9Type = 1
+P9TypeXen9Pfsd P9Type = 2
+)
+
 type ActionOnShutdown int
 const(
 ActionOnShutdownDestroy ActionOnShutdown = 1
@@ -889,6 +896,11 @@ Tag string
 Path string
 SecurityModel string
 Devid Devid
+Type P9Type
+MaxSpace int
+MaxFiles int
+MaxOpenFiles int
+AutoDelete bool
 }
 
 type DevicePvcallsif struct {
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 46bc774126..9a3e702557 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -615,6 +615,13 @@
  */
 #define LIBXL_HAVE_HVM_PIRQ 1
 
+/*
+ * LIBXL_HAVE_XEN_9PFS indicates the presence of the xen-9pfsd related
+ * fields in libxl_device_p9: type, max_space, max_files, max_open_files and
+ * auto_delete.
+ */
+#define LIBXL_HAVE_XEN_9PFS 1
+
 /*
  * libxl memory management
  *
diff --git a/tools/libs/light/libxl_9pfs.c b/tools/libs/light/libxl_9pfs.c
index 5ab0d3aa21..900c0d46a0 100644
--- a/tools/libs/light/libxl_9pfs.c
+++ b/tools/libs/light/libxl_9pfs.c
@@ -33,20 +33,171 @@ static int libxl__set_xenstore_p9(libxl__gc *gc, uint32_t 
domid,
 
 flexarray_append_pair(front, "tag", p9->tag);
 
+if (p9->type == LIBXL_P9_TYPE_XEN_9PFSD) {
+flexarray_append_pair(back, "max-space",
+  GCSPRINTF("%u", p9->max_space));
+flexarray_append_pair(back, "max-files",
+  GCSPRINTF("%u", p9->max_files));
+flexarray_append_pair(back, "max-open-files",
+  GCSPRINTF("%u", p9->max_open_files));
+flexarray_append_pair(back, "auto-delete",
+  p9->auto_delete ? "1" : "0");
+}
+

[PATCH v6 13/21] tools/xen-9pfsd: add 9pfs read request support

2024-02-14 Thread Juergen Gross
Add the read request of the 9pfs protocol.

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Acked-by: Anthony PERARD 
---
V2:
- make error check more readable (Jason Andryuk)
V4:
- add directory read support
V5:
- rewinddir() if reading a directory and offset is 0 (Jason Andryuk)
---
 tools/9pfsd/io.c | 93 
 1 file changed, 93 insertions(+)

diff --git a/tools/9pfsd/io.c b/tools/9pfsd/io.c
index 358b7f0781..28b43eb992 100644
--- a/tools/9pfsd/io.c
+++ b/tools/9pfsd/io.c
@@ -33,6 +33,7 @@
 #define P9_CMD_WALK   110
 #define P9_CMD_OPEN   112
 #define P9_CMD_CREATE 114
+#define P9_CMD_READ   116
 #define P9_CMD_WRITE  118
 #define P9_CMD_CLUNK  120
 #define P9_CMD_STAT   124
@@ -1247,6 +1248,94 @@ static void p9_stat(struct ring *ring, struct p9_header 
*hdr)
 free_fid(device, fidp);
 }
 
+/*
+ * Handle a 9pfs read request.
+ *
+ * Request parameters (format "ULU"): fid, file offset and byte count.
+ * The fid must refer to an opened file or directory, otherwise EBADF is
+ * reported.
+ *
+ * The payload is assembled in ring->buffer behind the header and a
+ * uint32_t sized field, and is sent via fill_buffer() using format "D".
+ * - Directory: one stat entry per directory entry is added as long as the
+ *   complete entry fits into the requested count. An offset of 0 rewinds
+ *   the directory stream first.
+ * - Other files: data is read via pread() starting at the requested offset
+ *   until count bytes are read, end of file is reached or an error occurs.
+ *   An error is reported only if no data at all could be read.
+ *
+ * NOTE(review): count is not checked against the size of ring->buffer in
+ * this function - confirm that it is bounded elsewhere.
+ */
+static void p9_read(struct ring *ring, struct p9_header *hdr)
+{
+    device *device = ring->device;
+    uint32_t fid;
+    uint64_t off;
+    unsigned int len;
+    uint32_t count;
+    void *buf;
+    struct p9_fid *fidp;
+    int ret;
+
+    ret = fill_data(ring, "ULU", &fid, &off, &count);
+    if ( ret != 3 )
+    {
+        p9_error(ring, hdr->tag, EINVAL);
+        return;
+    }
+
+    fidp = get_fid_ref(device, fid);
+    if ( !fidp || !fidp->opened )
+    {
+        errno = EBADF;
+        goto err;
+    }
+
+    /* len counts the bytes still available in the response payload. */
+    len = count;
+    buf = ring->buffer + sizeof(*hdr) + sizeof(uint32_t);
+
+    if ( fidp->isdir )
+    {
+        struct dirent *dirent;
+        struct stat st;
+        struct p9_stat p9s;
+        long pos;
+
+        if ( off == 0 )
+            rewinddir(fidp->data);
+
+        while ( len != 0 )
+        {
+            /*
+             * Remember the position of the entry read next, in order to be
+             * able to return to it in case it doesn't fit. The d_off member
+             * of the entry can't be used for that purpose, as it refers to
+             * the position behind the entry.
+             */
+            pos = telldir(fidp->data);
+
+            /* readdir() returns NULL at end of directory and on error. */
+            errno = 0;
+            dirent = readdir(fidp->data);
+            if ( !dirent )
+            {
+                if ( errno )
+                    goto err;
+                break;
+            }
+            if ( fstatat(fidp->fd, dirent->d_name, &st, 0) < 0 )
+                goto err;
+            fill_p9_stat(device, &p9s, &st, dirent->d_name);
+            if ( p9s.size + sizeof(p9s.size) > len )
+            {
+                /* Entry is returned by the next read request. */
+                seekdir(fidp->data, pos);
+                break;
+            }
+            fill_buffer_at(&buf, "s", &p9s);
+            len -= p9s.size + sizeof(p9s.size);
+        }
+    }
+    else
+    {
+        while ( len != 0 )
+        {
+            ret = pread(fidp->fd, buf, len, off);
+            if ( ret <= 0 )
+                break;
+            len -= ret;
+            buf += ret;
+            off += ret;
+        }
+        /* Partial reads are returned as success. */
+        if ( ret < 0 && len == count )
+            goto err;
+    }
+
+    /* Send the number of bytes produced and the data itself. */
+    buf = ring->buffer + sizeof(*hdr) + sizeof(uint32_t);
+    len = count - len;
+    fill_buffer(ring, hdr->cmd + 1, hdr->tag, "D", &len, buf);
+
+ out:
+    free_fid(device, fidp);
+
+    return;
+
+ err:
+    p9_error(ring, hdr->tag, errno);
+    goto out;
+}
+
 static void p9_write(struct ring *ring, struct p9_header *hdr)
 {
 device *device = ring->device;
@@ -1371,6 +1460,10 @@ void *io_thread(void *arg)
 p9_create(ring, );
 break;
 
+case P9_CMD_READ:
+p9_read(ring, );
+break;
+
 case P9_CMD_WRITE:
 p9_write(ring, );
 break;
-- 
2.35.3




[PATCH v6 19/21] tools/xenstored: add helpers for filename handling

2024-02-14 Thread Juergen Gross
Add some helpers for handling filenames which might need different
implementations between stubdom and daemon environments:

- expansion of relative filenames (those are not really defined today,
  just expand them to be relative to /var/lib/xen/xenstore)
- expansion of xenstore_daemon_rundir() (used e.g. for saving the state
  file in case of live update - needs to be unchanged in the daemon
  case, but should result in /var/lib/xen/xenstore for stubdom)

Signed-off-by: Juergen Gross 
Reviewed-by: Jason Andryuk 
Reviewed-by: Julien Grall 
---
V3:
- make absolute_filename() return a pointer to const (Julien Grall)
---
 tools/xenstored/core.c  | 15 +--
 tools/xenstored/core.h  |  5 -
 tools/xenstored/lu_daemon.c |  4 ++--
 tools/xenstored/minios.c|  5 +
 tools/xenstored/posix.c |  8 +++-
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c
index 48fc787ac1..bada1ad9a2 100644
--- a/tools/xenstored/core.c
+++ b/tools/xenstored/core.c
@@ -63,7 +63,7 @@ char **orig_argv;
 LIST_HEAD(connections);
 int tracefd = -1;
 bool keep_orphans = false;
-char *tracefile = NULL;
+const char *tracefile = NULL;
 static struct hashtable *nodes;
 unsigned int trace_flags = TRACE_OBJ | TRACE_IO;
 
@@ -137,6 +137,17 @@ void trace_destroy(const void *data, const char *type)
trace("obj: DESTROY %s %p\n", type, data);
 }
 
+/*
+ * Return an absolute filename.
+ * In case of a relative filename given as input, prepend XENSTORE_LIB_DIR.
+ */
+const char *absolute_filename(const void *ctx, const char *filename)
+{
+    /* A name starting with '/' is duplicated unchanged. */
+    if (filename[0] == '/')
+        return talloc_strdup(ctx, filename);
+
+    /* Anything else is taken relative to XENSTORE_LIB_DIR. */
+    return talloc_asprintf(ctx, XENSTORE_LIB_DIR "/%s", filename);
+}
+
 void close_log(void)
 {
if (tracefd >= 0)
@@ -2759,7 +2770,7 @@ int main(int argc, char *argv[])
 #endif
 
if (tracefile)
-   tracefile = talloc_strdup(NULL, tracefile);
+   tracefile = absolute_filename(NULL, tracefile);
 
 #ifndef NO_LIVE_UPDATE
/* Read state in case of live update. */
diff --git a/tools/xenstored/core.h b/tools/xenstored/core.h
index fe0ee90581..e58779e88c 100644
--- a/tools/xenstored/core.h
+++ b/tools/xenstored/core.h
@@ -341,7 +341,7 @@ void close_log(void);
 extern int orig_argc;
 extern char **orig_argv;
 
-extern char *tracefile;
+extern const char *tracefile;
 extern int tracefd;
 
 /* Trace flag values must be kept in sync with trace_switches[] contents. */
@@ -405,6 +405,9 @@ void set_socket_fd(int fd);
 void mount_9pfs(void);
 #endif
 
+const char *xenstore_rundir(void);
+const char *absolute_filename(const void *ctx, const char *filename);
+
 /* Close stdin/stdout/stderr to complete daemonize */
 void finish_daemonize(void);
 
diff --git a/tools/xenstored/lu_daemon.c b/tools/xenstored/lu_daemon.c
index 71bcabadd3..635ab0 100644
--- a/tools/xenstored/lu_daemon.c
+++ b/tools/xenstored/lu_daemon.c
@@ -24,7 +24,7 @@ void lu_get_dump_state(struct lu_dump_state *state)
state->size = 0;
 
state->filename = talloc_asprintf(NULL, "%s/state_dump",
- xenstore_daemon_rundir());
+ xenstore_rundir());
if (!state->filename)
barf("Allocation failure");
 
@@ -65,7 +65,7 @@ FILE *lu_dump_open(const void *ctx)
int fd;
 
filename = talloc_asprintf(ctx, "%s/state_dump",
-  xenstore_daemon_rundir());
+  xenstore_rundir());
if (!filename)
return NULL;
 
diff --git a/tools/xenstored/minios.c b/tools/xenstored/minios.c
index 562a9b4972..e70386f8c7 100644
--- a/tools/xenstored/minios.c
+++ b/tools/xenstored/minios.c
@@ -128,3 +128,8 @@ void mount_9pfs(void)
 {
create_thread("mount-9pfs", mount_thread, NULL);
 }
+
+/* Stubdom variant: the run directory is the fixed XENSTORE_LIB_DIR. */
+const char *xenstore_rundir(void)
+{
+   return XENSTORE_LIB_DIR;
+}
diff --git a/tools/xenstored/posix.c b/tools/xenstored/posix.c
index 496329dfd1..d88c82d972 100644
--- a/tools/xenstored/posix.c
+++ b/tools/xenstored/posix.c
@@ -326,9 +326,10 @@ void early_init(bool live_update, bool dofork, const char 
*pidfile)
 {
reopen_log();
 
-   /* Make sure xenstored directory exists. */
+   /* Make sure xenstored directories exist. */
/* Errors ignored here, will be reported when we open files */
mkdir(xenstore_daemon_rundir(), 0755);
+   mkdir(XENSTORE_LIB_DIR, 0755);
 
if (dofork) {
openlog("xenstored", 0, LOG_DAEMON);
@@ -406,3 +407,8 @@ void set_socket_fd(int fd)
 {
sock = fd;
 }
+
+/* Daemon variant: the run directory is what xenstore_daemon_rundir() returns. */
+const char *xenstore_rundir(void)
+{
+   return xenstore_daemon_rundir();
+}
-- 
2.35.3




  1   2   3   4   5   6   7   8   9   10   >