[osv-dev] [COMMIT osv master] vfs: implement linkat() except for AT_SYMLINK_FOLLOW

2024-03-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

vfs: implement linkat() except for AT_SYMLINK_FOLLOW

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -1096,7 +1096,7 @@ int renameat(int olddirfd, const char *oldpath,
 } else {
 char absolute_newpath[PATH_MAX];
 auto error = vfs_fun_at(newdirfd, newpath, [_newpath](const 
char *absolute_path) {
-strcpy(absolute_newpath, absolute_path);
+strlcpy(absolute_newpath, absolute_path, PATH_MAX);
 return 0;
 });
 
@@ -1245,6 +1245,40 @@ int link(const char *oldpath, const char *newpath)
 return -1;
 }
 
+OSV_LIBC_API
+int linkat(int olddirfd, const char *oldpath, int newdirfd, const char 
*newpath, int flags)
+{
+if (flags & AT_SYMLINK_FOLLOW) {
+WARN_ONCE("linkat() does not support AT_SYMLINK_FOLLOW\n");
+errno = EINVAL;
+return -1;
+}
+
+if (!oldpath || !newpath) {
+errno = EINVAL;
+return -1;
+}
+
+if (newpath[0] == '/' || newdirfd == AT_FDCWD) {
+return vfs_fun_at2(olddirfd, oldpath, [newpath](const char *path) {
+return link(path, newpath);
+});
+} else {
+char absolute_newpath[PATH_MAX];
+auto error = vfs_fun_at(newdirfd, newpath, [_newpath](const 
char *absolute_path) {
+strlcpy(absolute_newpath, absolute_path, PATH_MAX);
+return 0;
+});
+
+if (error) {
+return error;
+} else {
+return vfs_fun_at2(olddirfd, oldpath, [absolute_newpath](const 
char *path) {
+return link(path, absolute_newpath);
+});
+}
+}
+}
 
 TRACEPOINT(trace_vfs_symlink, "oldpath=%s, newpath=%s", const char*, const 
char*);
 TRACEPOINT(trace_vfs_symlink_ret, "");

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5d0d490613f2afeb%40google.com.


[osv-dev] [COMMIT osv master] vfs: implement subset of copy_file_range()

2024-03-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

vfs: implement subset of copy_file_range()

This patch implements the subset of copy_file_range() needed
by the GNU cp utility to function.

The implementation delegates to sendfile() and accepts
only calls where off_out is NULL.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -2345,6 +2345,27 @@ ssize_t sendfile(int out_fd, int in_fd, off_t *_offset, 
size_t count)
 #undef sendfile64
 LFS64(sendfile);
 
+extern "C" OSV_LIBC_API
+ssize_t copy_file_range(int fd_in, off_t *off_in,
+int fd_out, off_t *off_out,
+size_t len, unsigned int flags)
+{
+//Non-zero flags are rejected according to the manual
+if (flags != 0) {
+errno = EINVAL;
+return -1;
+}
+//We do not support writing to a file at specified offset because
+//we delegate to sendfile() which assumes current position of the output
+//file
+if (off_out) {
+WARN("copy_file_range() does not support non-zero off_out\n");
+errno = EINVAL;
+return -1;
+}
+return sendfile(fd_out, fd_in, off_in, len);
+}
+
 NO_SYS(OSV_LIBC_API int fchmodat(int dirfd, const char *pathname, mode_t mode, 
int flags));
 
 OSV_LIBC_API

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/874b2a0613f1f44b%40google.com.


[osv-dev] [COMMIT osv master] libc: add aliases __isoc23_*

2024-03-17 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

libc: add aliases __isoc23_*

The C23 standard decided that 50 years of tradition CAN be messed with,
and the language and all its parsing functions should start accepting
binary constants that look like "0b...". Since glibc wants to support
compilation on both C23 and older compilers, it split the parsing functions
like strtoul() into two - the classic one that doesn't support "0b...", and
a new one __isoc23_strtoul() which gets used on C23 compilers.

On modern systems, the C++ standard library (libstdc++.a) gets compiled
on a modern compiler supporting C23 and modern header files which support
it. Ideally, the C++ standard library would be compiled by C++, which is
NOT C23, so would not end up using __isoc23_strtoul(), but it turns out
that it actually does. On such a system the OSv build fails at the linking
stage, because __isoc23_strtoul() is called in a couple of places in
libstdc++.a (eh_alloc.o and debug.o).

Also, __isoc23_sscanf() gets called in the Fedora-built shared library
libboost_filesystem.so.

Eventually, we should implement __isoc23_*() to really support binary
constants. More likely, we'll wait for Musl to implement it and adopt
their implementation. Until then, the simple workaround in this patch
is to make the __isoc23_*() functions aliases of the classic functions.
It won't work for applications that really need their parsing functions
to support binary constants - but I doubt any such application really
exists today so I don't feel any urgency to implement it today.

After this patch
   scripts/build image=rogue
   scripts/run

Fully works on Fedora 39. Building the default Lua shell (scripts/build
without parameters) works, but when running it, __openat64_2 is missing
(this is Refs #1299)

Fixes #1301

Signed-off-by: Nadav Har'El 

---
diff --git a/libc/aliases.ld b/libc/aliases.ld
--- a/libc/aliases.ld
+++ b/libc/aliases.ld
@@ -76,3 +76,12 @@ _Exit = exit;
 
 __dn_expand = dn_expand;
 __sysconf = sysconf;
+
+/* glibc uses these functions as C23 versions of the existing functions,
+   which are supposed to also allow binary constants (0b...). So they
+   should be slightly different than the classic functions, but until
+   we implement that, lets at least support them as an alias - see
+   issue #1299.
+*/
+__isoc23_strtoul = strtoul;
+__isoc23_sscanf = sscanf;
diff --git a/scripts/setup.py b/scripts/setup.py
--- a/scripts/setup.py
+++ b/scripts/setup.py
@@ -157,8 +157,15 @@ class Fedora_38(object):
 ec2_post_install = None
 version = '38'
 
+class Fedora_39(object):
+packages = []
+ec2_packages = []
+test_packages = []
+ec2_post_install = None
+version = '39'
+
 versions = [
-Fedora_27, Fedora_28, Fedora_29, Fedora_30, Fedora_31, Fedora_32, 
Fedora_33, Fedora_34, Fedora_35, Fedora_37, Fedora_38
+Fedora_27, Fedora_28, Fedora_29, Fedora_30, Fedora_31, Fedora_32, 
Fedora_33, Fedora_34, Fedora_35, Fedora_37, Fedora_38, Fedora_39
 ]
 
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/27769a0613d9840d%40google.com.


[osv-dev] [COMMIT osv master] musl: partially hide "hidden" macro that confuses Boost

2024-03-17 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

musl: partially hide "hidden" macro that confuses Boost

This patch fixes a build error when trying to build OSv on a modern distro
(namely, Fedora 39) with Boost 1.81.

In the new Boost, they started to use the name "hidden" for some
constant variable as well as a namespace. The problem is that Musl
has a *macro* hidden in include/glibc-compat/feature.hh - see
commit af2d371.

An ideal fix would have been to rename Musl's macro, or use it (via
a "-D" option) just when building Musl source files, not files in core/.
But both of these solutions would require fairly elaborate changes.

So instead, in this patch we take a simpler workaround - there are just two
source files where we include a Musl header file which adds the "hidden"
macro and then a Boost header file which uses the "hidden" name. Just
in these two files, we "#undef" the macro "hidden" after including the
Musl header file and before including the Boost one.

After this fix, the OSv kernel mostly compiles on Fedora 39, but
fails linking due to other problems that will need to be fixed
separately.

Refs #1301

Signed-off-by: Nadav Har'El 

---
diff --git a/core/mempool.cc b/core/mempool.cc
--- a/core/mempool.cc
+++ b/core/mempool.cc
@@ -31,11 +31,15 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+
+// recent Boost gets confused by the "hidden" macro we add in some Musl
+// header files, so need to undefine it
+#undef hidden
 #include 
 #include 
 #include 
-#include 
-#include 
 
 TRACEPOINT(trace_memory_malloc, "buf=%p, len=%d, align=%d", void *, size_t,
size_t);
diff --git a/include/osv/callstack.hh b/include/osv/callstack.hh
--- a/include/osv/callstack.hh
+++ b/include/osv/callstack.hh
@@ -8,13 +8,16 @@
 #ifndef CALLSTACK_HH_
 #define CALLSTACK_HH_
 
-#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+// recent Boost gets confused by the "hidden" macro we add in some Musl
+// header files, so need to undefine it
+#undef hidden
+#include 
 
 // An object that instruments tracepoints to collect backtraces.
 //

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/e26dbd0613d8826b%40google.com.


[osv-dev] [COMMIT osv master] libc/pthread: Make pthread_rwlock_trywrlock/tryrdlock posix compatible

2024-01-31 Thread Commit Bot
From: Valentin Ghita 
Committer: WALDEMAR KOZACZUK 
Branch: master

libc/pthread: Make pthread_rwlock_trywrlock/tryrdlock posix compatible

The current implementations of pthread_rwlock_trywrlock and 
pthread_rwlock_tryrdlock
don't follow the POSIX specification and return incorrect values.
Fix the return values to follow the specification.

Signed-off-by: Valentin Ghita 

---
diff --git a/libc/pthread.cc b/libc/pthread.cc
--- a/libc/pthread.cc
+++ b/libc/pthread.cc
@@ -501,8 +501,10 @@ int pthread_rwlock_destroy(pthread_rwlock_t *rw)
 
 int pthread_rwlock_trywrlock(pthread_rwlock_t *rw)
 {
-from_libc(rw)->try_wlock();
-return 0;
+if (from_libc(rw)->try_wlock()) {
+return 0;
+}
+return EBUSY;
 }
 
 int pthread_rwlock_wrlock(pthread_rwlock_t *rw)
@@ -519,7 +521,10 @@ int pthread_rwlock_rdlock(pthread_rwlock_t *rw)
 
 int pthread_rwlock_tryrdlock(pthread_rwlock_t *rw)
 {
-return from_libc(rw)->try_rlock();
+if (from_libc(rw)->try_rlock()) {
+return 0;
+}
+return EBUSY;
 }
 
 int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/80998c06104169da%40google.com.


[osv-dev] [COMMIT osv master] ena: improve driver implementation

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: improve driver implementation

This last patch improves certain aspects of the driver implementation:
- completes LRO handling
- adds a number of tracepoints to help troubleshoot and analyze performance
- pins cleanup worker thread and corresponding MSIX vector to a cpu

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/dev/ena/ena.cc b/bsd/sys/dev/ena/ena.cc
--- a/bsd/sys/dev/ena/ena.cc
+++ b/bsd/sys/dev/ena/ena.cc
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 
 int ena_log_level = ENA_INFO;
 
@@ -398,6 +399,8 @@ ena_free_all_io_rings_resources(struct ena_adapter *adapter)
ena_free_io_ring_resources(adapter, i);
 }
 
+TRACEPOINT(trace_ena_enqueue_wake, "");
+
 static void
 enqueue_work(ena_ring *ring)
 {
@@ -406,6 +409,7 @@ enqueue_work(ena_ring *ring)
ring->enqueue_pending = 0;
 
if (!ring->enqueue_stop) {
+   trace_ena_enqueue_wake();
ena_deferred_mq_start(ring);
}
} while (!ring->enqueue_stop);
@@ -941,6 +945,8 @@ ena_destroy_all_io_queues(struct ena_adapter *adapter)
ena_destroy_all_rx_queues(adapter);
 }
 
+TRACEPOINT(trace_ena_cleanup_wake, "");
+
 static void
 cleanup_work(ena_que *queue)
 {
@@ -952,6 +958,7 @@ cleanup_work(ena_que *queue)
if (!queue->cleanup_stop) {
ena_log(dev, INFO, "cleanup_work: cleaning up queue 
%d", queue->id);
ena_cleanup(queue);
+   trace_ena_cleanup_wake();
}
} while (!queue->cleanup_stop);
 }
@@ -1041,8 +1048,12 @@ ena_create_io_queues(struct ena_adapter *adapter)
for (i = 0; i < adapter->num_io_queues; i++) {
queue = >que[i];
 
+   //We pin each cleanup worker thread and corresponding MSIX 
vector
+   //to one of the cpus (queue modulo #cpus) in order to minimize 
IPIs
+   int cpu = i % sched::cpus.size();
queue->cleanup_thread = sched::thread::make([queue] { 
cleanup_work(queue); },
-   sched::thread::attr().name("ena_cleanup_queue_" + 
std::to_string(i)));
+   sched::thread::attr().name("ena_clean_que_" + 
std::to_string(i)).pin(sched::cpus[cpu]));
+   
queue->cleanup_thread->set_priority(sched::thread::priority_infinity);
queue->cleanup_stop = false;
queue->cleanup_pending = 0;
queue->cleanup_thread->start();
@@ -1067,17 +1078,13 @@ ena_create_io_queues(struct ena_adapter *adapter)
  *
  **/
 
-static std::atomic mgmt_intr_count = {0};
-static std::atomic io_intr_count = {0};
-
 /**
  * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue
  * @arg: interrupt number
  **/
 static void
 ena_intr_msix_mgmnt(void *arg)
 {
-   mgmt_intr_count++;
struct ena_adapter *adapter = (struct ena_adapter *)arg;
 
ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
@@ -1096,7 +1103,6 @@ ena_handle_msix(void *arg)
struct ena_adapter *adapter = queue->adapter;
if_t ifp = adapter->ifp;
 
-   io_intr_count++;
if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return 0;
 
@@ -1220,7 +1226,7 @@ ena_request_io_irq(struct ena_adapter *adapter)
}
 
for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; 
entry++) {
-   auto idx = entry - 1;
+   auto idx = entry - ENA_IO_IRQ_FIRST_IDX;
auto vec = assigned[idx];
auto queue = >que[idx];
if (!_msi.assign_isr(vec, [queue]() { ena_handle_msix(queue); 
})) {
@@ -1239,7 +1245,16 @@ ena_request_io_irq(struct ena_adapter *adapter)
//Save assigned msix vectors
for (int entry = ENA_IO_IRQ_FIRST_IDX; entry < adapter->msix_vecs; 
entry++) {
ena_irq *irq = >irq_tbl[entry];
-   irq->mvector = assigned[entry - 1];
+   auto idx = entry - ENA_IO_IRQ_FIRST_IDX;
+   auto vec = irq->mvector = assigned[idx];
+   //In our case the worker threads are all pinned so we probably 
do not need
+   //to re-pin the interrupt vector
+   auto cpu = idx % sched::cpus.size();
+   std::atomic_thread_fence(std::memory_order_seq_cst);
+   vec->set_affinity(sched::cpus[cpu]->arch.apic_id);
+   std::atomic_thread_fence(std::memory_order_seq_cst);
+
+   ena_log(pdev, INFO, "pinned MSIX vector on queue %d - cpu 
%d\n", idx, cpu);
}
 
_msi.unmask_interrupts(assigned);
@@ -1462,7 +1477,7 @@ ena_up(struct ena_adapter *adapter)
}
 
if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
-   if_link_state_change(adapter->ifp, 

[osv-dev] [COMMIT osv master] ena: add upper "thin" layer in form of OSv driver class

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: add upper "thin" layer in form of OSv driver class

This almost final patch implements a very upper "thin" layer in form of
the aws::ena driver class that subclasses from hw_driver.

The constructor, destructor and probe() merely delegate to functions
ena_attach(), ena_detach() and ena_probe() respectively implemented
in bsd/sys/dev/ena/ena.cc.

Please note that some of the statistics functionality (see fill_stats())
and if_getinfo are left unimplemented for now.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -940,6 +940,9 @@ drivers += drivers/xenclock.o
 drivers += drivers/xenfront.o drivers/xenfront-xenbus.o drivers/xenfront-blk.o
 drivers += drivers/xenplatform-pci.o
 endif
+ifeq ($(conf_drivers_ena),1)
+drivers += drivers/ena.o
+endif
 endif # x64
 
 ifeq ($(arch),aarch64)
diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -310,6 +310,9 @@ void arch_init_premain()
 #if CONF_drivers_ide
 #include "drivers/ide.hh"
 #endif
+#if CONF_drivers_ena
+#include "drivers/ena.hh"
+#endif
 
 extern bool opt_pci_disabled;
 void arch_init_drivers()
@@ -364,6 +367,9 @@ void arch_init_drivers()
 #endif
 #if CONF_drivers_ide
 drvman->register_driver(ide::ide_drive::probe);
+#endif
+#if CONF_drivers_ena
+drvman->register_driver(aws::ena::probe);
 #endif
 boot_time.event("drivers probe");
 drvman->load_all();
diff --git a/conf/profiles/x64/all.mk b/conf/profiles/x64/all.mk
--- a/conf/profiles/x64/all.mk
+++ b/conf/profiles/x64/all.mk
@@ -4,6 +4,6 @@ include conf/profiles/$(arch)/virtio-mmio.mk
 include conf/profiles/$(arch)/virtio-pci.mk
 include conf/profiles/$(arch)/vmware.mk
 include conf/profiles/$(arch)/xen.mk
+include conf/profiles/$(arch)/aws.mk
 
 conf_drivers_vga?=1
-conf_drivers_ena?=1
diff --git a/conf/profiles/x64/aws.mk b/conf/profiles/x64/aws.mk
--- a/conf/profiles/x64/aws.mk
+++ b/conf/profiles/x64/aws.mk
@@ -0,0 +1,7 @@
+conf_drivers_pci?=1
+conf_drivers_acpi?=1
+
+conf_drivers_ena?=1
+
+conf_drivers_pvpanic?=1
+conf_drivers_hpet?=1
diff --git a/conf/profiles/x64/base.mk b/conf/profiles/x64/base.mk
--- a/conf/profiles/x64/base.mk
+++ b/conf/profiles/x64/base.mk
@@ -43,6 +43,11 @@ ifeq ($(conf_drivers_vmxnet3),1)
 export conf_drivers_pci?=1
 endif
 
+export conf_drivers_ena?=0
+ifeq ($(conf_drivers_ena),1)
+export conf_drivers_pci?=1
+endif
+
 export conf_drivers_ide?=0
 ifeq ($(conf_drivers_ide),1)
 export conf_drivers_pci?=1
diff --git a/core/debug.cc b/core/debug.cc
--- a/core/debug.cc
+++ b/core/debug.cc
@@ -47,6 +47,7 @@ bool logger::parse_configuration(void)
 add_tag("poll", logger_info);
 add_tag("dhcp", logger_info);
 add_tag("acpi", logger_error);
+add_tag("ena", logger_debug);
 
 return (true);
 }
diff --git a/drivers/ena.cc b/drivers/ena.cc
--- a/drivers/ena.cc
+++ b/drivers/ena.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include 
+
+#include "drivers/ena.hh"
+#include "drivers/pci-device.hh"
+
+#include 
+
+#include 
+
+extern bool opt_maxnic;
+extern int maxnic;
+
+namespace aws {
+
+#define ena_tag "ena"
+#define ena_d(...)   tprintf_d(ena_tag, __VA_ARGS__)
+#define ena_i(...)   tprintf_i(ena_tag, __VA_ARGS__)
+#define ena_w(...)   tprintf_w(ena_tag, __VA_ARGS__)
+#define ena_e(...)   tprintf_e(ena_tag, __VA_ARGS__)
+
+/* TODO - figure out how and if needed to integrate it - ENA code has it own 
logic to track statistics
+static void if_getinfo(struct ifnet* ifp, struct if_data* out_data)
+{
+ena* _ena = (ena*)ifp->if_softc;
+
+// First - take the ifnet data
+memcpy(out_data, >if_data, sizeof(*out_data));
+
+// then fill the internal statistics we've gathered
+_ena->fill_stats(out_data);
+}*/
+
+void ena::fill_stats(struct if_data* out_data) const
+{
+assert(!out_data->ifi_oerrors && !out_data->ifi_obytes && 
!out_data->ifi_opackets);
+/* TODO - figure out how and if needed to integrate it - ENA code has it 
own logic to track statistics
+out_data->ifi_ipackets += _rxq[0].stats.rx_packets;
+out_data->ifi_ibytes   += _rxq[0].stats.rx_bytes;
+out_data->ifi_iqdrops  += _rxq[0].stats.rx_drops;
+out_data->ifi_ierrors  += _rxq[0].stats.rx_csum_err;
+out_data->ifi_opackets += _txq[0].stats.tx_packets;
+out_data->ifi_obytes   += _txq[0].stats.tx_bytes;
+out_data->ifi_oerrors  += _txq[0].stats.tx_err + _txq[0].stats.tx_drops;
+
+out_data->ifi_iwakeup_stats = _rxq[0].stats.rx_wakeup_stats;
+out_data->ifi_owakeup_stats = _txq[0].stats.tx_wakeup_stats;*/
+}
+
+ena::ena(pci::device )
+: _dev(dev)
+{
+_adapter = nullptr;
+auto ret = ena_attach(&_dev, &_adapter);
+if (ret || !_adapter) {
+   throw 

[osv-dev] [COMMIT osv master] ena: adapt driver admin/setup header to OSv

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: adapt driver admin/setup header to OSv

This patch adapts the admin/setup header ena.h to OSv.

In particular it addresses following:

- import atomic bitset support from FreeBSD tree (see
  https://github.com/freebsd/freebsd-src/blob/main/sys/sys/_bitset.h)

- remove unnecessary fields from ena_adapter struct

- replace the IRQ related fields with OSv equivalent (see ena_irq)

- replace cleanup_task and cleanup_tq in ena_que struct with OSv
  equivalent cleanup_thread

- replace enqueue_task and enqueue_tq in ena_ring struct with OSv
  equivalent enqueue_thread

- remove RSS and DEV_NETMAP artifacts

- for now define counter_* macros to disable related functionality

- replace callout_reset_sbt() with equivalent callout_reset()

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/dev/ena/ena.h b/bsd/sys/dev/ena/ena.h
--- a/bsd/sys/dev/ena/ena.h
+++ b/bsd/sys/dev/ena/ena.h
@@ -38,12 +38,19 @@
 #include "ena_com/ena_com.h"
 #include "ena_com/ena_eth_com.h"
 
+#include 
+#include 
+#include "drivers/pci-device.hh"
+
 #define ENA_DRV_MODULE_VER_MAJOR   2
 #define ENA_DRV_MODULE_VER_MINOR   6
 #define ENA_DRV_MODULE_VER_SUBMINOR3
 
 #define ENA_DRV_MODULE_NAME"ena"
 
+#define__STRING(x) #x  /* stringify without expanding 
x */
+#define__XSTRING(x)__STRING(x) /* expand x, then stringify */
+
 #ifndef ENA_DRV_MODULE_VERSION
 #define ENA_DRV_MODULE_VERSION \
__XSTRING(ENA_DRV_MODULE_VER_MAJOR) "." \
@@ -135,10 +142,12 @@
  * ENA device should send keep alive msg every 1 sec.
  * We wait for 6 sec just to be on the safe side.
  */
-#define ENA_DEFAULT_KEEP_ALIVE_TO  (SBT_1S * 6)
+#define NANOSECONDS_IN_SEC  10l
+#define NANOSECONDS_IN_MSEC 100l
+#define ENA_DEFAULT_KEEP_ALIVE_TO  (6 * NANOSECONDS_IN_SEC)
 
 /* Time in jiffies before concluding the transmitter is hung. */
-#define ENA_DEFAULT_TX_CMP_TO  (SBT_1S * 5)
+#define ENA_DEFAULT_TX_CMP_TO  (5 * NANOSECONDS_IN_SEC)
 
 /* Number of queues to check for missing queues per timer tick */
 #define ENA_DEFAULT_TX_MONITORED_QUEUES(4)
@@ -156,6 +165,45 @@
 #define PCI_DEV_ID_ENA_VF  0xec20
 #define PCI_DEV_ID_ENA_VF_RSERV0   0xec21
 
+//These macros are taken verbatim from FreeBSD code and implement atomic bitset
+#define_BITSET_BITS(sizeof(long) * 8)
+
+#define__howmany(x, y) (((x) + ((y) - 1)) / (y))
+
+#define__bitset_words(_s)  (__howmany(_s, _BITSET_BITS))
+
+#define__constexpr_cond(expr)  (__builtin_constant_p((expr)) && (expr))
+
+#define__bitset_mask(_s, n)
\
+   (1UL << (__constexpr_cond(__bitset_words((_s)) == 1) ?  \
+   (size_t)(n) : ((n) % _BITSET_BITS)))
+
+#define__bitset_word(_s, n)
\
+   (__constexpr_cond(__bitset_words((_s)) == 1) ?  \
+0 : ((n) / _BITSET_BITS))
+
+#defineBITSET_DEFINE(_t, _s)   
\
+struct _t {\
+   long__bits[__bitset_words((_s))];   \
+}
+
+#defineBIT_ZERO(_s, p) do {
\
+   size_t __i; \
+   for (__i = 0; __i < __bitset_words((_s)); __i++)\
+   (p)->__bits[__i] = 0L;  \
+} while (0)
+
+#defineBIT_ISSET(_s, n, p) 
\
+   p)->__bits[__bitset_word(_s, n)] & __bitset_mask((_s), (n))) != 0))
+
+#defineBIT_SET_ATOMIC(_s, n, p)
\
+   atomic_set_long((volatile u_long*)(&(p)->__bits[__bitset_word(_s, n)]), 
\
+   __bitset_mask((_s), n))
+
+#defineBIT_CLR_ATOMIC(_s, n, p)
\
+   atomic_clear_long((volatile u_long*)(&(p)->__bits[__bitset_word(_s, 
n)]),\
+   __bitset_mask((_s), n))
+
 /*
  * Flags indicating current ENA driver state
  */
@@ -174,9 +222,9 @@ enum ena_flags_t {
 BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
 typedef struct _ena_state ena_state_t;
 
-#define ENA_FLAG_ZERO(adapter) \
+#define ENA_FLAG_ZERO(adapter)  \
BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
-#define ENA_FLAG_ISSET(bit, adapter)   \
+#define ENA_FLAG_ISSET(bit, adapter)\
BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
 #define ENA_FLAG_SET_ATOMIC(bit, adapter)  \
BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
@@ -196,39 +244,30 @@ typedef struct _ena_vendor_info_t {
 
 struct ena_irq {
/* Interrupt resources */
-   struct resource *res;
-   driver_filter_t 

[osv-dev] [COMMIT osv master] ena: adapt driver admin/setup implementation to OSv

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: adapt driver admin/setup implementation to OSv

This patch adapts the middle layer of the admin and device setup/teardown
handling logic to make it work in OSv. It is also the last patch
to complete the porting work of FreeBSD ena driver code to work in
OSv.

The code in ena.cc mostly implements the logic to probe, attach
and detach the device and involves interacting with lower-level admin
API of ena_com/ena_com.cc to submit commands to Admin Queue (AQ) and
receive and process completions from Admin Completion Queue (ACQ).
It also implements interrupt handlers and worker threads to process I/O.
For more details read
https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#management-interface.

In particular this patch addresses following:

- change FreeBSD header include paths to match OSv source tree

- eliminate most DMA-related functions ena_*dma_*()

- eliminate metrics task code for now

- eliminate LLQ, RSS and DEV_NETMAP related code

- deactivate counters (aka statistics collection) code

- rewrite ena_dma_alloc() to use OSv memory::alloc_phys_contiguous_aligned()
  and mmu::virt_to_phys() (it probably should not have *dma* in name)

- rewrite the functions that setup MSI/X and implement other PCI-related
  functionality to use OSv PCI code from drivers/pci-* and arch/*/msi.** -
  ena_free_pci_resources(), ena_probe(), ena_enable_msix(),
  ena_setup_mgmnt_intr(), ena_setup_io_intr(), ena_request_mgmnt_irq(),
  ena_request_io_irq(), ena_free_io_irq(), ena_disable_msix()

- replace the calls to drbr_*() functions with buf_ring_*() equivalent ones

- implement the main function of the enqueue worker thread -
  enqueue_work(); this function is used when setting TX resource in
  ena_setup_tx_resources() and replaces FreeBSD version of it -
  enqueue_tq and enqueue_task

- simplify ena_alloc_rx_mbuf() by mostly not using the DMA-related code

- eliminate ena_update_buf_ring_size(), ena_update_queue_size(),
  ena_update_io_rings(), ena_update_io_queue_nb() which are not needed
  as OSv will not support changing ring and queue size (see for example
  
https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#size-of-the-tx-buffer-ring-drbr)
  through ioctl()

- simplify ena_ioctl()

- implement the main function of the cleanup worker thread -
  cleanup_work(); this function is used when setting I/O queues
  in ena_create_io_queues() and replaces FreeBSD version of it -
  cleanup_tq and cleanup_task

- adjust CSUM_* constant to match the OSv version of FreeBSD
  headers

- replace if_set*() function calls with equivalent code directly setting
  fields of if_t structure (for example if_settransmitfn(ifp,
  ena_mq_start) => ifp->if_transmit = ena_mq_start)

- hardcode TX queue memory type to ENA_ADMIN_PLACEMENT_POLICY_HOST (we
  do not support LLQ)

- eliminate LLQ-related code - ena_map_llq_mem_bar(),
  set_default_llq_configurations() and any ifs testing 
ENA_ADMIN_PLACEMENT_POLICY_DEV

- adapt code reading current boot time to use osv::clock::uptime::now()

- adapt ena_handle_msix() and other places to use OSV 
wake_with_irq_or_preemption_disabled()
  instead of taskqueue_enqueue()

- add remaining *cc files to the Makefile - everything should compile

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -587,6 +587,7 @@ bsd += bsd/sys/kern/uipc_sockbuf.o
 bsd += bsd/sys/kern/uipc_socket.o
 bsd += bsd/sys/kern/uipc_syscalls.o
 bsd += bsd/sys/kern/uipc_syscalls_wrap.o
+bsd += bsd/sys/kern/subr_bufring.o
 bsd += bsd/sys/kern/subr_sbuf.o
 bsd += bsd/sys/kern/subr_eventhandler.o
 bsd += bsd/sys/kern/subr_hash.o
@@ -678,6 +679,10 @@ bsd += bsd/sys/dev/hyperv/vmbus/hyperv.o
 endif
 ifeq ($(conf_drivers_ena),1)
 bsd += bsd/sys/contrib/ena_com/ena_eth_com.o
+bsd += bsd/sys/contrib/ena_com/ena_com.o
+bsd += bsd/sys/dev/ena/ena_datapath.o
+bsd += bsd/sys/dev/ena/ena.o
+$(out)/bsd/sys/dev/ena/%.o: CXXFLAGS += -Ibsd/sys/contrib
 endif
 endif
 
diff --git a/bsd/sys/dev/ena/ena.cc b/bsd/sys/dev/ena/ena.cc
--- a/bsd/sys/dev/ena/ena.cc
+++ b/bsd/sys/dev/ena/ena.cc
null
diff --git a/conf/profiles/x64/all.mk b/conf/profiles/x64/all.mk
--- a/conf/profiles/x64/all.mk
+++ b/conf/profiles/x64/all.mk
@@ -6,3 +6,4 @@ include conf/profiles/$(arch)/vmware.mk
 include conf/profiles/$(arch)/xen.mk
 
 conf_drivers_vga?=1
+conf_drivers_ena?=1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/18efe7060eaf15ad%40google.com.


[osv-dev] [COMMIT osv master] ena: adapt data path code to OSv

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: adapt data path code to OSv

This patch adapts the middle layer of data path handling logic
to make it work in OSv. For more details about it please see
https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#data-path-interface.
and https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#data-path.

At a high level, the main entry point for the RX part is ena_cleanup() that
delegates to ena_rx_cleanup() and eventually ends up calling net
channel ifp->if_classifier.post_packet() (fast path) or ifp->if_input()
(slow path). The ena_cleanup() is called by cleanup_work thread that
is woken every time the MSI-X vector for given TX/RX queue is called.

Similarly, the main entry point for TX part is ena_mq_start() which
is what ifp->if_transmit is set to and ena_deferred_mq_start() which is
called by enqueue_work thread that is woken in ena_mq_start() and
ena_tx_cleanup() (other part of ena_cleanup() routine).

Finally, ena_qflush is what ifp->if_qflush is set to.

The particular code changes to ena_datapath.* involve following:

- implement critical_enter()/critical_exit() used by buf_ring (see
  https://man.freebsd.org/cgi/man.cgi?query=critical_enter)

- for now remove RSS and DEV_NETMAP related code

- replace the drbr_* functions with buf_ring_* equivalent ones

- replace taskqueue_enqueue() with OSv wake_with()

- adapt references to the mbuf fields to match OSv version of
  it (please see
https://github.com/freebsd/freebsd-src/commit/3d1a9ed34e10dc6c5b6d86617464534ac9fc928a
  commit that changed the layout of mbuf struct a bit)

- simplify ena_tx_map_mbuf() given we hard-code to use 
ENA_ADMIN_PLACEMENT_POLICY_HOST
  TX queue type and do not use bus_dma API (see
  https://man.freebsd.org/cgi/man.cgi?query=bus_dma)

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/dev/ena/ena_datapath.cc b/bsd/sys/dev/ena/ena_datapath.cc
--- a/bsd/sys/dev/ena/ena_datapath.cc
+++ b/bsd/sys/dev/ena/ena_datapath.cc
@@ -30,16 +30,20 @@
 #include 
 __FBSDID("$FreeBSD$");
 
+//#define ENA_LOG_ENABLE 1
+//#define ENA_LOG_IO_ENABLE 1
+
 #include "ena.h"
 #include "ena_datapath.h"
-#ifdef DEV_NETMAP
-#include "ena_netmap.h"
-#endif /* DEV_NETMAP */
-#ifdef RSS
-#include 
-#endif /* RSS */
 
-#include 
+#include 
+
+static inline void critical_enter()  { sched::preempt_disable(); }
+static inline void critical_exit() { sched::preempt_enable(); }
+
+#include 
+
+//#include 
 
 /*
  *  Static functions prototypes
@@ -49,8 +53,6 @@ static int ena_tx_cleanup(struct ena_ring *);
 static int ena_rx_cleanup(struct ena_ring *);
 static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
 struct ena_com_io_cq *io_cq, uint16_t *req_id);
-static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
-struct mbuf *);
 static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info 
*,
 struct ena_com_rx_ctx *, uint16_t *);
 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
@@ -66,9 +68,8 @@ static void ena_start_xmit(struct ena_ring *);
  */
 
 void
-ena_cleanup(void *arg, int pending)
+ena_cleanup(struct ena_que *que)
 {
-   struct ena_que *que = arg;
struct ena_adapter *adapter = que->adapter;
if_t ifp = adapter->ifp;
struct ena_ring *tx_ring;
@@ -78,25 +79,25 @@ ena_cleanup(void *arg, int pending)
int qid, ena_qid;
int txc, rxc, i;
 
-   if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+   if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;
 
-   ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
+   ena_log_io(adapter->pdev, INFO, "MSI-X TX/RX routine");
 
tx_ring = que->tx_ring;
rx_ring = que->rx_ring;
qid = que->id;
ena_qid = ENA_IO_TXQ_IDX(qid);
io_cq = >ena_dev->io_cq_queues[ena_qid];
 
-   atomic_store_8(_ring->first_interrupt, 1);
-   atomic_store_8(_ring->first_interrupt, 1);
+   tx_ring->first_interrupt.store(1);
+   rx_ring->first_interrupt.store(1);
 
for (i = 0; i < ENA_CLEAN_BUDGET; ++i) {
rxc = ena_rx_cleanup(rx_ring);
txc = ena_tx_cleanup(tx_ring);
 
-   if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+   if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;
 
if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
@@ -111,13 +112,12 @@ ena_cleanup(void *arg, int pending)
 }
 
 void
-ena_deferred_mq_start(void *arg, int pending)
+ena_deferred_mq_start(struct ena_ring *tx_ring )
 {
-   struct ena_ring *tx_ring = (struct ena_ring *)arg;
if_t ifp = tx_ring->adapter->ifp;
 
-   while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running &&

[osv-dev] [COMMIT osv master] mbuf: add mtodo() macro

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

mbuf: add mtodo() macro

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/sys/mbuf.h b/bsd/sys/sys/mbuf.h
--- a/bsd/sys/sys/mbuf.h
+++ b/bsd/sys/sys/mbuf.h
@@ -64,8 +64,10 @@ __BEGIN_DECLS
  * type:
  *
  * mtod(m, t)  -- Convert mbuf pointer to data pointer of correct type.
+ * mtodo(m, o) -- Same as above but with offset 'o' into data.
  */
 #definemtod(m, t)  ((t)((m)->m_hdr.mh_data))
+#definemtodo(m, o) ((void *)(((m)->m_hdr.mh_data) + (o)))
 
 /*
  * Argument structure passed to UMA routines during mbuf and packet

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/f0a2ab060eaf14c0%40google.com.


[osv-dev] [COMMIT osv master] bsd: import lock-less structure buf_ring from FreeBSD

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

bsd: import lock-less structure buf_ring from FreeBSD

This patch imports new lock-less structure - buf_ring from FreeBSD
source tree (see https://man.freebsd.org/cgi/man.cgi?query=buf_ring).
The buf_ring is used by ENA driver as a multiple-producer,
single-consumer lockless ring for buffering extra mbufs coming from
the stack in case the Tx procedure is busy sending the packets or
the Tx ring is full.

OSv has its own lock-less single-producer single-consumer ring implementation
(see include/lockfree/ring.hh) but it is not clear if and how we could
somehow adapt it in similar way unordered-queue-mpsc.hh does to implement
multiple-producer single-consumer collection that does not preserve
insertion order. Given that, I have found it easier to import and use the
FreeBSD version of it as is.

Please note the original FreeBSD ena code uses drbr_* functions that
delegate to buf_ring_* or ALTQ if it is enabled (see
https://man.freebsd.org/cgi/man.cgi?query=drbr_enqueue_). Given OSv
does not implement ALTQ
(https://www.usenix.org/legacy/publications/library/proceedings/lisa97/failsafe/usenix98/full_papers/cho/cho_html/cho.html#ALTQ),
the adapted version of ena driver ends up using the buf_ring_* functions
directly.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/kern/subr_bufring.c b/bsd/sys/kern/subr_bufring.c
--- a/bsd/sys/kern/subr_bufring.c
+++ b/bsd/sys/kern/subr_bufring.c
@@ -0,0 +1,63 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2007, 2008 Kip Macy 
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+//#include 
+#include 
+
+struct buf_ring *
+buf_ring_alloc(int count, int type, int flags, struct mtx *lock)
+{
+   struct buf_ring *br;
+
+   KASSERT(powerof2(count), ("buf ring must be size power of 2"));
+
+   br = malloc(sizeof(struct buf_ring) + count*sizeof(caddr_t),
+   type, flags|M_ZERO);
+   if (br == NULL)
+   return (NULL);
+#ifdef DEBUG_BUFRING
+   br->br_lock = lock;
+#endif 
+   br->br_prod_size = br->br_cons_size = count;
+   br->br_prod_mask = br->br_cons_mask = count-1;
+   br->br_prod_head = br->br_cons_head = 0;
+   br->br_prod_tail = br->br_cons_tail = 0;
+   
+   return (br);
+}
+
+void
+buf_ring_free(struct buf_ring *br, int type)
+{
+   free(br, type);
+}
diff --git a/bsd/sys/sys/buf_ring.h b/bsd/sys/sys/buf_ring.h
--- a/bsd/sys/sys/buf_ring.h
+++ b/bsd/sys/sys/buf_ring.h
@@ -0,0 +1,371 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2007-2009 Kip Macy 
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; 

[osv-dev] [COMMIT osv master] ena: convert ena_com/ena_com.cc to C++ and eliminate unneeded code

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: convert ena_com/ena_com.cc to C++ and eliminate unneeded code

The ena_com.cc is the 2nd of the 2 source files that make up
a low-level ena_com API. This part is used in the intermediate level
to mainly implement the admin functionality like for example
creating I/O queues. See 
https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#ena-source-code-directory-structure
for more insight.

This patch:

- uses C++ constructs to apply type conversions where necessary.

- eliminates the MSI-X interrupt-based logic to handle completions
  of admin commands (see ena_com_wait_and_process_admin_cq_interrupts())
  and leaves the polling mode logic the default one

- eliminates the RSS (Receive-Side Scaling) related code for now

- implements busy_sleep() used by ENA_USLEEP and ENA_UDELAY macros

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/contrib/ena_com/ena_com.cc 
b/bsd/sys/contrib/ena_com/ena_com.cc
--- a/bsd/sys/contrib/ena_com/ena_com.cc
+++ b/bsd/sys/contrib/ena_com/ena_com.cc
@@ -32,6 +32,8 @@
  */
 
 #include "ena_com.h"
+#include 
+#include 
 
 /*/
 /*/
@@ -74,6 +76,22 @@
 
 #define ENA_MAX_ADMIN_POLL_US 5000
 
+#include 
+#include 
+
+static inline void busy_sleep(u64 nanoseconds)
+{
+   auto end = osv::clock::uptime::now().time_since_epoch().count() + 
nanoseconds;
+   while (osv::clock::uptime::now().time_since_epoch().count() < end) {
+#ifdef __x86_64__
+   __asm __volatile("pause");
+#endif
+#ifdef __aarch64__
+   __asm __volatile("isb sy");
+#endif
+   }
+}
+
 /*/
 /*/
 /*/
@@ -86,7 +104,6 @@ enum ena_cmd_status {
 };
 
 struct ena_comp_ctx {
-   ena_wait_event_t wait_event;
struct ena_admin_acq_entry *user_cqe;
u32 comp_size;
enum ena_cmd_status status;
@@ -118,11 +135,10 @@ static int ena_com_mem_addr_set(struct ena_com_dev 
*ena_dev,
 
 static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue)
 {
-   struct ena_com_dev *ena_dev = admin_queue->ena_dev;
struct ena_com_admin_sq *sq = _queue->sq;
u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth);
 
-   ENA_MEM_ALLOC_COHERENT(admin_queue->q_dmadev, size, sq->entries, 
sq->dma_addr,
+   ENA_MEM_ALLOC_COHERENT(static_cast(admin_queue->q_dmadev), 
size, sq->entries, sq->dma_addr,
   sq->mem_handle);
 
if (!sq->entries) {
@@ -141,11 +157,10 @@ static int ena_com_admin_init_sq(struct 
ena_com_admin_queue *admin_queue)
 
 static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue)
 {
-   struct ena_com_dev *ena_dev = admin_queue->ena_dev;
struct ena_com_admin_cq *cq = _queue->cq;
u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth);
 
-   ENA_MEM_ALLOC_COHERENT(admin_queue->q_dmadev, size, cq->entries, 
cq->dma_addr,
+   ENA_MEM_ALLOC_COHERENT(static_cast(admin_queue->q_dmadev), 
size, cq->entries, cq->dma_addr,
   cq->mem_handle);
 
if (!cq->entries)  {
@@ -168,7 +183,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev 
*ena_dev,
 
ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
-   ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev, size,
+   ENA_MEM_ALLOC_COHERENT(static_cast(ena_dev->dmadev), size,
aenq->entries,
aenq->dma_addr,
aenq->mem_handle);
@@ -261,7 +276,7 @@ static struct ena_comp_ctx 
*__ena_com_submit_admin_cmd(struct ena_com_admin_queu
if (cnt >= admin_queue->q_depth) {
ena_trc_dbg(admin_queue->ena_dev, "Admin queue is full.\n");
admin_queue->stats.out_of_space++;
-   return ERR_PTR(ENA_COM_NO_SPACE);
+   return static_cast(ERR_PTR(ENA_COM_NO_SPACE));
}
 
cmd_id = admin_queue->curr_cmd_id;
@@ -274,15 +289,13 @@ static struct ena_comp_ctx 
*__ena_com_submit_admin_cmd(struct ena_com_admin_queu
 
comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true);
if (unlikely(!comp_ctx))
-   return ERR_PTR(ENA_COM_INVAL);
+   return static_cast(ERR_PTR(ENA_COM_INVAL));
 
comp_ctx->status = ENA_CMD_SUBMITTED;
comp_ctx->comp_size = (u32)comp_size_in_bytes;
comp_ctx->user_cqe = comp;
comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
 
-   ENA_WAIT_EVENT_CLEAR(comp_ctx->wait_event);
-
memcpy(_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes);
 
admin_queue->curr_cmd_id 

[osv-dev] [COMMIT osv master] ena: convert ena_com/ena_eth_com.cc to C++

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: convert ena_com/ena_eth_com.cc to C++

The ena_eth_com.cc is one of the 2 source files that make up
a low-level ena_com API. This part is used in the intermediate level
to implement data path functionality.

This patch uses C++ constructs to apply type conversions where
necessary.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -676,6 +676,9 @@ endif
 ifeq ($(conf_drivers_hyperv),1)
 bsd += bsd/sys/dev/hyperv/vmbus/hyperv.o
 endif
+ifeq ($(conf_drivers_ena),1)
+bsd += bsd/sys/contrib/ena_com/ena_eth_com.o
+endif
 endif
 
 bsd += bsd/sys/dev/random/hash.o
diff --git a/bsd/sys/contrib/ena_com/ena_eth_com.cc 
b/bsd/sys/contrib/ena_com/ena_eth_com.cc
--- a/bsd/sys/contrib/ena_com/ena_eth_com.cc
+++ b/bsd/sys/contrib/ena_com/ena_eth_com.cc
@@ -116,7 +116,7 @@ static int ena_com_write_bounce_buffer_to_dev(struct 
ena_com_io_sq *io_sq,
 }
 
 static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
-u8 *header_src,
+void *header_src,
 u16 header_len)
 {
struct ena_com_llq_pkt_ctrl *pkt_ctrl = _sq->llq_buf_ctrl;
@@ -303,7 +303,7 @@ static int ena_com_create_meta(struct ena_com_io_sq *io_sq,
 {
struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
 
-   meta_desc = get_sq_desc(io_sq);
+   meta_desc = reinterpret_cast(get_sq_desc(io_sq));
if (unlikely(!meta_desc))
return ENA_COM_FAULT;
 
@@ -377,11 +377,11 @@ static void ena_com_rx_set_flags(struct ena_com_io_cq 
*io_cq,
 struct ena_com_rx_ctx *ena_rx_ctx,
 struct ena_eth_io_rx_cdesc_base *cdesc)
 {
-   ena_rx_ctx->l3_proto = cdesc->status &
-   ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK;
-   ena_rx_ctx->l4_proto =
+   ena_rx_ctx->l3_proto = 
static_cast(cdesc->status &
+   ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK);
+   ena_rx_ctx->l4_proto = static_cast(
(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
-   ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
+   ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT);
ena_rx_ctx->l3_csum_err =
!!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) 
>>
ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT);
@@ -470,7 +470,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
return rc;
}
 
-   desc = get_sq_desc(io_sq);
+   desc = reinterpret_cast(get_sq_desc(io_sq));
if (unlikely(!desc))
return ENA_COM_FAULT;
memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
@@ -531,7 +531,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
return rc;
}
 
-   desc = get_sq_desc(io_sq);
+   desc = reinterpret_cast(get_sq_desc(io_sq));
if (unlikely(!desc))
return ENA_COM_FAULT;
 
@@ -647,7 +647,7 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
if (unlikely(!ena_com_sq_have_enough_space(io_sq, 1)))
return ENA_COM_NO_SPACE;
 
-   desc = get_sq_desc(io_sq);
+   desc = reinterpret_cast(get_sq_desc(io_sq));
if (unlikely(!desc))
return ENA_COM_FAULT;
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/9e3b82060eaf1402%40google.com.


[osv-dev] [COMMIT osv master] core: add IRQ version of spinlock needed by ena driver

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

core: add IRQ version of spinlock needed by ena driver

It turns out the ena driver code uses spinlocks (see ENA_SPINLOCK_*
macros) in relatively few places when submitting and processing admin
commands which happens during the ena device attach and detach stage.
The analysis of the FreeBSD version of mutex with type MTX_SPIN and
mtx_lock_spin() and mtx_unlock_spin() (see
https://man.freebsd.org/cgi/man.cgi?query=mtx_lock_spin) indicates
the interrupts should be disabled before spinning.

For that reason we add new type of spinlock - irq_spinlock - which
is almost identical to regular spinlock but uses irq_lock
to disable and enable interrupts before acquiring a lock and
after releasing respectively.

At the same time, this commit also adjusts the spinning loop
to use correct architecture specific instruction.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/spinlock.cc b/core/spinlock.cc
--- a/core/spinlock.cc
+++ b/core/spinlock.cc
@@ -13,7 +13,12 @@ void spin_lock(spinlock_t *sl)
 sched::preempt_disable();
 while (__sync_lock_test_and_set(>_lock, 1)) {
 while (sl->_lock) {
-barrier();
+#ifdef __x86_64__
+__asm __volatile("pause");
+#endif
+#ifdef __aarch64__
+__asm __volatile("isb sy");
+#endif
 }
 }
 }
@@ -33,3 +38,34 @@ void spin_unlock(spinlock_t *sl)
 __sync_lock_release(>_lock, 0);
 sched::preempt_enable();
 }
+
+void irq_spin_lock(irq_spinlock_t *sl)
+{
+sl->_irq_lock.lock();
+while (__sync_lock_test_and_set(>_lock, 1)) {
+while (sl->_lock) {
+#ifdef __x86_64__
+__asm __volatile("pause");
+#endif
+#ifdef __aarch64__
+__asm __volatile("isb sy");
+#endif
+}
+}
+}
+
+bool irq_spin_trylock(irq_spinlock_t *sl)
+{
+sl->_irq_lock.lock();
+if (__sync_lock_test_and_set(>_lock, 1)) {
+sl->_irq_lock.unlock();
+return false;
+}
+return true;
+}
+
+void irq_spin_unlock(irq_spinlock_t *sl)
+{
+__sync_lock_release(>_lock, 0);
+sl->_irq_lock.unlock();
+}
diff --git a/include/osv/spinlock.h b/include/osv/spinlock.h
--- a/include/osv/spinlock.h
+++ b/include/osv/spinlock.h
@@ -9,6 +9,7 @@
 #define OSV_SPINLOCK_H_
 
 #include 
+#include 
 
 __BEGIN_DECLS
 
@@ -33,6 +34,26 @@ void spin_lock(spinlock_t *sl);
 bool spin_trylock(spinlock_t *sl);
 void spin_unlock(spinlock_t *sl);
 
+typedef struct irq_spinlock {
+bool _lock;
+irq_save_lock_type _irq_lock;
+#ifdef __cplusplus
+// additional convenience methods for C++
+inline constexpr irq_spinlock() : _lock(false), _irq_lock() { }
+inline bool trylock();
+inline void lock();
+inline void unlock();
+#endif
+} irq_spinlock_t;
+
+static inline void irq_spinlock_init(irq_spinlock_t *sl)
+{
+sl->_lock = false;
+}
+void irq_spin_lock(irq_spinlock_t *sl);
+bool irq_spin_trylock(irq_spinlock_t *sl);
+void irq_spin_unlock(irq_spinlock_t *sl);
+
 __END_DECLS
 
 #ifdef __cplusplus
@@ -44,6 +65,15 @@ void spinlock::unlock()
 {
 spin_unlock(this);
 }
+
+void irq_spinlock::lock()
+{
+irq_spin_lock(this);
+}
+void irq_spinlock::unlock()
+{
+irq_spin_unlock(this);
+}
 #endif
 
 #endif /* OSV_SPINLOCK_H_ */
diff --git a/libc/pthread.cc b/libc/pthread.cc
--- a/libc/pthread.cc
+++ b/libc/pthread.cc
@@ -364,8 +364,12 @@ int pthread_spin_lock(pthread_spinlock_t *lock)
 bool* b = from_libc(lock);
 while (__sync_lock_test_and_set(b, 1)) {
 while (*b) {
-barrier();
-// FIXME: use "PAUSE" instruction here
+#ifdef __x86_64__
+__asm __volatile("pause");
+#endif
+#ifdef __aarch64__
+__asm __volatile("isb sy");
+#endif
 }
 }
 return 0; // We can't really do deadlock detection

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/8ad21c060eaf1432%40google.com.


[osv-dev] [COMMIT osv master] ena: replace unsupported FreeBSD mechanisms with OSv equivalent ones

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena: replace unsupported FreeBSD mechanisms with OSv equivalent ones

This adapts ena_com/ena_plat.h by replacing some unsupported FreeBSD
mechanisms with the OSv equivalent ones.

Specifically it:

- changes FreeBSD header include paths to match OSv source tree

- reimplements ENA_*SLEEP and ENA_UDELAY macros to use busy_sleep()
  function instead of pause_sbt(); these macros are used in ena_com.cc
  where we cannot use regular sleep mechanism

- reimplements ENA_SPINLOCK_* macros to use new OSv irq_spinlock_*
  methods which are defined in later patch

- removes ENA_WAIT_* macros which are not needed because we use
  the polling mode when submitting and processing admin commands
  (like for example create an I/O queue for RX or TX)

- removes FreeBSD bus_dma* functions and replaces where needed
  with OSv equivalent code

- replaces FreeBSD way of handling PCI by adapting code to
  use OSv pci::bar and reg_bar->readl() and reg_bar->writel()

- converts C casts to C++ ones

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/contrib/ena_com/ena_plat.h 
b/bsd/sys/contrib/ena_com/ena_plat.h
--- a/bsd/sys/contrib/ena_com/ena_plat.h
+++ b/bsd/sys/contrib/ena_com/ena_plat.h
@@ -41,11 +41,6 @@ __FBSDID("$FreeBSD$");
 #include 
 
 #include 
-#include 
-#if __FreeBSD_version > 1200055
-#include 
-#endif
-#include 
 #include 
 #include 
 #include 
@@ -54,13 +49,11 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
-#include 
-#include 
+#include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include 
@@ -70,29 +63,25 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+//#include 
+#include 
+#include 
+#include 
 
 #include "ena_fbsd_log.h"
 
@@ -107,9 +96,7 @@ extern struct ena_bus_space ebs;
(type *)((uintptr_t)__p - offsetof(type, member));  \
})
 
-#define ena_trace(ctx, level, fmt, args...)\
-   ena_log((ctx)->dmadev, level, "%s() [TID:%d]: " \
-   fmt, __func__, curthread->td_tid, ##args)
+#define ena_trace(ctx, level, fmt, args...) do {} while (0)
 
 #define ena_trc_dbg(ctx, format, arg...)   \
ena_trace(ctx, DBG, format, ##arg)
@@ -175,12 +162,12 @@ static inline long PTR_ERR(const void *ptr)
 
 #define ENA_NODE_ANY   (-1)
 
-#define ENA_MSLEEP(x)  pause_sbt("ena", SBT_1MS * (x), SBT_1MS, 0)
-#define ENA_USLEEP(x)  pause_sbt("ena", SBT_1US * (x), SBT_1US, 0)
-#define ENA_UDELAY(x)  DELAY(x)
+#define ENA_MSLEEP(x)  busy_sleep(100 * x)
+#define ENA_USLEEP(x)  busy_sleep(1000 * x)
+#define ENA_UDELAY(x)  busy_sleep(1000 * x)
 #define ENA_GET_SYSTEM_TIMEOUT(timeout_us) \
-((long)cputick2usec(cpu_ticks()) + (timeout_us))
-#define ENA_TIME_EXPIRE(timeout)  ((timeout) < cputick2usec(cpu_ticks()))
+((osv::clock::uptime::now().time_since_epoch().count() / 1000) + 
(timeout_us))
+#define ENA_TIME_EXPIRE(timeout)  ((timeout) < 
(osv::clock::uptime::now().time_since_epoch().count() / 1000))
 #define ENA_MIGHT_SLEEP()
 
 #define min_t(type, _x, _y) ((type)(_x) < (type)(_y) ? (type)(_x) : (type)(_y))
@@ -194,60 +181,24 @@ static inline long PTR_ERR(const void *ptr)
 #define ENA_MAX16(x,y) MAX(x, y)
 #define ENA_MAX8(x,y)  MAX(x, y)
 
+#include 
 /* Spinlock related methods */
-#define ena_spinlock_t struct mtx
+#define ena_spinlock_t irq_spinlock_t
+#define mtx_initialized(spinlock) (1)
 #define ENA_SPINLOCK_INIT(spinlock)\
-   mtx_init(&(spinlock), "ena_spin", NULL, MTX_SPIN)
+   irq_spinlock_init(&(spinlock))
 #define ENA_SPINLOCK_DESTROY(spinlock) \
do {\
-   if (mtx_initialized(&(spinlock)))   \
-   mtx_destroy(&(spinlock));   \
} while (0)
 #define ENA_SPINLOCK_LOCK(spinlock, flags) \
do {\
(void)(flags);  \
-   mtx_lock_spin(&(spinlock)); \
+   irq_spin_lock(&(spinlock)); \
} while (0)
 #define ENA_SPINLOCK_UNLOCK(spinlock, flags)   \
do {\
(void)(flags);  \
-   mtx_unlock_spin(&(spinlock));   \
-   } while (0)
-
-
-/* Wait queue related 

[osv-dev] [COMMIT osv master] ena_com: adapt headers to work in C++ code

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena_com: adapt headers to work in C++ code

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/contrib/ena_com/ena_com.h 
b/bsd/sys/contrib/ena_com/ena_com.h
--- a/bsd/sys/contrib/ena_com/ena_com.h
+++ b/bsd/sys/contrib/ena_com/ena_com.h
@@ -977,7 +977,8 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
  */
 static inline struct ena_com_dev *ena_com_io_sq_to_ena_dev(struct 
ena_com_io_sq *io_sq)
 {
-   return container_of(io_sq, struct ena_com_dev, 
io_sq_queues[io_sq->qid]);
+   ena_com_io_sq *first = io_sq - io_sq->qid;
+   return reinterpret_cast(((uintptr_t)first - 
offsetof(ena_com_dev, io_sq_queues)));
 }
 
 /* ena_com_io_cq_to_ena_dev - Extract ena_com_dev using contained field io_cq.
@@ -987,7 +988,8 @@ static inline struct ena_com_dev 
*ena_com_io_sq_to_ena_dev(struct ena_com_io_sq
  */
 static inline struct ena_com_dev *ena_com_io_cq_to_ena_dev(struct 
ena_com_io_cq *io_cq)
 {
-   return container_of(io_cq, struct ena_com_dev, 
io_cq_queues[io_cq->qid]);
+   ena_com_io_cq *first = io_cq - io_cq->qid;
+   return reinterpret_cast(((uintptr_t)first - 
offsetof(ena_com_dev, io_cq_queues)));
 }
 
 static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev 
*ena_dev)
diff --git a/bsd/sys/contrib/ena_com/ena_eth_com.h 
b/bsd/sys/contrib/ena_com/ena_eth_com.h
--- a/bsd/sys/contrib/ena_com/ena_eth_com.h
+++ b/bsd/sys/contrib/ena_com/ena_eth_com.h
@@ -34,10 +34,11 @@
 #ifndef ENA_ETH_COM_H_
 #define ENA_ETH_COM_H_
 
+#include "ena_com.h"
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
-#include "ena_com.h"
 
 /* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
 #define ENA_COMP_HEAD_THRESH 4

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/63ef7a060eaf1480%40google.com.


[osv-dev] [COMMIT osv master] ena_log: adapt to OSv by switching to tprintf()

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

ena_log: adapt to OSv by switching to tprintf()

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/contrib/ena_com/ena_fbsd_log.h 
b/bsd/sys/contrib/ena_com/ena_fbsd_log.h
--- a/bsd/sys/contrib/ena_com/ena_fbsd_log.h
+++ b/bsd/sys/contrib/ena_com/ena_fbsd_log.h
@@ -43,22 +43,29 @@ enum ena_log_t {
 
 extern int ena_log_level;
 
+#define ena_log_unused(dev, level, fmt, args...)   \
+   do {\
+   } while (0)
+
+#ifdef ENA_LOG_ENABLE
 #define ena_log(dev, level, fmt, args...)  \
do {\
if (ENA_ ## level <= ena_log_level) \
-   device_printf((dev), fmt, ##args);  \
+   tprintf("ena", logger_debug, fmt, ##args);\
} while (0)
 
 #define ena_log_raw(level, fmt, args...)   \
do {\
if (ENA_ ## level <= ena_log_level) \
printf(fmt, ##args);\
} while (0)
+#else
+#define ena_log(dev, level, fmt, args...)  \
+   ena_log_unused((dev), level, fmt, ##args)
 
-#define ena_log_unused(dev, level, fmt, args...)   \
-   do {\
-   (void)(dev);\
-   } while (0)
+#define ena_log_raw(level, fmt, args...)   \
+   ena_log_unused((dev), level, fmt, ##args)
+#endif
 
 #ifdef ENA_LOG_IO_ENABLE
 #define ena_log_io(dev, level, fmt, args...)   \

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/4fc120060eaf14c4%40google.com.


[osv-dev] [COMMIT osv master] aarch64: refactor GICv2 code for introduction of GICv3

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

aarch64: refactor GICv2 code for introduction of GICv3

This patch refactors existing implementation of the GICv2
driver found under arch/aarch64/gic.** to allow for the upcoming
introduction of GICv3 support.

In essence, we create new base class gic_driver with mostly
virtual functions intended to provide an abstraction of the GIC driver.
We also extract common code into gic_dist class under gic-common.**.
Finally, we move and refactor a little the original GICv2 implementation
found under gic.** to gic-v2.**.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -993,7 +993,8 @@ $(out)/arch/x64/string-ssse3.o: CXXFLAGS += -mssse3
 ifeq ($(arch),aarch64)
 objects += arch/$(arch)/psci.o
 objects += arch/$(arch)/arm-clock.o
-objects += arch/$(arch)/gic.o
+objects += arch/$(arch)/gic-common.o
+objects += arch/$(arch)/gic-v2.o
 objects += arch/$(arch)/arch-dtb.o
 objects += arch/$(arch)/hypercall.o
 objects += arch/$(arch)/memset.o
diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -53,7 +53,7 @@ public:
 inline void arch_cpu::init_on_cpu()
 {
 if (this->smp_idx != 0) {
-gic::gic->init_cpu(this->smp_idx);
+gic::gic->init_on_secondary_cpu(this->smp_idx);
 }
 }
 
diff --git a/arch/aarch64/arch-interrupt.hh b/arch/aarch64/arch-interrupt.hh
--- a/arch/aarch64/arch-interrupt.hh
+++ b/arch/aarch64/arch-interrupt.hh
@@ -9,7 +9,7 @@
 #define ARCH_INTERRUPT_HH
 
 #include "exceptions.hh"
-#include "gic.hh"
+#include "gic-common.hh"
 
 #include 
 
diff --git a/arch/aarch64/arch-setup.cc b/arch/aarch64/arch-setup.cc
--- a/arch/aarch64/arch-setup.cc
+++ b/arch/aarch64/arch-setup.cc
@@ -23,6 +23,7 @@
 
 #include "arch-mmu.hh"
 #include "arch-dtb.hh"
+#include "gic-v2.hh"
 
 #include "drivers/console.hh"
 #include "drivers/pl011.hh"
@@ -122,17 +123,21 @@ void arch_setup_free_memory()
 }
 #endif
 
-/* linear_map [TTBR0 - GIC DIST and GIC CPU] */
-u64 dist, cpu;
-size_t dist_len, cpu_len;
-if (!dtb_get_gic_v2(, _len, , _len)) {
-abort("arch-setup: failed to get GICv2 information from dtb.\n");
+//Locate GICv2 information in DTB and construct corresponding GIC driver
+//and map relevant physical memory
+u64 dist, cpuif;
+size_t dist_len, cpuif_len;
+if (dtb_get_gic_v2(, _len, , _len)) {
+gic::gic = new gic::gic_v2_driver(dist, cpuif);
+/* linear_map [TTBR0 - GIC CPUIF] */
+mmu::linear_map((void *)cpuif, (mmu::phys)cpuif, cpuif_len, 
"gic_cpuif", mmu::page_size,
+mmu::mattr::dev);
+} else {
+abort("arch-setup: failed to get GiCv2 information from dtb.\n");
 }
-gic::gic = new gic::gic_driver(dist, cpu);
+/* linear_map [TTBR0 - GIC DIST] */
 mmu::linear_map((void *)dist, (mmu::phys)dist, dist_len, "gic_dist", 
mmu::page_size,
 mmu::mattr::dev);
-mmu::linear_map((void *)cpu, (mmu::phys)cpu, cpu_len, "gic_cpu", 
mmu::page_size,
-mmu::mattr::dev);
 
 #if CONF_drivers_pci
 if (!opt_pci_disabled) {
diff --git a/arch/aarch64/exceptions.cc b/arch/aarch64/exceptions.cc
--- a/arch/aarch64/exceptions.cc
+++ b/arch/aarch64/exceptions.cc
@@ -37,10 +37,9 @@ interrupt_table::interrupt_table() {
 debug_early_entry("interrupt_table::interrupt_table()");
 #endif
 
-gic::gic->init_cpu(0);
-gic::gic->init_dist(0);
+gic::gic->init_on_primary_cpu();
 
-this->nr_irqs = gic::gic->nr_irqs;
+this->nr_irqs = gic::gic->nr_of_irqs();
 #if CONF_logger_debug
 debug_early("interrupt table: gic driver created.\n");
 #endif
@@ -163,7 +162,7 @@ void interrupt(exception_frame* frame)
 
 /* note that special values 1022 and 1023 are used for
group 1 and spurious interrupts respectively. */
-if (irq >= gic::gic->nr_irqs) {
+if (irq >= gic::gic->nr_of_irqs()) {
 debug_early_u64("special InterruptID detected irq=", irq);
 
 } else {
diff --git a/arch/aarch64/exceptions.hh b/arch/aarch64/exceptions.hh
--- a/arch/aarch64/exceptions.hh
+++ b/arch/aarch64/exceptions.hh
@@ -18,7 +18,7 @@
 #include 
 #include 
 
-#include "gic.hh"
+#include "gic-common.hh"
 
 struct exception_frame {
 u64 regs[31];
diff --git a/arch/aarch64/gic-common.cc b/arch/aarch64/gic-common.cc
--- a/arch/aarch64/gic-common.cc
+++ b/arch/aarch64/gic-common.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2014 Huawei Technologies Duesseldorf GmbH
+ * Copyright (C) 2024 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include 
+
+#include "gic-common.hh"
+
+namespace gic {
+
+u32 gic_dist::read_reg(gicd_reg reg)
+{
+return mmio_getl((mmioaddr_t)_base + (u32)reg);
+}
+
+void gic_dist::write_reg(gicd_reg reg, u32 value)
+{
+

[osv-dev] [COMMIT osv master] aarch64: implement GICv3

2024-01-11 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

aarch64: implement GICv3

This patch implements GICv3 driver and thus allows OSv to
run on modern SBCs (Single Board Computers) like Radxa's Rock 5B.
It is also a prerequisite to support AWS Graviton.

The code in gic-v3.** is somewhat based on the implementation of
GICv3 in the Unikraft project (see 
https://github.com/unikraft/unikraft/blob/staging/drivers/ukintctlr/gic/gic-v3.*).
It also re-uses elements from the abstract class gic-driver.
Please note this implementation does not support MSI-X and LPIs
(arm version of Message Signalled Interrupts).

The code to initialize GICv3 is logically similar to that of GICv2
in that it needs to initialize the so-called distributor interface
on the boot cpu (see gic_v3_driver::init_dist()). It then needs
to call gic_v3_driver::init_redist(int smp_idx) in order to
initialize the so-called redistributor interface specific to each cpu
(roughly equivalent to the cpu interface (cpuif) in GICv2).
We also implement other key methods - mask_irq(), unmask_irq(), set_irq_type(),
send_sgi(), ack_irq() and end_irq() - in a way specific to GICv3.

Finally, the run.py has been modified to pass "gic-version=max" to make
QEMU automatically expose the highest version of the GIC supported on
given hardware platform.

For more information about GICv3 look at this official ARM documentation:
- GICv3 and GICv4 Software Overview:
https://documentation-service.arm.com/static/5f1068720daa596235e7f6ef
- Arm® Generic Interrupt Controller Architecture Specification (GIC
  architecture version 3 and version 4) - 
https://documentation-service.arm.com/static/601412d54ccc190e5e681269

Fixes #1290

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -995,6 +995,7 @@ objects += arch/$(arch)/psci.o
 objects += arch/$(arch)/arm-clock.o
 objects += arch/$(arch)/gic-common.o
 objects += arch/$(arch)/gic-v2.o
+objects += arch/$(arch)/gic-v3.o
 objects += arch/$(arch)/arch-dtb.o
 objects += arch/$(arch)/hypercall.o
 objects += arch/$(arch)/memset.o
diff --git a/arch/aarch64/arch-dtb.cc b/arch/aarch64/arch-dtb.cc
--- a/arch/aarch64/arch-dtb.cc
+++ b/arch/aarch64/arch-dtb.cc
@@ -381,6 +381,30 @@ bool dtb_get_gic_v2(u64 *dist, size_t *dist_len, u64 *cpu, 
size_t *cpu_len)
 return true;
 }
 
+bool dtb_get_gic_v3(u64 *dist, size_t *dist_len, u64 *redist, size_t 
*redist_len)
+{
+u64 addr[2], len[2];
+int node;
+
+if (!dtb)
+return false;
+
+node = fdt_node_offset_by_compatible(dtb, -1, "arm,gic-v3");
+if (node < 0) {
+return false;
+}
+
+if (!dtb_get_reg_n(node, addr, len, 2))
+return false;
+
+*dist = addr[0];
+*dist_len = len[0];
+*redist = addr[1];
+*redist_len = len[1];
+
+return true;
+}
+
 /* this parses the cpus node and mpidr values and returns the number of cpu in 
it. */
 #define DTB_MAX_CPU_COUNT 32
 static int dtb_cpu_count = -1;
diff --git a/arch/aarch64/arch-dtb.hh b/arch/aarch64/arch-dtb.hh
--- a/arch/aarch64/arch-dtb.hh
+++ b/arch/aarch64/arch-dtb.hh
@@ -93,6 +93,8 @@ int dtb_get_timer_irq();
  */
 bool dtb_get_gic_v2(u64 *dist, size_t *dist_len, u64 *cpu, size_t *cpu_len);
 
+bool dtb_get_gic_v3(u64 *dist, size_t *dist_len, u64 *redist, size_t 
*redist_len);
+
 /* int dtb_get_cpus_count();
  *
  * gets the number of available cpus.
diff --git a/arch/aarch64/arch-setup.cc b/arch/aarch64/arch-setup.cc
--- a/arch/aarch64/arch-setup.cc
+++ b/arch/aarch64/arch-setup.cc
@@ -24,6 +24,7 @@
 #include "arch-mmu.hh"
 #include "arch-dtb.hh"
 #include "gic-v2.hh"
+#include "gic-v3.hh"
 
 #include "drivers/console.hh"
 #include "drivers/pl011.hh"
@@ -123,17 +124,22 @@ void arch_setup_free_memory()
 }
 #endif
 
-//Locate GICv2 information in DTB and construct corresponding GIC driver
+//Locate GICv2 or GICv3 information in DTB and construct corresponding GIC 
driver
 //and map relevant physical memory
-u64 dist, cpuif;
-size_t dist_len, cpuif_len;
-if (dtb_get_gic_v2(, _len, , _len)) {
+u64 dist, redist, cpuif;
+size_t dist_len, redist_len, cpuif_len;
+if (dtb_get_gic_v3(, _len, , _len)) {
+gic::gic = new gic::gic_v3_driver(dist, redist);
+/* linear_map [TTBR0 - GIC REDIST] */
+mmu::linear_map((void *)redist, (mmu::phys)redist, redist_len, 
"gic_redist", mmu::page_size,
+mmu::mattr::dev);
+} else if (dtb_get_gic_v2(, _len, , _len)) {
 gic::gic = new gic::gic_v2_driver(dist, cpuif);
 /* linear_map [TTBR0 - GIC CPUIF] */
 mmu::linear_map((void *)cpuif, (mmu::phys)cpuif, cpuif_len, 
"gic_cpuif", mmu::page_size,
 mmu::mattr::dev);
 } else {
-abort("arch-setup: failed to get GiCv2 information from dtb.\n");
+abort("arch-setup: failed to get GICv3 nor GiCv2 information from 
dtb.\n");
 }
 /* linear_map [TTBR0 - GIC DIST] */
 mmu::linear_map((void *)dist, 

[osv-dev] [COMMIT osv master] apps: update to the latest

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

apps: update to the latest

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/apps b/apps
--- a/apps
+++ b/apps
@@ -1 +1 @@
-Subproject commit 22e1541ca18d3794053b9ca61671508a2d1944ec
+Subproject commit 17b7c2d71f31590ace2e1ffcfd079fcd6b0b3939

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/2bc915060e8cf85b%40google.com.


[osv-dev] [COMMIT osv master] strace: make shutdown wait to print all trace

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

strace: make shutdown wait to print all trace

This adjusts the strace and shutdown logic to make strace print
all remaining tracepoints before shutting down.

As a result, running a static version of the native example
yields this:

./scripts/run.py -e '--strace --trace=syscall* /hello-static-non-pie'
OSv v0.57.0-125-g37674461
eth0: 192.168.122.15
Booted up in 192.61 ms
Cmdline: /hello-static-non-pie
/hello-static-n   0  0.131545648 syscall_arch_prctl(0x <= 12289 
0x200910)
syscall(): unimplemented system call 334
/hello-static-n   0  0.132670626 syscall_sys_brk(0x40 <= 0x0)
/hello-static-n   0  0.132975683 syscall_sys_brk(0x400d00 <= 0x400d00)
Hello from C code
/hello-static-n   0  0.132979423 syscall_arch_prctl(0x0 <= 4098 0x400380)
/hello-static-n   0  0.132982269 syscall_sys_set_tid_address(45 <= 
0x20400650)
/hello-static-n   0  0.132982850 syscall_sys_set_robust_list(0 <= 
0x20400660 24)
/hello-static-n   0  0.135752484 syscall_prlimit64(0 <= 0 3 0 0x20200830)
/hello-static-n   0  0.136627674 syscall_readlink(21 <= "/proc/self/exe" 
0x1ff7a0 4096)
/hello-static-n   0  0.137028545 syscall_getrandom(18446744073709551615 <= 
0x4ae1f0 8 1)
/hello-static-n   0  0.137032762 syscall_clock_gettime(0 <= 1 0x201ff730)
/hello-static-n   0  0.137033284 syscall_clock_gettime(0 <= 1 0x201ff730)
/hello-static-n   0  0.137037322 syscall_sys_brk(0x400d00 <= 0x0)
/hello-static-n   0  0.137040541 syscall_sys_brk(0x421d00 <= 0x421d00)
/hello-static-n   0  0.137040928 syscall_sys_brk(0x422000 <= 0x422000)
/hello-static-n   0  0.138665915 syscall_mprotect(0 <= 0x4a3000 16384 1)
/hello-static-n   0  0.139025077 syscall_fstatat(0 <= 1 "" 0x20200630 
01)
/hello-static-n   0  0.139027357 syscall_sys_ioctl(0 <= 1 21505 35184374187408)
/hello-static-n   0  0.140211701 syscall_write(0x12 <= 1 0x20401610 0x12)

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/shutdown.cc b/core/shutdown.cc
--- a/core/shutdown.cc
+++ b/core/shutdown.cc
@@ -3,13 +3,15 @@
 #include 
 #include 
 #include 
+#include 
 
 extern void vfs_exit(void);
 
 namespace osv {
 
 void shutdown()
 {
+wait_strace_complete();
 dhcp_release();
 
 // The vfs_exit() call below will forcibly unmount the filesystem. If any
diff --git a/core/strace.cc b/core/strace.cc
--- a/core/strace.cc
+++ b/core/strace.cc
@@ -129,17 +129,32 @@ static void print_trace(trace_record* tr) {
 }
 
 static sched::thread *strace = nullptr;
+static std::atomic strace_done = {false};
+
+static void print_traces() {
+while (auto tr = _trace_log->read()) {
+print_trace(tr);
+}
+}
 
 void start_strace() {
 _trace_log = new trace_log();
 strace = sched::thread::make([] {
-while (true) {
-while (auto tr = _trace_log->read()) {
-print_trace(tr);
-}
+print_traces();
+do {
 sched::thread::sleep(std::chrono::microseconds(100));
-}
+print_traces();
+} while (!strace_done);
 }, sched::thread::attr().name("strace"));
 
 strace->start();
 }
+
+void wait_strace_complete() {
+if (!_trace_log) {
+return;
+}
+strace_done = true;
+strace->join();
+delete strace;
+}
diff --git a/include/osv/strace.hh b/include/osv/strace.hh
--- a/include/osv/strace.hh
+++ b/include/osv/strace.hh
@@ -11,5 +11,6 @@
 #include 
 
 void start_strace();
+void wait_strace_complete();
 
 #endif
diff --git a/loader.cc b/loader.cc
--- a/loader.cc
+++ b/loader.cc
@@ -31,7 +31,7 @@
 #include "arch.hh"
 #include "arch-setup.hh"
 #include "osv/trace.hh"
-#include "osv/strace.hh"
+#include 
 #include 
 #include 
 #include 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/17ae70060e8cf8c0%40google.com.


[osv-dev] [COMMIT osv-apps master] golang-pie-example: add statically linked executables

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

golang-pie-example: add statically linked executables

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/golang-pie-example/Makefile b/golang-pie-example/Makefile
--- a/golang-pie-example/Makefile
+++ b/golang-pie-example/Makefile
@@ -1,13 +1,22 @@
 .PHONY: module
-module: hello.so
+module: hello.so hello-static-non-pie hello-static-pie
echo '/hello.so: $${MODULE_DIR}/hello.so' > usr.manifest
+   echo '/hello-static-non-pie: $${MODULE_DIR}/hello-static-non-pie' >> 
usr.manifest
+   echo '/hello-static-pie: $${MODULE_DIR}/hello-static-pie' >> 
usr.manifest
 
 hello.so: hello.go
go build -buildmode=pie -ldflags "-linkmode external" -o hello.so 
hello.go
+
+hello-static-pie: hello.go
+   go build -buildmode=pie -ldflags '-linkmode external -extldflags 
"--static-pie"' -o hello-static-pie hello.go
+
+hello-static-non-pie: hello.go
+   go build --ldflags '-extldflags "-static"' -o hello-static-non-pie 
hello.go
+
 # Please note that executable built following methods would work as well:
 #  'go build -buildmode=exe -ldflags "-linkmode external" -o hello.so hello.go'
 #   - on both older 1.12.6 and newer 1.15.8 version of Golang
 #  'go build -buildmode=pie -o hello.so hello.go' - on older 1.12.6
 
 clean:
-   rm -f hello*.so usr.manifest
+   rm -f hello*.so hello-static* usr.manifest

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5f76a0060e8ce6d0%40google.com.


[osv-dev] [COMMIT osv-apps master] native-example: add statically linked executables

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

native-example: add statically linked executables

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/native-example/Makefile b/native-example/Makefile
--- a/native-example/Makefile
+++ b/native-example/Makefile
@@ -1,10 +1,16 @@
 .PHONY: module
-module: hello
+module: hello hello-static-pie hello-static-non-pie
 
 CFLAGS = -std=gnu99 -fpie -rdynamic
 
 hello: hello.c
$(CC) -pie -o $@ $(CFLAGS) $(LDFLAGS) hello.c
 
+hello-static-pie: hello.c
+   $(CC) -fPIE -static-pie -o hello-static-pie hello.c
+
+hello-static-non-pie: hello.c
+   $(CC) -static -o hello-static-non-pie hello.c
+
 clean:
-   rm -f hello
+   rm -f hello hello-static-pie hello-static-non-pie
diff --git a/native-example/usr.manifest b/native-example/usr.manifest
--- a/native-example/usr.manifest
+++ b/native-example/usr.manifest
@@ -1 +1,3 @@
 /hello: ${MODULE_DIR}/hello
+/hello-static-pie: ${MODULE_DIR}/hello-static-pie
+/hello-static-non-pie: ${MODULE_DIR}/hello-static-non-pie

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/4b15c5060e8ce6a2%40google.com.


[osv-dev] [COMMIT osv master] aarch64: set the generic timer PPI interrupt to the type level

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: set the generic timer PPI interrupt to the type level

The AArch64 Programmer's Guides Generic Timer document states this in section 3.4:

"The interrupts generated by the timer behave in a level-sensitive manner."

The type of the generic timer interrupt was set incorrectly to edge and
most likely was ignored by QEMU so OSv would work fine. But when testing
on firecracker on Rock 5B OSv would crash when initializing the timer interrupt
in arm_clock_events::arm_clock_events().

This patch fixes it by correctly setting interrupt type to level.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arm-clock.cc b/arch/aarch64/arm-clock.cc
--- a/arch/aarch64/arm-clock.cc
+++ b/arch/aarch64/arm-clock.cc
@@ -125,7 +125,7 @@ int get_timer_irq_id()
 
 arm_clock_events::arm_clock_events()
 {
-_irq.reset(new ppi_interrupt(gic::irq_type::IRQ_TYPE_EDGE, 
get_timer_irq_id(),
+_irq.reset(new ppi_interrupt(gic::irq_type::IRQ_TYPE_LEVEL, 
get_timer_irq_id(),
  [this] {
 /* From AArch64 Programmer's Guides Generic Timer (chapter 3.4, page 
10):
  * The interrupts generated by the timer behave in a level-sensitive 
manner.

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/062689060e899275%40google.com.


[osv-dev] [COMMIT osv master] jdk8: add symlink to fix Java 8 image

2024-01-09 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

jdk8: add symlink to fix Java 8 image

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/openjdk8-from-host/module.py 
b/modules/openjdk8-from-host/module.py
--- a/modules/openjdk8-from-host/module.py
+++ b/modules/openjdk8-from-host/module.py
@@ -48,3 +48,4 @@
 usr_files.link('/usr/lib/jvm/jre').to('java/jre')
 
usr_files.link('/usr/lib/jvm/java/jre/lib/security/cacerts').to('/etc/pki/java/cacerts')
 usr_files.link('/usr/bin/java').to('/usr/lib/jvm/java/jre/bin/java')
+usr_files.link('/usr/lib/jvm/java/bin/java').to('/usr/lib/jvm/java/jre/bin/java')

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/e9cc76060e89919b%40google.com.


[osv-dev] [COMMIT osv master] aarch64: implement clone/clone3 to run multi-threaded static apps

2023-12-23 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: implement clone/clone3 to run multi-threaded static apps

Just like the patch b3792dfa62149a0f8c5dd75d445dcf2266235de1, this one
implements clone/clone3 system call but on aarch64. For more details
please read the code comments.

In addition, this patch refactors the clone code by extracting common
logic into the clone() function in linux.cc and leaving the arch-specific
code in clone_thread() found under arch/$(arch)/clone.cc.

With this patch, one can run multi-threaded static executables and
dynamic ones with Linux dynamic linker on OSv on aarch64.

./scripts/test.py --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d java_no_wrapper \
  -d tst-chmod \
  -d tst-kill \
  -d tst-remove \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-wctype

Please note the java_wrapper crashes because of the missing implementation
of AT_SYMLINK_NOFOLLOW in faccessat(). And tst-chmod and tst-remove fail
because of missing fchmodat syscall.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -977,6 +977,7 @@ objects += arch/$(arch)/cpuid.o
 objects += arch/$(arch)/firmware.o
 objects += arch/$(arch)/hypervisor.o
 objects += arch/$(arch)/interrupt.o
+objects += arch/$(arch)/clone.o
 ifeq ($(conf_drivers_pci),1)
 objects += arch/$(arch)/pci.o
 objects += arch/$(arch)/msi.o
@@ -1013,7 +1014,6 @@ objects += arch/x64/apic.o
 objects += arch/x64/apic-clock.o
 objects += arch/x64/entry-xen.o
 objects += arch/x64/prctl.o
-objects += arch/x64/clone.o
 objects += arch/x64/vmlinux.o
 objects += arch/x64/vmlinux-boot64.o
 objects += arch/x64/pvh-boot.o
diff --git a/arch/aarch64/clone.cc b/arch/aarch64/clone.cc
--- a/arch/aarch64/clone.cc
+++ b/arch/aarch64/clone.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include "arch.hh"
+#include 
+#include 
+
+#define CLONE_SETTLS   0x0008
+
+static constexpr size_t CHILD_FRAME_OFFSET = 7*4096 + sizeof(exception_frame);
+static constexpr size_t PARENT_FRAME_OFFSET = sizeof(exception_frame);
+
+sched::thread *clone_thread(unsigned long flags, void *child_stack, unsigned 
long newtls)
+{   //
+//If the parent thread is pinned we should make new thread inherit this
+auto parent_pinned_cpu = sched::thread::current()->pinned() ? 
sched::cpu::current() : nullptr;
+//
+//Create new child thread
+auto t = sched::thread::make([=] {
+   //
+   //Switch to app TCB if one specified
+   auto frame_start_on_exception_stack = 
sched::thread::current()->get_exception_stack_top() - CHILD_FRAME_OFFSET;
+   exception_frame *child_frame = 
reinterpret_cast(frame_start_on_exception_stack);
+   if (child_frame->far) {
+   asm volatile ("msr tpidr_el0, %0; isb; " :: "r"(child_frame->far) : 
"memory");
+   }
+   //
+   //Restore registers from the exception stack and jump to the caller
+   //We are restoring the registers based on how they were saved
+   //on the exception stack of the parent
+   asm volatile
+ ("msr daifset, #2 \n\t"  // Disable interrupts
+  "isb \n\t"
+  "mov sp, %0 \n\t"   // Set child stack
+  "msr spsel, #0 \n\t"// Switch to exception stack
+  "mov sp, %1 \n\t"   // Set stack to the beginning of the 
stack frame
+  "ldr x30, [sp, #256] \n\t"  // Load x30 (link register) with 
elr_el1 (exception link register)
+  "ldp x0, x1, [sp], #16 \n\t"
+  "ldp x2, x3, [sp], #16 \n\t"
+  "ldp x4, x5, [sp], #16 \n\t"
+  "ldp x6, x7, [sp], #16 \n\t"
+  "ldp x8, x9, [sp], #16 \n\t"
+  "ldp x10, x11, [sp], #16 \n\t"
+  "ldp x12, x13, [sp], #16 \n\t"
+  "ldp x14, x15, [sp], #16 \n\t"
+  "ldp x16, x17, [sp], #16 \n\t"
+  "ldp x18, x19, [sp], #16 \n\t"
+  "ldp x20, x21, [sp], #16 \n\t"
+  "ldp x22, x23, [sp], #16 \n\t"
+  "ldp x24, x25, [sp], #16 \n\t"
+  "ldp x26, x27, [sp], #16 \n\t"
+  "ldp x28, x29, [sp], #16 \n\t"
+  "add sp, sp, #48 \n\t"
+  "add sp, sp, #28672 \n\t"   // Move back 7*4096
+  "msr spsel, #1 \n\t"// Switch to user stack
+  "msr daifclr, #2 \n\t"  // Enable interrupts
+  "isb \n\t" : : "r"(child_frame->sp), 
"r"(frame_start_on_exception_stack));
+}, sched::thread::attr().
+stack(4096 * 4). //16K kernel stack should be large enough
+pin(parent_pinned_cpu),
+false,
+true);
+//
+//Copy all saved registers from parent exception stack to the child 
exception stack
+//so that they can be restored in the child thread in the inlined assembly 
above
+auto 

[osv-dev] [COMMIT osv master] tst-pthread-barrier.cc: account for different size of pthread_barrierattr_t on aarch64

2023-12-23 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tst-pthread-barrier.cc: account for different size of pthread_barrierattr_t on 
aarch64

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-pthread-barrier.cc b/tests/tst-pthread-barrier.cc
--- a/tests/tst-pthread-barrier.cc
+++ b/tests/tst-pthread-barrier.cc
@@ -67,7 +67,15 @@ int main(void)
 printf("Sizeof pthread_barrier_t: %ld\n", sizeof(barrier));
 report("sizeof pthread_barrier_t is 32 bytes\n", sizeof(barrier) == 32);
 printf("Sizeof pthread_barrierattr_t: %ld\n", sizeof(attr));
+#ifdef __x86_64__
 report("sizeof pthread_barrierattr_t is 4 bytes\n", sizeof(attr) == 4);
+#else
+#ifdef __OSV__
+report("sizeof pthread_barrierattr_t is 4 bytes\n", sizeof(attr) == 4);
+#else
+report("sizeof pthread_barrierattr_t is 8 bytes\n", sizeof(attr) == 8);
+#endif
+#endif
 
 #ifdef __OSV__
 // Try an invalid initialization (-1 or 0 or a null pthread_barrier_t*)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/0b0603060d3093ce%40google.com.


[osv-dev] [COMMIT osv master] aarch64: support running unit tests with Linux dynamic linker

2023-12-22 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: support running unit tests with Linux dynamic linker

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -174,11 +174,11 @@ usr.manifest: build_all_tests $(lastword 
$(MAKEFILE_LIST)) usr.manifest.skel FOR
@cat $@.skel > $@
@case "$(CROSS_PREFIX)" in \
"aarch64"*) ../tests/add_aarch64_boost_libraries.sh $(OSV_BASE) 
>> $@ ;; \
-   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
esac
@echo $(all_tests) | tr ' ' '\n' | grep -v 
"tests-with-linux-ld/rofs/tst-.*" | awk '{print "/" $$0 ": ./" $$0}' | sed 
's/^\/tests-with-linux-ld/\/tests/' >> $@
@echo $(all_tests) | tr ' ' '\n' | grep 
"tests-with-linux-ld/rofs/tst-.*" | awk 'BEGIN { FS = "/" } ; { print "/tests/" 
$$3 "-rofs: ./tests-with-linux-ld/" $$2 "/" $$3 ""}' >> $@
-   @$(OSV_BASE)/scripts/manifest_from_host.sh 
$(out)/tests-with-linux-ld/tst-threadcomplete >> $@
+   @$(OSV_BASE)/scripts/manifest_from_host.sh 
$(out)/tests-with-linux-ld/tst-stat >> $@
+   @$(OSV_BASE)/scripts/manifest_from_host.sh 
$(out)/tests-with-linux-ld/misc-tcp >> $@
$(call very-quiet, ./create_static.sh $(out) usr.manifest $(fs_type))
 .PHONY: FORCE
 FORCE:
@@ -188,7 +188,6 @@ common.manifest: build_all_tests $(lastword 
$(MAKEFILE_LIST)) usr.manifest.skel
@cat usr.manifest.skel > $@
@case "$(CROSS_PREFIX)" in \
"aarch64"*) ../tests/add_aarch64_boost_libraries.sh $(OSV_BASE) 
>> $@ ;; \
-   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
esac
@echo $(common-tests) | tr ' ' '\n' | awk '{print "/tests/" $$0 ": 
./tests/" $$0}' >> $@
 
diff --git a/scripts/test.py b/scripts/test.py
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -39,11 +39,14 @@
 "tcp_close_without_reading_on_qemu"
 ]
 
-linux_ld = '/lib64/ld-linux-x86-64.so.2 '
+if host_arch == 'aarch64':
+linux_ld = '/lib/ld-linux-aarch64.so.1'
+else:
+linux_ld = '/lib64/ld-linux-x86-64.so.2'
 
 class TestRunnerTest(SingleCommandTest):
 def __init__(self, name):
-super(TestRunnerTest, self).__init__(name, '%s/tests/%s' % (linux_ld 
if cmdargs.linux_ld else '', name))
+super(TestRunnerTest, self).__init__(name, '%s /tests/%s' % (linux_ld 
if cmdargs.linux_ld else '', name))
 
 # Not all files in build/release/tests/tst-*.so may be on the test image
 # (e.g., some may have actually remain there from old builds) - so lets take
@@ -81,7 +84,7 @@ def collect_java_tests():
 components = line.split(": ", 2);
 test_name = components[0].strip();
 test_command = components[1].strip()
-add_tests([SingleCommandTest(test_name, linux_ld + test_command if 
cmdargs.linux_ld else test_command)])
+add_tests([SingleCommandTest(test_name, linux_ld + ' ' + 
test_command if cmdargs.linux_ld else test_command)])
 
 def run_test(test):
 sys.stdout.write("  TEST %-35s" % test.name)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/c72439060d269cfc%40google.com.


[osv-dev] [COMMIT osv master] aarch64: support app TLS switch in statically linked executables

2023-12-22 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: support app TLS switch in statically linked executables

This patch is logically similar to ce3293f6c975c1a0b309bb4cdc2626676d869339
that added support of TLS in statically linked executables on x86_64.

Unlike on x86_64, there is no arch_prctl syscall on aarch64, and a statically
linked executable, or one running with the Linux dynamic linker, freely sets
the thread pointer register - tpidr_el0 - without registering it with
the kernel in any way. The OSv kernel also uses the same register to store the
kernel thread pointer and switches its value on every context switch.

In order to accommodate the statically linked executables we come up with
a simple strategy that takes advantage of another thread pointer
register tpidr_el1. In essence we hold a copy of the kernel managed thread
pointer in tpidr_el1 and update both tpidr_el0 and tpidr_el1 on every
context switch (see sched.S). In addition on every entry to an exception
(see changes in entry.S), we save current value of tpidr_el0 in a callee-saved
register x19 and copy the value from tpidr_el1 to tpidr_el0 in case
it was different from kernel thread pointer (please remember tpidr_el1 always
holds the kernel value). On exit from exception we restore original value
of tpidr_el0 from the register x19. This logic is enough to make sure
that both interrupts and system calls are executed with kernel thread pointer
and tpidr_el0 is switched back to the app thread pointer if necessary.

This code change is enough to run single-threaded statically linked executables
and dynamically linked ones with Linux dynamic linker on OSv on aarch64.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-setup.cc b/arch/aarch64/arch-setup.cc
--- a/arch/aarch64/arch-setup.cc
+++ b/arch/aarch64/arch-setup.cc
@@ -162,7 +162,7 @@ void arch_setup_tls(void *tls, const elf::tls_data& info)
 tcb[0].tls_base = [1];
 
 memcpy([1], info.start, info.filesize);
-asm volatile ("msr tpidr_el0, %0; isb; " :: "r"(tcb) : "memory");
+asm volatile ("msr tpidr_el0, %0; msr tpidr_el1, %0; isb; " :: "r"(tcb) : 
"memory");
 
 /* check that the tls variable preempt_counter is correct */
 assert(sched::get_preempt_counter() == 1);
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -23,7 +23,7 @@ namespace sched {
 
 void thread::switch_to_first()
 {
-asm volatile ("msr tpidr_el0, %0; isb; " :: "r"(_tcb) : "memory");
+asm volatile ("msr tpidr_el0, %0; msr tpidr_el1, %0; isb; " :: "r"(_tcb) : 
"memory");
 
 /* check that the tls variable preempt_counter is correct */
 assert(sched::get_preempt_counter() == 1);
diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S
--- a/arch/aarch64/entry.S
+++ b/arch/aarch64/entry.S
@@ -163,12 +163,16 @@ entry_\level\()_\type:
  // of the exception frame to help gdb link to the
  // address when interrupt was raised
 push_state_to_exception_frame 1
+mrs x19, tpidr_el0 // Save potential app thread pointer
+mrs x20, tpidr_el1 // Read copy of kernel thread pointer
+msr tpidr_el0, x20 // Set thread pointer to kernel one
 mrs x1, esr_el1
 str w1, [sp, #272] // Store Exception Syndrom Register in the frame
 mov x0, sp // Save exception_frame to x0
 mov x1, \level_id
 mov x2, \type_id
 bl  handle_unexpected_exception
+msr tpidr_el0, x19 // Restore thread pointer
 pop_state_from_exception_frame
 bl  abort
 .cfi_endproc
@@ -212,6 +216,9 @@ entry_curr_el_sp\stack\()_sync:
  // of the exception frame to help gdb link to the
  // address when interrupt was raised
 push_state_to_exception_frame \switch
+mrs x19, tpidr_el0 // Save potential app thread pointer
+mrs x20, tpidr_el1 // Read copy of kernel thread pointer
+msr tpidr_el0, x20 // Set thread pointer to kernel one
 mrs x1, esr_el1
 str w1, [sp, #272] // Store Exception Syndrom Register in the frame
 ubfmx2, x1, #ESR_EC_BEG, #ESR_EC_END // Exception Class -> X2
@@ -229,6 +236,7 @@ handle_mem_abort_sp\stack:
 
 mov x0, sp  // save exception_frame to x0
 bl  page_fault
+msr tpidr_el0, x19 // Restore thread pointer
 pop_state_from_exception_frame
 eret
 .cfi_endproc
@@ -252,6 +260,7 @@ handle_system_call_sp\stack:
 isb
 
 str x0, [sp, #0] // copy the result in x0 directly into the frame 
so that it can be restored
+msr tpidr_el0, x19 //Restore thread pointer
 pop_state_from_exception_frame
 eret
 .cfi_endproc
@@ -261,6 +270,7 @@ 

[osv-dev] [COMMIT osv master] aarch64: clear atexit pointer before jumping to ELF entry point

2023-12-22 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: clear atexit pointer before jumping to ELF entry point

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-elf.hh b/arch/aarch64/arch-elf.hh
--- a/arch/aarch64/arch-elf.hh
+++ b/arch/aarch64/arch-elf.hh
@@ -52,6 +52,7 @@ inline void run_entry_point(void* ep, int argc, char** argv, 
int argv_size)
 //Set stack pointer and jump to the ELF entry point
 asm volatile (
 "mov sp, %1\n\t" //set stack
+"mov x0, #0\n\t" //set atexit pointer
 "blr %0\n\t"
 :
 : "r"(ep), "r"(stack));

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/8b2b36060d269c87%40google.com.


[osv-dev] [COMMIT osv master] aarch64: increase exception stack size to 64K

2023-12-22 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: increase exception stack size to 64K

On aarch64 the syscalls are handled on the exception stack. And some of
the syscalls involving vfs may end up consuming more than 16K
of stack. A good example of it is a namei() function that needs
a minimum of 8K of stack. The tst-symlink test involves syscalls
that need more than 32K.

In order to prevent corruption of thread structures we need to
increase the exception stack size to 64K which is the same as
the x64 syscall stack size.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -33,7 +33,7 @@ struct arch_cpu {
 };
 
 struct arch_thread {
-char exception_stack[4096*4] __attribute__((aligned(16)));
+char exception_stack[4096*16] __attribute__((aligned(16)));
 };
 
 struct arch_fpu {

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/9f5f97060d269cb1%40google.com.


[osv-dev] [COMMIT osv master] aarch64: got it to compile again

2023-12-20 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aarch64: got it to compile again

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1724,7 +1724,9 @@ $(out)/libc/stdlib/qsort_r.o: COMMON += 
-Wno-dangling-pointer
 libc += stdlib/strtol.o
 libc += stdlib/strtod.o
 libc += stdlib/wcstol.o
+ifeq ($(arch),x64)
 libc += stdlib/unimplemented.o
+endif
 
 libc += string/__memcpy_chk.o
 libc += string/explicit_bzero.o
diff --git a/include/api/stdlib.h b/include/api/stdlib.h
--- a/include/api/stdlib.h
+++ b/include/api/stdlib.h
@@ -22,7 +22,9 @@ double atof (const char *);
 float strtof (const char *__restrict, char **__restrict);
 double strtod (const char *__restrict, char **__restrict);
 long double strtold (const char *__restrict, char **__restrict);
+#ifdef __x86_64__
 __float128 strtof128 (const char *__restrict, char **__restrict);
+#endif
 
 long strtol (const char *__restrict, char **__restrict, int);
 unsigned long strtoul (const char *__restrict, char **__restrict, int);
@@ -32,7 +34,9 @@ unsigned long long strtoull (const char *__restrict, char 
**__restrict, int);
 int strfromd (char *__restrict, size_t, const char *__restrict, double);
 int strfromf (char *__restrict, size_t, const char *__restrict, float);
 int strfromld (char *__restrict, size_t, const char *__restrict, long double);
+#ifdef __x86_64__
 int strfromf128 (char *__restrict, size_t, const char *__restrict, __float128);
+#endif
 
 int rand (void);
 void srand (unsigned);
diff --git a/include/osv/sched.hh b/include/osv/sched.hh
--- a/include/osv/sched.hh
+++ b/include/osv/sched.hh
@@ -831,8 +831,10 @@ private:
 std::shared_ptr _app_runtime;
 public:
 void destroy();
+#ifdef __x86_64__
 unsigned long get_app_tcb() { return _tcb->app_tcb; }
 void set_app_tcb(unsigned long tcb) { _tcb->app_tcb = tcb; }
+#endif
 private:
 #ifdef __aarch64__
 friend void ::destroy_current_cpu_terminating_thread();
diff --git a/libc/vdso/vdso.cc b/libc/vdso/vdso.cc
--- a/libc/vdso/vdso.cc
+++ b/libc/vdso/vdso.cc
@@ -1,5 +1,6 @@
 #include 
 #include 
+#include 
 
 #ifdef __x86_64__
 #include "tls-switch.hh"
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -51,7 +51,9 @@
 #include 
 #include 
 #include 
+#ifdef __x86_64__
 #include "tls-switch.hh"
+#endif
 
 #include 
 
@@ -675,46 +677,51 @@ TRACEPOINT(trace_syscall_mknodat, "%d <= %d \"%s\" %d 
%d", int, int, const char
 TRACEPOINT(trace_syscall_statx, "%d <= %d \"%s\" %d %u %p", int, int, const 
char *, int, unsigned int, struct statx *);
 TRACEPOINT(trace_syscall_sys_getcpu, "%ld <= %p %p 0x%x", long, unsigned int 
*, unsigned int *, void *);
 TRACEPOINT(trace_syscall_dup, "%d <= %d", int, int);
+#ifdef __x86_64__
 TRACEPOINT(trace_syscall_dup2, "%d <= %d %d", int, int, int);
-TRACEPOINT(trace_syscall_mprotect, "%d <= 0x%x %lu %d", int, void *, size_t, 
int);
 TRACEPOINT(trace_syscall_access, "%d <= \"%s\" %d", int, const char *, int);
-TRACEPOINT(trace_syscall_writev, "%lu <= %d %p %d", ssize_t, int, const struct 
iovec *, int);
 TRACEPOINT(trace_syscall_readlink, "%lu <= \"%s\" 0x%x %lu", ssize_t, const 
char *, char *, size_t);
+TRACEPOINT(trace_syscall_poll, "%d <= %p %ld %d", int, struct pollfd *, 
nfds_t, int);
+TRACEPOINT(trace_syscall_epoll_create, "%d <= %d", int, int);
+TRACEPOINT(trace_syscall_time, "%ld <= %p", time_t, time_t *);
+TRACEPOINT(trace_syscall_unlink, "%d <= \"%s\"", int, const char *);
+TRACEPOINT(trace_syscall_pipe, "%d <= %p", int, int*);
+TRACEPOINT(trace_syscall_alarm, "%d <= %u", int, unsigned int);
+TRACEPOINT(trace_syscall_symlink, "%d <= \"%s\" \"%s\"", int, const char *, 
const char *);
+TRACEPOINT(trace_syscall_rmdir, "%d <= \"%s\"", int, const char *);
+TRACEPOINT(trace_syscall_creat, "%d <= \"%s\" %d", int, const char *, mode_t);
+TRACEPOINT(trace_syscall_chmod, "%d <= \"%s\" %d", int, const char *, mode_t);
+TRACEPOINT(trace_syscall_rename, "%d <= %s %s", int, const char *, const char 
*);
+#endif
+TRACEPOINT(trace_syscall_mprotect, "%d <= 0x%x %lu %d", int, void *, size_t, 
int);
+TRACEPOINT(trace_syscall_writev, "%lu <= %d %p %d", ssize_t, int, const struct 
iovec *, int);
 TRACEPOINT(trace_syscall_geteuid, "%d <=", uid_t);
 TRACEPOINT(trace_syscall_getegid, "%d <=", gid_t);
 TRACEPOINT(trace_syscall_gettimeofday, "%d <= %p %p", int, struct timeval *, 
struct timezone *);
-TRACEPOINT(trace_syscall_poll, "%d <= %p %ld %d", int, struct pollfd *, 
nfds_t, int);
 TRACEPOINT(trace_syscall_getppid, "%d <=", pid_t);
-TRACEPOINT(trace_syscall_epoll_create, "%d <= %d", int, int);
 TRACEPOINT(trace_syscall_sysinfo, "%d <= %p", int, struct sysinfo *);
-TRACEPOINT(trace_syscall_time, "%ld <= %p", time_t, time_t *);
 TRACEPOINT(trace_syscall_sendfile, "%lu <= %d %d %p %lu", ssize_t, int, int, 
off_t *, size_t);
 TRACEPOINT(trace_syscall_socketpair, "%d <= %d %d %d %p", int, int, int, int, 
int *);
 TRACEPOINT(trace_syscall_shutdown, "%d <= %d %d", int, int, int);

[osv-dev] [COMMIT osv master] waitqueue: fix edge condition in disarm()

2023-12-19 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

waitqueue: fix edge condition in disarm()

As issue #1285 describes, the following edge condition is not handled
correctly and results in a crash:

1. wo_older.arm() //(of wait_object type)
2. wo_newer1.arm()
3. wo_older.disarm()
4. wo_newer2.arm() // Crash would happen here
5. wo_newer1.disarm()
6. wo_newer2.disarm()

./scripts/run.py -e '/tests/tst-wait-for.so'
OSv v0.57.0-109-gcf0f7526
eth0: 192.168.122.15
Booted up in 180.05 ms
Cmdline: /tests/tst-wait-for.so
Running 8 test cases...
page fault outside application, addr: 0x
[registers]
RIP: 0x403a18c1 ::arm()+17>
RFL: 0x00010206  CS:  0x0008  SS:  0x0010
RAX: 0x201ff770  RBX: 0x201ff7f0  RCX: 0x  RDX: 
0x
RSI: 0x60fd4c00  RDI: 0x201ff980  RBP: 0x201ff9f0  R8:  
0x
R9:  0x  R10: 0x10007692  R11: 0x0063  R12: 
0x201ff7d0
R13: 0x  R14: 0x10007692  R15: 0x0063  RSP: 
0x201ff6b8
Aborted

[backtrace]
0x40205287 
0x40306b5b 
0x40305892 
0x100083c2 

This patch adds new test cases to tst-wait-for.cc that validate arm()
and disarm(). The 3rd one - test_waitqueue_linked_list_3 - formulates
the exact scenario this patch fixes.

Fixes #1285

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/waitqueue.cc b/core/waitqueue.cc
--- a/core/waitqueue.cc
+++ b/core/waitqueue.cc
@@ -38,7 +38,7 @@ void wait_object::disarm()
 while (*pnext) {
 if (&_wr == *pnext) {
 *pnext = _wr.next;
-if (!*pnext || !(*pnext)->next) {
+if (!*pnext) {
 fifo.newest = newest;
 }
 break;
diff --git a/tests/tst-wait-for.cc b/tests/tst-wait-for.cc
--- a/tests/tst-wait-for.cc
+++ b/tests/tst-wait-for.cc
@@ -123,4 +123,52 @@ BOOST_AUTO_TEST_CASE(test_wait_for_predicate)
 false_waker->join();
 }
 
+BOOST_AUTO_TEST_CASE(test_waitqueue_linked_list_1)
+{
+waitqueue wq;
+mutex mtx;
+sched::wait_object wo(wq, );
+wo.arm();
+BOOST_REQUIRE(!wq.empty());
+wo.disarm();
+BOOST_REQUIRE(wq.empty());
+}
+
+BOOST_AUTO_TEST_CASE(test_waitqueue_linked_list_2)
+{
+waitqueue wq;
+mutex mtx;
+sched::wait_object wo_older(wq, );
+wo_older.arm();
+BOOST_REQUIRE(!wq.empty());
+sched::wait_object wo_newer(wq, );
+wo_newer.arm();
+BOOST_REQUIRE(!wq.empty());
+wo_older.disarm();
+BOOST_REQUIRE(!wq.empty());
+wo_newer.disarm();
+BOOST_REQUIRE(wq.empty());
+}
+
+BOOST_AUTO_TEST_CASE(test_waitqueue_linked_list_3)
+{
+waitqueue wq;
+mutex mtx;
+sched::wait_object wo_older(wq, );
+wo_older.arm();
+BOOST_REQUIRE(!wq.empty());
+sched::wait_object wo_newer_1(wq, );
+wo_newer_1.arm();
+BOOST_REQUIRE(!wq.empty());
+wo_older.disarm();
+BOOST_REQUIRE(!wq.empty());
+sched::wait_object wo_newer_2(wq, );
+wo_newer_2.arm();
+BOOST_REQUIRE(!wq.empty());
+wo_newer_1.disarm();
+BOOST_REQUIRE(!wq.empty());
+wo_newer_2.disarm();
+BOOST_REQUIRE(wq.empty());
+}
+
 OSV_ELF_MLOCK_OBJECT();

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/42bf6d060ce47f93%40google.com.


[osv-dev] [COMMIT osv master] syscall: expose getpriority and setpriority

2023-12-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscall: expose getpriority and setpriority

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -731,6 +731,8 @@ TRACEPOINT(trace_syscall_shmget, "%d <= %d %lu %d", int, 
key_t, size_t, int);
 TRACEPOINT(trace_syscall_rename, "%d <= %s %s", int, const char *, const char 
*);
 TRACEPOINT(trace_syscall_rt_sigtimedwait, "%d <= %p %p %p %lu", int, const 
sigset_t *, siginfo_t *, const struct timespec *, size_t);
 TRACEPOINT(trace_syscall_getrlimit, "%d <= %d %p", int, int, struct rlimit *);
+TRACEPOINT(trace_syscall_getpriority, "%d <= %d %d", int, int, int);
+TRACEPOINT(trace_syscall_setpriority, "%d <= %d %d %d", int, int, int, int);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -886,6 +888,8 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL2(rename, const char *, const char *);
 SYSCALL4(rt_sigtimedwait, const sigset_t *, siginfo_t *, const struct 
timespec *, size_t);
 SYSCALL2(getrlimit, int, struct rlimit *);
+SYSCALL2(getpriority, int, int);
+SYSCALL3(setpriority, int, int, int);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/8fe9fc060cd0eb52%40google.com.


[osv-dev] [COMMIT osv master] clock_getres: better validate negative clk_id

2023-12-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

clock_getres: better validate negative clk_id

Not all negative clock_id numbers are valid. To catch invalid
negative values, apply the formula to calculate the thread id
and try to find matching thread by id.

This fixes broken tst-time.cc

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/libc/time.cc b/libc/time.cc
--- a/libc/time.cc
+++ b/libc/time.cc
@@ -137,6 +137,13 @@ int clock_getres(clockid_t clk_id, struct timespec* ts)
 //which represent clock_id for specific thread
 if (clk_id >= 0) {
 return libc_error(EINVAL);
+} else {
+//Reverse the formula used in pthread_getcpuclockid()
+//and calculate thread id given clk_id
+pid_t tid = (-clk_id - 2) / 8;
+if( !sched::thread::find_by_id(tid)) {
+return libc_error(EINVAL);
+}
 }
 }
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/42db0e060ccf0f9d%40google.com.


[osv-dev] [COMMIT osv master] tests: skip tst-dns-resolver when running with Linux dynamic linker

2023-12-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: skip tst-dns-resolver when running with Linux dynamic linker

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/test.py b/scripts/test.py
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -19,7 +19,9 @@
 
 disabled_list= [
 "tst-dns-resolver.so",
+"tst-dns-resolver",
 "tst-feexcept.so", # On AArch64 the tests around floating point exceptions 
(SIGFPE) fail even on KVM - see issue #1150
+"tst-feexcept",
 ]
 
 qemu_disabled_list= [
@@ -195,6 +197,7 @@ def main():
 
 if running_with_kvm_on(cmdargs.arch, cmdargs.hypervisor) and cmdargs.arch 
!= 'aarch64':
 disabled_list.remove("tst-feexcept.so")
+disabled_list.remove("tst-feexcept")
 
 test_net.set_arch(cmdargs.arch)
 test_tracing.set_arch(cmdargs.arch)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/2e9b0d060ccf0fff%40google.com.


[osv-dev] [COMMIT osv master] syscall: expose getrlimit and implement rt_sigtimedwait

2023-12-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscall: expose getrlimit and implement rt_sigtimedwait

This patch exposes getrlimit and adds limited implementation
of rt_sigtimedwait system call. The latter only supports calls without
the timeout parameter.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -373,8 +373,17 @@ int rt_sigprocmask(int how, sigset_t * nset, sigset_t * 
oset, size_t sigsetsize)
 return sigprocmask(how, nset, oset);
 }
 
-#define __NR_sys_exit __NR_exit
+int rt_sigtimedwait(const sigset_t *set, siginfo_t *info, const struct 
timespec *timeout, size_t sigsetsize)
+{
+if (!timeout || (!timeout->tv_sec && !timeout->tv_nsec)) {
+return sigwaitinfo(set, info);
+} else {
+errno = ENOSYS;
+return -1;
+}
+}
 
+#define __NR_sys_exit __NR_exit
 static int sys_exit(int ret)
 {
 sched::thread::current()->exit();
@@ -720,6 +729,8 @@ TRACEPOINT(trace_syscall_shmctl, "%d <= %d %d %p", int, 
int, int, struct shmid_d
 TRACEPOINT(trace_syscall_shmdt, "%d <= 0x%x", int, const void *)
 TRACEPOINT(trace_syscall_shmget, "%d <= %d %lu %d", int, key_t, size_t, int);
 TRACEPOINT(trace_syscall_rename, "%d <= %s %s", int, const char *, const char 
*);
+TRACEPOINT(trace_syscall_rt_sigtimedwait, "%d <= %p %p %p %lu", int, const 
sigset_t *, siginfo_t *, const struct timespec *, size_t);
+TRACEPOINT(trace_syscall_getrlimit, "%d <= %d %p", int, int, struct rlimit *);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -873,6 +884,8 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL1(shmdt, const void *);
 SYSCALL3(shmget, key_t, size_t, int);
 SYSCALL2(rename, const char *, const char *);
+SYSCALL4(rt_sigtimedwait, const sigset_t *, siginfo_t *, const struct 
timespec *, size_t);
+SYSCALL2(getrlimit, int, struct rlimit *);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/bf4a47060cc993bb%40google.com.


[osv-dev] [COMMIT osv master] libc: make pthread_getcpuclockid() compatible with glibc

2023-12-18 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Nadav Har'El 
Branch: master

libc: make pthread_getcpuclockid() compatible with glibc

When we run tst-pthread-clock.cc on OSv with Linux dynamic linker,
it uses the glibc version of pthread_getcpuclockid(). The glibc
pthread_getcpuclockid() uses a different scheme (the same as musl) to encode
the clockid, which is actually compatible with what the Linux kernel expects
when handling clock_gettime(). In the end, the tst-pthread-clock.cc
fails when calling clock_gettime() with clock_id returned earlier
by pthread_getcpuclockid().

To make this test work correctly on OSv with Linux dynamic linker,
we change the scheme used by pthread_getcpuclockid() and clock_gettime()
to follow what glibc and musl do (see
https://git.musl-libc.org/cgit/musl/tree/src/thread/pthread_getcpuclockid.c
and
https://github.com/bminor/glibc/blob/master/sysdeps/unix/sysv/linux/kernel-posix-cpu-timers.h).

With this patch one can run more tests on OSv with Linux dynamic linker:

scripts/test.py --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d tst-kill \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-wctype

Signed-off-by: Waldemar Kozaczuk 

Closes #1286

---
diff --git a/libc/pthread.cc b/libc/pthread.cc
--- a/libc/pthread.cc
+++ b/libc/pthread.cc
@@ -325,7 +325,10 @@ int pthread_getcpuclockid(pthread_t thread, clockid_t 
*clock_id)
 if (clock_id) {
 pthread *p = pthread::from_libc(thread);
 auto id = p->_thread->id();
-*clock_id = id + _OSV_CLOCK_SLOTS;
+//Follow the same formula glibc and musl use to create
+//a negative clock_id that is then used by Linux kernel when
+//handling get_clocktime (see 
https://git.musl-libc.org/cgit/musl/tree/src/thread/pthread_getcpuclockid.c)
+*clock_id = (-id - 1) * 8U + 6;
 }
 return 0;
 }
diff --git a/libc/pthread.hh b/libc/pthread.hh
--- a/libc/pthread.hh
+++ b/libc/pthread.hh
@@ -12,12 +12,6 @@
 extern "C" {
 #endif
 
-// Linux's  defines 9 types of clocks. We reserve space for 16 slots
-// and use the clock ids afterwards for per-thread clocks. This is OSv-
-// specific, and an application doesn't need to know about it - only
-// pthread_getcpuclockid() and clock_gettime() need to know about this.
-#define _OSV_CLOCK_SLOTS 16
-
 #ifdef __cplusplus
 }
 
diff --git a/libc/time.cc b/libc/time.cc
--- a/libc/time.cc
+++ b/libc/time.cc
@@ -96,11 +96,20 @@ int clock_gettime(clockid_t clk_id, struct timespec* ts)
 break;
 
 default:
-if (clk_id < _OSV_CLOCK_SLOTS) {
+//At this point we should only let the negative numbers
+//which represent clock_id for specific thread
+if (clk_id >= 0) {
 return libc_error(EINVAL);
 } else {
-auto thread = sched::thread::find_by_id(clk_id - _OSV_CLOCK_SLOTS);
-fill_ts(thread->thread_clock(), ts);
+//Reverse the formula used in pthread_getcpuclockid()
+//and calculate thread id given clk_id
+pid_t tid = (-clk_id - 2) / 8;
+auto thread = sched::thread::find_by_id(tid);
+if (thread) {
+fill_ts(thread->thread_clock(), ts);
+} else {
+return libc_error(EINVAL);
+}
 }
 }
 
@@ -124,7 +133,9 @@ int clock_getres(clockid_t clk_id, struct timespec* ts)
 case CLOCK_MONOTONIC_RAW:
 break;
 default:
-if (clk_id < _OSV_CLOCK_SLOTS) {
+//At this point we should only let the negative numbers
+//which represent clock_id for specific thread
+if (clk_id >= 0) {
 return libc_error(EINVAL);
 }
 }

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/d5ae0d060cc96102%40google.com.


[osv-dev] [COMMIT osv master] loader.py: enhance to support debugging programs launched with Linux dynamic linker

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

loader.py: enhance to support debugging programs launched with Linux dynamic 
linker

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/loader.py b/scripts/loader.py
--- a/scripts/loader.py
+++ b/scripts/loader.py
@@ -21,6 +21,8 @@
format_time)
 from osv import trace, debug
 
+from manifest_common import add_var, expand, unsymlink, read_manifest, defines
+
 virtio_driver_type = gdb.lookup_type('virtio::virtio_driver')
 
 class status_enum_class(object):
@@ -131,33 +133,40 @@ def __init__(self):
 # Load data from usr.manifest in build_dir
 mm_path = os.path.join(build_dir, 'usr.manifest')
 try:
-self.data = open(mm_path).read().split('\n')
+_manifest = read_manifest(mm_path)
+self.guest_to_host_map = list(expand(_manifest))
+self.guest_to_host_map = [(x, unsymlink(y % defines)) for (x, y) 
in self.guest_to_host_map]
 except IOError:
-self.data = []
+self.guest_to_host_map = []
 
 def find(self, path):
 '''Try to locate file with help of usr.manifest'''
-files = [ff.split(':', 1)[1].strip() for ff in self.data if 
ff.split(':', 1)[0].strip() == path]
+files = [host for (guest, host) in self.guest_to_host_map if guest == 
path]
 if files:
-file = files[-1]  # the last line in usr.manifest wins
+host_file = files[-1]  # the last line in usr.manifest wins
 else:
-file = ""
-file = self.resolve_symlink(file)
-print('manifest.find_file: path=%s, found file=%s' % (path, file))
+host_file = ""
+host_file = self.resolve_host_file(host_file)
+print('manifest.find_file: path=%s, found file=%s' % (path, host_file))
 # usr.manifest contains lines like "%(gccbase)s/lib64/libgcc_s.so.1" 
too.
 # Filter out such cases.
-if os.path.exists(file):
-return file
+if os.path.exists(host_file):
+return host_file
 else:
 return ""
 
-def resolve_symlink(self, file):
-'''If file is a symlink, try to resolve it with help of usr.manifest'''
+def resolve_host_file(self, file):
 resolved_file = file
+#Handle symlink
 if file.startswith('->'):
-path = file[2:]
-resolved_file = self.find(path)
-# print('manifest.resolve_symlink: file=%s, resolved_file=%s' % 
(file, resolved_file))
+resolved_file = self.find(file[2:].strip())
+else:
+resolved_file = file
+#Handle path to build directory
+if resolved_file.startswith('.'):
+resolved_file = os.path.join(build_dir, resolved_file[2:])
+if not resolved_file.startswith('/'):
+resolved_file = os.path.join(build_dir, resolved_file)
 return resolved_file
 
 manifest = Manifest()
@@ -689,6 +698,7 @@ def __init__(self):
  gdb.COMMAND_USER, gdb.COMPLETE_NONE)
 def invoke(self, arg, from_tty):
 syminfo_resolver.clear_cache()
+object_paths = set()
 for obj in 
read_vector(gdb.lookup_global_symbol('elf::program::s_objs').value()):
 base = to_int(obj['_base'])
 obj_path = obj['_pathname']['_M_dataplus']['_M_p'].string()
@@ -697,8 +707,30 @@ def invoke(self, arg, from_tty):
 print('ERROR: Unable to locate object file for:', obj_path, 
hex(base))
 else:
 print(path, hex(base))
+object_paths.add(path)
 load_elf(path, base)
 
+for vma in vma_list():
+start = ulong(vma['_range']['_start'])
+flags = flagstr(ulong(vma['_flags']))
+perm = permstr(ulong(vma['_perm']))
+
+if 'F' in flags:
+file_vma = vma.cast(gdb.lookup_type('mmu::file_vma').pointer())
+file_ptr = 
file_vma['_file']['px'].cast(gdb.lookup_type('file').pointer())
+dentry_ptr = 
file_ptr['f_dentry']['px'].cast(gdb.lookup_type('dentry').pointer())
+file_path = dentry_ptr['d_path'].string()
+path = translate(file_path)
+if not path:
+print('ERROR: Unable to locate object file for:', 
file_path, hex(start))
+elif path not in object_paths:
+print(path, hex(start))
+try:
+load_elf(path, start)
+object_paths.add(path)
+except gdb.error:
+print('ERROR: Not an ELF file', path, hex(start))
+
 class osv_load_elf(gdb.Command):
 def __init__(self):
 gdb.Command.__init__(self, 'osv load-elf',

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this 

[osv-dev] [COMMIT osv master] vfs: make sys_utimensat handle AT_SYMLINK_NOFOLLOW

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

vfs: make sys_utimensat handle AT_SYMLINK_NOFOLLOW

This patch adds missing logic (marked by TODO) to handle AT_SYMLINK_NOFOLLOW
in the implementation of sys_utimensat().

We do it by extracting relevant code from sys_utimes() into
new helper function handle_symlink_nofollow() which is then reused
in both sys_utimes() and sys_utimensat().

Please note the tst-utimensat.cc has to be slightly adjusted with
ifdef to account for differences between how glibc and OSv libc handle
this case with utimensat():

"Force utimensat to fail when dirfd was AT_FDCWD and pathname is NULL"

Linux manual states this about when EFAULT errno is set:
"EFAULT times pointed to an invalid address; or, dirfd was
AT_FDCWD, and pathname is NULL or an invalid address."

Likewise, we also have to adjust tst-utimes.cc with ifdef to account
for differences between how utimes() called with invalid times argument
gets handled by glibc and OSv libc.

Changes in this patch and 8 previous patches make it possible
to run more unit tests on OSv with linux dynamic linker:

scripts/test.py --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d tst-kill \
  -d tst-pthread-clock \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-wctype

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/vfs_syscalls.cc b/fs/vfs/vfs_syscalls.cc
--- a/fs/vfs/vfs_syscalls.cc
+++ b/fs/vfs/vfs_syscalls.cc
@@ -1248,41 +1248,52 @@ static void convert_timeval(struct timespec , const 
struct timeval *from)
 }
 }
 
-int
-sys_utimes(char *path, const struct timeval times[2], int flags)
+static int handle_symlink_nofollow(const char *path, int flags, struct dentry 
**dpp)
 {
 int error;
-struct dentry *dp;
-struct timespec timespec_times[2];
-
-DPRINTF(VFSDB_SYSCALL, ("sys_utimes: path=%s\n", path));
-
-if (times && (!is_timeval_valid([0]) || 
!is_timeval_valid([1])))
-return EINVAL;
-
-// Convert each element of timeval array to the timespec type
-convert_timeval(timespec_times[0], times ? times + 0 : nullptr);
-convert_timeval(timespec_times[1], times ? times + 1 : nullptr);
-
+char *_path = const_cast(path);
 if (flags & AT_SYMLINK_NOFOLLOW) {
 struct dentry *ddp;
-error = lookup(path, , nullptr);
+error = lookup(_path, , nullptr);
 if (error) {
 return error;
 }
 
-error = namei_last_nofollow(path, ddp, );
+error = namei_last_nofollow(_path, ddp, dpp);
 if (ddp != nullptr) {
 drele(ddp);
 }
 if (error) {
 return error;
 }
 } else {
-error = namei(path, );
+error = namei(_path, dpp);
 if (error)
 return error;
 }
+return 0;
+}
+
+int
+sys_utimes(char *path, const struct timeval times[2], int flags)
+{
+int error;
+struct dentry *dp;
+struct timespec timespec_times[2];
+
+DPRINTF(VFSDB_SYSCALL, ("sys_utimes: path=%s\n", path));
+
+if (times && (!is_timeval_valid([0]) || 
!is_timeval_valid([1])))
+return EINVAL;
+
+// Convert each element of timeval array to the timespec type
+convert_timeval(timespec_times[0], times ? times + 0 : nullptr);
+convert_timeval(timespec_times[1], times ? times + 1 : nullptr);
+
+error = handle_symlink_nofollow(path, flags, );
+if (error) {
+return error;
+}
 
 if (dp->d_mount->m_flags & MNT_RDONLY) {
 error = EROFS;
@@ -1367,12 +1378,10 @@ sys_utimensat(int dirfd, const char *pathname, const 
struct timespec times[2], i
ap = std::string(fp->f_dentry->d_mount->m_path) + "/" + ap;
 }
 
-/* FIXME: Add support for AT_SYMLINK_NOFOLLOW */
-
-error = namei(ap.c_str(), );
-
-if (error)
+error = handle_symlink_nofollow(ap.c_str(), flags, );
+if (error) {
 return error;
+}
 
 if (dp->d_mount->m_flags & MNT_RDONLY) {
 error = EROFS;
diff --git a/tests/tst-utimensat.cc b/tests/tst-utimensat.cc
--- a/tests/tst-utimensat.cc
+++ b/tests/tst-utimensat.cc
@@ -100,9 +100,9 @@ int main(int argc, char *argv[])
 ret = utimensat(AT_FDCWD, rel_path_bar_to_tmp, times, 0);
 report(ret == 0, "utimensat worked successfully with AT_FDCWD");
 
- /* Use dirfd and relative path of bar to check utimensat */
- ret = utimensat(dirfd, rel_path_bar_to_foo, times, 0);
- report(ret == 0, "utimensat works with dirfd and relative path");
+/* Use dirfd and relative path of bar to check utimensat */
+ret = utimensat(dirfd, rel_path_bar_to_foo, times, 0);
+report(ret == 0, "utimensat works with dirfd and relative path");
 
 /* Force utimensat to fail using invalid dirfd */
 ret = utimensat(100, rel_path_bar_to_foo, times, 0);
@@ -114,15 +114,19 @@ int main(int argc, char *argv[])
 
 /* Force utimensat to fail when dirfd was AT_FDCWD and pathname is NULL */
 ret = 

[osv-dev] [COMMIT osv master] syscalls: fix utimensat

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscalls: fix utimensat

The Linux manual for utimensat states this:

"On Linux, futimens() is a library function implemented on top of
 the utimensat() system call.  To support this, the Linux
 utimensat() system call implements a nonstandard feature: if
 pathname is NULL, then the call modifies the timestamps of the
 file referred to by the file descriptor dirfd (which may refer to
 any type of file).  Using this feature, the call
 futimens(fd, times) is implemented as:

 utimensat(fd, NULL, times, 0);

 Note, however, that the glibc wrapper for utimensat() disallows
 passing NULL as the value for pathname: the wrapper function
 returns the error EINVAL in this case."

To accommodate it, we introduce a new function utimensat4(), which is
what the Linux syscall wrapper delegates to. The new utimensat4() calls
modified sys_utimensat() with the syscall flag argument equal to true.
The modified sys_utimensat() treats the dirfd accordingly in such case.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -2060,7 +2060,22 @@ int utimensat(int dirfd, const char *pathname, const 
struct timespec times[2], i
 {
 trace_vfs_utimensat(pathname);
 
-auto error = sys_utimensat(dirfd, pathname, times, flags);
+auto error = sys_utimensat(dirfd, pathname, times, flags, false);
+if (error) {
+trace_vfs_utimensat_err(error);
+errno = error;
+return -1;
+}
+
+trace_vfs_utimensat_ret();
+return 0;
+}
+
+int utimensat4(int dirfd, const char *pathname, const struct timespec 
times[2], int flags)
+{
+trace_vfs_utimensat(pathname);
+
+auto error = sys_utimensat(dirfd, pathname, times, flags, true);
 if (error) {
 trace_vfs_utimensat_err(error);
 errno = error;
diff --git a/fs/vfs/vfs.h b/fs/vfs/vfs.h
--- a/fs/vfs/vfs.h
+++ b/fs/vfs/vfs.h
@@ -120,7 +120,7 @@ int  sys_truncate(char *path, off_t length);
 int sys_readlink(char *path, char *buf, size_t bufsize, ssize_t *size);
 int  sys_utimes(char *path, const struct timeval times[2], int flags);
 int  sys_utimensat(int dirfd, const char *pathname,
-   const struct timespec times[2], int flags);
+   const struct timespec times[2], int flags, bool syscall);
 int  sys_futimens(int fd, const struct timespec times[2]);
 int  sys_fallocate(struct file *fp, int mode, loff_t offset, loff_t len);
 
diff --git a/fs/vfs/vfs_syscalls.cc b/fs/vfs/vfs_syscalls.cc
--- a/fs/vfs/vfs_syscalls.cc
+++ b/fs/vfs/vfs_syscalls.cc
@@ -1317,7 +1317,7 @@ void init_timespec(struct timespec &_times, const struct 
timespec *times)
 }
 
 int
-sys_utimensat(int dirfd, const char *pathname, const struct timespec times[2], 
int flags)
+sys_utimensat(int dirfd, const char *pathname, const struct timespec times[2], 
int flags, bool syscall)
 {
 int error;
 std::string ap;
@@ -1356,7 +1356,7 @@ sys_utimensat(int dirfd, const char *pathname, const 
struct timespec times[2], i
if(!fp->f_dentry)
return EBADF;
 
-   if (!(fp->f_dentry->d_vnode->v_type & VDIR))
+   if (!syscall && !(fp->f_dentry->d_vnode->v_type & VDIR))
return ENOTDIR;
 
if (pathname)
@@ -1407,7 +1407,7 @@ sys_futimens(int fd, const struct timespec times[2])
 return EBADF;
 
 std::string pathname = fp->f_dentry->d_path;
-auto error = sys_utimensat(AT_FDCWD, pathname.c_str(), times, 0);
+auto error = sys_utimensat(AT_FDCWD, pathname.c_str(), times, 0, false);
 return error;
 }
 
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -574,6 +574,9 @@ static long sys_brk(void *addr)
 }
 }
 
+#define __NR_utimensat4 __NR_utimensat
+extern int utimensat4(int dirfd, const char *pathname, const struct timespec 
times[2], int flags);
+
 #ifdef SYS_open
 TRACEPOINT(trace_syscall_open, "%d <= \"%s\" 0x%x", int, const char *, int);
 #endif
@@ -692,7 +695,7 @@ TRACEPOINT(trace_syscall_chdir, "%d <= \"%s\"", int, const 
char *);
 TRACEPOINT(trace_syscall_faccessat, "%d <= %d \"%s\" %d %d", int, int, const 
char *, int, int);
 TRACEPOINT(trace_syscall_kill, "%d <= %d %d", int, pid_t, int);
 TRACEPOINT(trace_syscall_alarm, "%d <= %u", int, unsigned int);
-TRACEPOINT(trace_syscall_utimensat, "%d <= %d \"%s\" %p %d", int, int, const 
char *, const struct timespec*, int);
+TRACEPOINT(trace_syscall_utimensat4, "%d <= %d \"%s\" %p %d", int, int, const 
char *, const struct timespec*, int);
 TRACEPOINT(trace_syscall_symlink, "%d <= \"%s\" \"%s\"", int, const char *, 
const char *);
 TRACEPOINT(trace_syscall_rmdir, "%d <= \"%s\"", int, const char *);
 TRACEPOINT(trace_syscall_sethostname, "%d <= \"%s\" %d", int, const char *, 
int);
@@ -843,7 +846,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL4(faccessat, int, const char *, int, int);
 SYSCALL2(kill, pid_t, int);
 SYSCALL1(alarm, unsigned int);
-

[osv-dev] [COMMIT osv master] __fxstatat: make it handle AT_SYMLINK_NOFOLLOW

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

__fxstatat: make it handle AT_SYMLINK_NOFOLLOW

This patch makes tst-symlink tests pass when running on OSv
with dynamic linker. In this case lstat() and stat() are handled
by glibc by calling system call fstatat.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -639,7 +639,12 @@ int __fxstatat(int ver, int dirfd, const char *pathname, 
struct stat *st,
 }
 
 if (pathname[0] == '/' || dirfd == AT_FDCWD) {
-return stat(pathname, st);
+if (flags & AT_SYMLINK_NOFOLLOW) {
+return lstat(pathname, st);
+}
+else {
+return stat(pathname, st);
+}
 }
 // If AT_EMPTY_PATH and pathname is an empty string, fstatat() operates on
 // dirfd itself, and in that case it doesn't have to be a directory.

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/9125f7060c9082bb%40google.com.


[osv-dev] [COMMIT osv master] syscalls: expose rename

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscalls: expose rename

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -716,6 +716,7 @@ TRACEPOINT(trace_syscall_long_shmat, "0x%x <= %d 0x%x %d", 
long, int, const void
 TRACEPOINT(trace_syscall_shmctl, "%d <= %d %d %p", int, int, int, struct 
shmid_ds *);
 TRACEPOINT(trace_syscall_shmdt, "%d <= 0x%x", int, const void *)
 TRACEPOINT(trace_syscall_shmget, "%d <= %d %lu %d", int, key_t, size_t, int);
+TRACEPOINT(trace_syscall_rename, "%d <= %s %s", int, const char *, const char 
*);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -868,6 +869,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL3(shmctl, int, int, struct shmid_ds *);
 SYSCALL1(shmdt, const void *);
 SYSCALL3(shmget, key_t, size_t, int);
+SYSCALL2(rename, const char *, const char *);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/7d54ff060c9082d1%40google.com.


[osv-dev] [COMMIT osv master] tests-with-linux-ld: remove non-applicable tst-reloc

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests-with-linux-ld: remove non-applicable tst-reloc

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -42,10 +42,6 @@ $(out)/tests-with-linux-ld/tst-non-pie: CXXFLAGS:=$(subst 
-fpie,-no-pie,$(CXXFLA
 $(out)/tests-with-linux-ld/tst-non-pie: $(src)/tests/tst-non-pie.cc
$(call quiet, $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $< $(LIBS), LD 
tests-with-linux-ld/tst-non-pie)
 
-$(out)/tests-with-linux-ld/tst-reloc.o: CFLAGS:=$(subst -fPIC,-fpie,$(CFLAGS))
-$(out)/tests-with-linux-ld/tst-reloc: $(src)/tests/tst-reloc.c
-   $(call quiet, $(CC) $(CFLAGS) $(LDFLAGS) -pie -o $@ $< $(LIBS), LD 
tests-with-linux-ld/tst-reloc)
-
 $(out)/tests-with-linux-ld/tst-bitset-iter.o: COMMON:=-I $(src)/include 
$(COMMON)
 $(out)/tests-with-linux-ld/tst-queue-mpsc.o: COMMON:=-I $(src)/include -DLINUX 
$(COMMON)
 $(out)/tests-with-linux-ld/tst-poll.o: COMMON:=-I $(src)/include $(COMMON)
@@ -100,7 +96,7 @@ tests := tst-pthread misc-ramdisk tst-vblk \
libtls.so libtls_gold.so tst-tls tst-tls-gold \
tst-sigaction tst-syscall tst-ifaddrs tst-getdents \
tst-netlink tst-pthread-create misc-futex-perf \
-   misc-syscall-perf tst-reloc misc-vdso-perf misc-scheduler
+   misc-syscall-perf misc-vdso-perf misc-scheduler
 
 ifeq ($(arch),x64)
 tests += tst-mmx-fpu
@@ -192,7 +188,7 @@ common.manifest: build_all_tests $(lastword 
$(MAKEFILE_LIST)) usr.manifest.skel
@cat usr.manifest.skel > $@
@case "$(CROSS_PREFIX)" in \
"aarch64"*) ../tests/add_aarch64_boost_libraries.sh $(OSV_BASE) 
>> $@ ;; \
-   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr/\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
+   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
esac
@echo $(common-tests) | tr ' ' '\n' | awk '{print "/tests/" $$0 ": 
./tests/" $$0}' >> $@
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/6a32af060c908239%40google.com.


[osv-dev] [COMMIT osv master] vdso: make clock_gettime() return -errno if it fails

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

vdso: make clock_gettime() return -errno if it fails

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/libc/vdso/vdso.cc b/libc/vdso/vdso.cc
--- a/libc/vdso/vdso.cc
+++ b/libc/vdso/vdso.cc
@@ -21,7 +21,11 @@ extern "C" __attribute__((__visibility__("default")))
 int __vdso_clock_gettime(clockid_t clk_id, struct timespec *tp)
 {
 arch::tls_switch _tls_switch;
-return clock_gettime(clk_id, tp);
+if (clock_gettime(clk_id, tp) < 0) {
+return -errno;
+} else {
+return 0;
+}
 }
 #endif
 
@@ -35,7 +39,11 @@ int __kernel_gettimeofday(struct timeval *tv, struct 
timezone *tz)
 __attribute__((__visibility__("default")))
 int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
 {
-return clock_gettime(clk_id, tp);
+if (clock_gettime(clk_id, tp) < 0) {
+return -errno;
+} else {
+return 0;
+}
 }
 
 __attribute__((__visibility__("default")))

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/3f99d1060c908218%40google.com.


[osv-dev] [COMMIT osv master] syscall: expose shmat, shmctl, shmdt, shmget

2023-12-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscall: expose shmat, shmctl, shmdt, shmget

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "tls-switch.hh"
@@ -225,12 +226,17 @@ static int sys_sched_setaffinity(
 pid, len, reinterpret_cast(mask));
 }
 
+#define __NR_long_mmap __NR_mmap
+
+#define __NR_long_shmat __NR_shmat
 // Only void* return value of mmap is type casted, as syscall returns long.
 long long_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t 
offset) {
 return (long) mmap(addr, length, prot, flags, fd, offset);
 }
-#define __NR_long_mmap __NR_mmap
 
+long long_shmat(int shmid, const void *shmaddr, int shmflg) {
+return (long) shmat(shmid, shmaddr, shmflg);
+}
 
 #define SYSCALL0(fn) case (__NR_##fn): do { long ret = fn(); 
trace_syscall_##fn(ret); return ret; } while (0)
 
@@ -706,6 +712,10 @@ TRACEPOINT(trace_syscall_sys_clone3, "%d <= %p %lu", int, 
struct clone_args *, s
 TRACEPOINT(trace_syscall_prlimit64, "%d <= %u %d %p %p", int, pid_t, int, 
const struct rlimit *, struct rlimit *);
 TRACEPOINT(trace_syscall_msync, "%d <= 0x%x %lu %d", int, void *, size_t, int);
 TRACEPOINT(trace_syscall_truncate, "%d <= %s %ld", int, const char *, off_t);
+TRACEPOINT(trace_syscall_long_shmat, "0x%x <= %d 0x%x %d", long, int, const 
void *, int);
+TRACEPOINT(trace_syscall_shmctl, "%d <= %d %d %p", int, int, int, struct 
shmid_ds *);
+TRACEPOINT(trace_syscall_shmdt, "%d <= 0x%x", int, const void *)
+TRACEPOINT(trace_syscall_shmget, "%d <= %d %lu %d", int, key_t, size_t, int);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -854,6 +864,10 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL4(prlimit64, pid_t, int, const struct rlimit *, struct rlimit *);
 SYSCALL3(msync, void *, size_t, int);
 SYSCALL2(truncate, const char *, off_t);
+SYSCALL3(long_shmat, int, const void *, int);
+SYSCALL3(shmctl, int, int, struct shmid_ds *);
+SYSCALL1(shmdt, const void *);
+SYSCALL3(shmget, key_t, size_t, int);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/54f336060c908270%40google.com.


[osv-dev] [COMMIT osv master] syscall: add msync syscall

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscall: add msync syscall

This allows tst-mmap.cc to execute successfully on OSv with the Linux
dynamic linker.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -704,6 +704,7 @@ TRACEPOINT(trace_syscall_sys_clone, "%d <= 0x%x 0x%x %p %p 
%lu", int, unsigned l
 TRACEPOINT(trace_syscall_sys_clone3, "%d <= %p %lu", int, struct clone_args *, 
size_t);
 #endif
 TRACEPOINT(trace_syscall_prlimit64, "%d <= %u %d %p %p", int, pid_t, int, 
const struct rlimit *, struct rlimit *);
+TRACEPOINT(trace_syscall_msync, "%d <= 0x%x %lu %d", int, void *, size_t, int);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -850,6 +851,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL2(sys_clone3, struct clone_args *, size_t);
 #endif
 SYSCALL4(prlimit64, pid_t, int, const struct rlimit *, struct rlimit *);
+SYSCALL3(msync, void *, size_t, int);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/a24a37060c84e45c%40google.com.


[osv-dev] [COMMIT osv master] syscall: expose truncate

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscall: expose truncate

This patch makes tst-truncate.cc pass on OSv when running with
the Linux dynamic linker.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -705,6 +705,7 @@ TRACEPOINT(trace_syscall_sys_clone3, "%d <= %p %lu", int, 
struct clone_args *, s
 #endif
 TRACEPOINT(trace_syscall_prlimit64, "%d <= %u %d %p %p", int, pid_t, int, 
const struct rlimit *, struct rlimit *);
 TRACEPOINT(trace_syscall_msync, "%d <= 0x%x %lu %d", int, void *, size_t, int);
+TRACEPOINT(trace_syscall_truncate, "%d <= %s %ld", int, const char *, off_t);
 
 OSV_LIBC_API long syscall(long number, ...)
 {
@@ -852,6 +853,7 @@ OSV_LIBC_API long syscall(long number, ...)
 #endif
 SYSCALL4(prlimit64, pid_t, int, const struct rlimit *, struct rlimit *);
 SYSCALL3(msync, void *, size_t, int);
+SYSCALL2(truncate, const char *, off_t);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/b5a7a4060c84e449%40google.com.


[osv-dev] [COMMIT osv master] tst-realloc: test malloc_usable_size() only when running with OSv libc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tst-realloc: test malloc_usable_size() only when running with OSv libc

The results returned by malloc_usable_size() are very much dependent
on the malloc() implementation. Let us disable this part of the
tst-realloc.c when built with glibc.

As of this commit, most of the unit tests should pass when running on OSv
with Linux dynamic linker:

scripts/test.py --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d tst-futimesat \
  -d tst-kill \
  -d tst-mmap \
  -d tst-pthread-clock \
  -d tst-reloc \
  -d tst-shm \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-symlink \
  -d tst-symlink-rofs \
  -d tst-time \
  -d tst-truncate \
  -d tst-utimensat \
  -d tst-utimes \
  -d tst-wctype

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-realloc.cc b/tests/tst-realloc.cc
--- a/tests/tst-realloc.cc
+++ b/tests/tst-realloc.cc
@@ -38,6 +38,7 @@ static void test_realloc(size_t original_size, size_t 
new_size)
 std::cerr << "PASSED realloc() for original_size: " << original_size << ", 
new_size: " << new_size << std::endl;
 }
 
+#ifdef __OSV__
 static void test_usable_size(size_t size, size_t expected_usable_size)
 {
 void* ptr = malloc(size);
@@ -46,6 +47,7 @@ static void test_usable_size(size_t size, size_t 
expected_usable_size)
 
 std::cerr << "PASSED malloc_usable_size() for size: " << size << std::endl;
 }
+#endif
 
 int main()
 {
@@ -82,13 +84,15 @@ int main()
 buf = malloc(16);
 assert(!realloc(buf, 0));
 
+#ifdef __OSV__
 test_usable_size(1, 8);
 test_usable_size(8, 8);
 test_usable_size(67, 128);
 test_usable_size(0x4010, 0x4FC0);
 test_usable_size(0x10, 0x100FC0);
 test_usable_size(0x20, 0x200FC0);
+#endif
 
 std::cerr << "PASSED\n";
 return 0;
-}
\ No newline at end of file
+}

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/8e548d060c84e4cd%40google.com.


[osv-dev] [COMMIT osv master] tests: tweak tst-dlfcn.cc to make it work with glibc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: tweak tst-dlfcn.cc to make it work with glibc

This patch tweaks tst-dlfcn.cc to make it work with both glibc
and OSv libc:

- compare dli_sname returned by dladdr() with _IO_vfprintf when built
  with glibc

- call dlerror() to clear any old error conditions before calling
  dlopen()

- take into account differences between what exact error message is
  reported by dlerror() when built with glibc

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-dlfcn.cc b/tests/tst-dlfcn.cc
--- a/tests/tst-dlfcn.cc
+++ b/tests/tst-dlfcn.cc
@@ -52,15 +52,27 @@ BOOST_AUTO_TEST_CASE(test_dladdr)
 BOOST_REQUIRE(vfprintf != info.dli_saddr);
 
 BOOST_REQUIRE(dladdr(adj_addr(vfprintf, 0), ) != 0);
+#ifdef LINUX
+BOOST_CHECK_EQUAL("_IO_vfprintf", info.dli_sname);
+#else
 BOOST_CHECK_EQUAL("vfprintf", info.dli_sname);
+#endif
 BOOST_CHECK_EQUAL(vfprintf, info.dli_saddr);
 
 BOOST_REQUIRE(dladdr(adj_addr(vfprintf, 2), ) != 0);
+#ifdef LINUX
+BOOST_CHECK_EQUAL("_IO_vfprintf", info.dli_sname);
+#else
 BOOST_CHECK_EQUAL("vfprintf", info.dli_sname);
+#endif
 BOOST_CHECK_EQUAL(vfprintf, info.dli_saddr);
 
 BOOST_REQUIRE(dladdr(adj_addr(vfprintf, 4), ) != 0);
+#ifdef LINUX
+BOOST_CHECK_EQUAL("_IO_vfprintf", info.dli_sname);
+#else
 BOOST_CHECK_EQUAL("vfprintf", info.dli_sname);
+#endif
 BOOST_CHECK_EQUAL(vfprintf, info.dli_saddr);
 }
 
@@ -201,25 +213,36 @@ BOOST_AUTO_TEST_CASE(dlsym_handle_global_sym)
 {
 // check that we do not look into global group
 // when looking up symbol by handle
+dlerror();
 void* handle = dlopen("/tests/libtest_empty.so", RTLD_NOW);
 dlopen("libtest_with_dependency.so", RTLD_NOW | RTLD_GLOBAL);
 void* sym = dlsym(handle, "getRandomNumber");
 BOOST_REQUIRE(sym == nullptr);
-auto err_msg = std::string(dlerror());
+auto dl_err = dlerror();
+BOOST_REQUIRE(dl_err != nullptr);
+auto err_msg = std::string(dl_err);
 BOOST_TEST_CONTEXT(err_msg)
+#ifdef LINUX
+BOOST_REQUIRE_NE(err_msg.find("undefined symbol: getRandomNumber"),
+std::string::npos);
+#else
 BOOST_REQUIRE_NE(err_msg.find("dlsym: symbol getRandomNumber not found"),
 std::string::npos);
-// BOOST_REQUIRE_NE(err_msg.find("undefined symbol: getRandomNumber"),
-// std::string::npos);
+#endif
 
+dlerror();
 sym = dlsym(handle, "DlSymTestFunction");
 BOOST_REQUIRE(sym == nullptr);
-err_msg = std::string(dlerror());
+dl_err = dlerror();
+err_msg = std::string(dl_err);
 BOOST_TEST_CONTEXT(err_msg)
+#ifdef LINUX
+BOOST_REQUIRE_NE(err_msg.find("undefined symbol: DlSymTestFunction"),
+std::string::npos);
+#else
 BOOST_REQUIRE_NE(err_msg.find("dlsym: symbol DlSymTestFunction not found"),
 std::string::npos);
-// BOOST_REQUIRE_NE(err_msg.find("undefined symbol: DlSymTestFunction"),
-// std::string::npos);
+#endif
 dlclose(handle);
 }
 
@@ -229,14 +252,20 @@ BOOST_AUTO_TEST_CASE(dlsym_handle_empty_symbol)
 void* handle = dlopen("/tests/libtest_dlsym_from_this.so", RTLD_NOW);
 BOOST_TEST_CONTEXT(dlerror())
 BOOST_REQUIRE(handle != nullptr);
+dlerror();
 void* sym = dlsym(handle, "");
 BOOST_REQUIRE(sym == nullptr);
-auto err_msg = std::string(dlerror());
+auto dl_err = dlerror();
+BOOST_REQUIRE(dl_err != nullptr);
+auto err_msg = std::string(dl_err);
 BOOST_TEST_CONTEXT(err_msg)
+#ifdef LINUX
+BOOST_REQUIRE_NE(err_msg.find("undefined symbol: "),
+std::string::npos);
+#else
 BOOST_REQUIRE_NE(err_msg.find("dlsym: symbol  not found"),
 std::string::npos);
-// BOOST_REQUIRE_NE(err_msg.find("undefined symbol: "),
-// std::string::npos);
+#endif
 dlclose(handle);
 }
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/7abba3060c84e464%40google.com.


[osv-dev] [COMMIT osv master] tests: disable named semaphore testing when running on OSv with ld.so

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: disable named semaphore testing when running on OSv with ld.so

Even though OSv implements named semaphores it does so without
/dev/shm pseudo filesystem.

When running the tst-semaphore.c test on OSv with Linux dynamic
linker, the glibc implementation of it ends up trying to create
files under /dev/shm which fail:

0x41a6b040 /lib64/ld-linux  0 0.303028066 syscall_openat   
-1 <= -100 "/dev/shm/sem.name" 042 0
0x41a6b040 /lib64/ld-linux  0 0.303452134 syscall_getrandom
-1 <= 0x20200558 8 1
0x41a6b040 /lib64/ld-linux  0 0.303455735 syscall_fstatat  
-1 <= -100 "/dev/shm/sem.Y244sc" 0x20200450 0400
0x41a6b040 /lib64/ld-linux  0 0.303458186 syscall_openat   
-1 <= -100 "/dev/shm/sem.Y244sc" 0302 511

For now, we disable the named semaphore testing of the unit test
when built to be part of tests-with-linux-ld.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -58,6 +58,7 @@ $(out)/tests-with-linux-ld/tst-fallocate.o: COMMON:=-D__OSV__ 
$(COMMON)
 $(out)/tests-with-linux-ld/tst-fread.o: COMMON:=-D__OSV__ $(COMMON)
 $(out)/tests-with-linux-ld/tst-net_if_test.o: COMMON:=-D__OSV__ $(COMMON)
 $(out)/tests-with-linux-ld/tst-pipe.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-semaphore.o: 
COMMON:=-D__DISABLE_NAMED_SEMAPHORES__ $(COMMON)
 $(out)/tests-with-linux-ld/tst-uio.o: COMMON:=-D__OSV__ $(COMMON)
 
 #
diff --git a/tests/tst-semaphore.c b/tests/tst-semaphore.c
--- a/tests/tst-semaphore.c
+++ b/tests/tst-semaphore.c
@@ -56,7 +56,8 @@ int main(void) {
 assert(sem_destroy(_sync) == 0);
 assert(sem_destroy(_done) == 0);
 
-///Named sempahore test
+#ifndef __DISABLE_NAMED_SEMAPHORES__
+///Named semaphore test
 
 //Create and open two handles to a named semaphore
 sem_t *named_sem1 = sem_open("name", O_CREAT, 0777, 1);
@@ -96,6 +97,7 @@ int main(void) {
 //Close handles
 assert(sem_close(named_sem1) == 0);
 assert(sem_close(named_sem2) == 0);
+#endif
 
 return 0;
 }

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/67a4f5060c84e491%40google.com.


[osv-dev] [COMMIT osv master] tests: make tst-string.cc work with glibc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: make tst-string.cc work with glibc

The musl version of strsignal() used by OSv returns slightly different
message string than glibc. More specifically glibc appends the signal
number to the string.

We could change musl version to behave exactly like glibc, but instead
we tweak the tst-string.cc with ifdef to test correct message version
depending on how it is compiled.

In the end, the test now works on Linux and on OSv with Linux dynamic
linker.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-string.cc b/tests/tst-string.cc
--- a/tests/tst-string.cc
+++ b/tests/tst-string.cc
@@ -75,25 +75,43 @@ TEST(STRING_TEST, strsignal) {
   //ASSERT_STREQ("Real-time signal 14", strsignal(SIGRTMIN + 14));
 
   // Errors.
+#ifdef __OSV__
   ASSERT_STREQ("Unknown signal", strsignal(-1)); // Too small.
   ASSERT_STREQ("Unknown signal", strsignal(0)); // Still too small.
   ASSERT_STREQ("Unknown signal", strsignal(1234)); // Too large.
+#else
+  ASSERT_STREQ("Unknown signal -1", strsignal(-1)); // Too small.
+  ASSERT_STREQ("Unknown signal 0", strsignal(0)); // Still too small.
+  ASSERT_STREQ("Unknown signal 1234", strsignal(1234)); // Too large.
+#endif
 }
 
 static void* ConcurrentStrSignalFn(void*) {
+#ifdef __OSV__
   bool equal = (strcmp("Unknown signal", strsignal(2002)) == 0);
+#else
+  bool equal = (strcmp("Unknown signal 2002", strsignal(2002)) == 0);
+#endif
   return reinterpret_cast(equal);
 }
 
 TEST(STRING_TEST, strsignal_concurrent) {
   const char* strsignal1001 = strsignal(1001);
+#ifdef __OSV__
   ASSERT_STREQ("Unknown signal", strsignal1001);
+#else
+  ASSERT_STREQ("Unknown signal 1001", strsignal1001);
+#endif
 
   pthread_t t;
   ASSERT_EQ(0, pthread_create(, nullptr, ConcurrentStrSignalFn, nullptr));
   void* result;
   ASSERT_EQ(0, pthread_join(t, ));
   ASSERT_TRUE(static_cast(result));
 
+#ifdef __OSV__
   ASSERT_STREQ("Unknown signal", strsignal1001);
+#else
+  ASSERT_STREQ("Unknown signal 1001", strsignal1001);
+#endif
 }

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5458a3060c84e439%40google.com.


[osv-dev] [COMMIT osv master] tests: make tst-pthread-barrier.cc work with glibc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: make tst-pthread-barrier.cc work with glibc

Calling pthread_barrier_init() with last 2 arguments equal to 0
is handled more leniently on OSv with OSv libc. The same call
crashes with segmentation fault when compiled with glibc.

Regardless of whether that is a glibc bug, we deactivate this particular
part of the test with ifdef so that the test can be executed
on Linux and on OSv with Linux dynamic linker.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-pthread-barrier.cc b/tests/tst-pthread-barrier.cc
--- a/tests/tst-pthread-barrier.cc
+++ b/tests/tst-pthread-barrier.cc
@@ -69,10 +69,12 @@ int main(void)
 printf("Sizeof pthread_barrierattr_t: %ld\n", sizeof(attr));
 report("sizeof pthread_barrierattr_t is 4 bytes\n", sizeof(attr) == 4);
 
+#ifdef __OSV__
 // Try an invalid initialization (-1 or 0 or a null pthread_barrier_t*)
 retval = pthread_barrier_init(NULL, NULL, 4);
 report("pthread_barrier_init (pthread_barrier_t* == NULL)",
retval == EINVAL);
+#endif
 retval = pthread_barrier_init(, NULL, -1);
 report("pthread_barrier_init (count == -1)", retval == EINVAL);
 retval = pthread_barrier_init(, NULL, 0);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/40a642060c84e43a%40google.com.


[osv-dev] [COMMIT osv master] netlink: make response message nl_pid equal to 0

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

netlink: make response message nl_pid equal to 0

Just like Golang, glibc uses netlink interface to implement
functions like getifaddrs() and if_nameindex() under the hood.
Unlike Golang, glibc validates the netlink responses received
from the kernel to have nl_pid = 0 in their headers and ignores them
otherwise, and the app seems to hang as a result.

To fix it, we tweak OSv netlink implementation to initialize
netlink response header with zeros. As a result, the following unit tests
pass now:

- tst-ifaddrs.cc
- tst-net_if_test.cc
- tst-netlink.cc

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -71,7 +71,7 @@ std::atomic _nl_next_gen_pid(2);
 
 MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");
 
-static struct  bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
+static struct  bsd_sockaddr_nl netlink_src = { sizeof(bsd_sockaddr_nl), 
PF_NETLINK, 0, 0, 0};
 
 
 
@@ -293,7 +293,7 @@ netlink_input(struct mbuf *m)
 
netlink_proto.sp_family = PF_NETLINK;
 
-   raw_input_ext(m, _proto, _src, raw_input_netlink_cb);
+   raw_input_ext(m, _proto, (bsd_sockaddr*)_src, 
raw_input_netlink_cb);
 }
 
 void
diff --git a/tests/tst-netlink.c b/tests/tst-netlink.c
--- a/tests/tst-netlink.c
+++ b/tests/tst-netlink.c
@@ -97,6 +97,10 @@ int test_netlink(struct nlmsghdr* req, pid_t pid, void 
(*handle_response)(struct
 msg.msg_iov = iov;//Check if we can improve things 
downstream with some asserts or even error handling
 msg.msg_iovlen = 1;
 
+memset(_addr, 0, sizeof(dst_addr));
+msg.msg_name = _addr; //Set msg_name to make kernel return source 
address
+msg.msg_namelen = sizeof(dst_addr);
+
 memset(buf, 0, BUFSIZE);
 msg.msg_iov[0].iov_base = buf;
 msg.msg_iov[0].iov_len = BUFSIZE;
@@ -106,6 +110,9 @@ int test_netlink(struct nlmsghdr* req, pid_t pid, void 
(*handle_response)(struct
 die("recvmsg FAILED");
 }
 
+assert(dst_addr.nl_family == AF_NETLINK);
+assert(dst_addr.nl_pid == 0); //nl_pid = 0 indicates it came from 
kernel
+
 for (struct nlmsghdr *rsp = (struct nlmsghdr *)buf;
  NLMSG_OK(rsp, len); rsp = NLMSG_NEXT(rsp, len))
 {

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/182764060c84e4b1%40google.com.


[osv-dev] [COMMIT osv master] tests: make tst-ctype.cc work with glibc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: make tst-ctype.cc work with glibc

The tst-ctype unit test, which originates from Google Bionic project,
works fine on OSv with OSv libc. However it does not work correctly
on Linux or when running it on OSv with Linux dynamic linker.

More specifically functions like isalpha_l() crash when passed LC_GLOBAL_LOCALE
argument. The glibc manual states this:

"The behavior is undefined if the locale argument to isalpha_l()
is the special locale object LC_GLOBAL_LOCALE or is not a valid
locale object handle."

Therefore we tweak this test by instantiating initial locale
using newlocale() and use it when necessary instead of LC_GLOBAL_LOCALE.
This way the test works regardless of how it is executed.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/tst-ctype.cc b/tests/tst-ctype.cc
--- a/tests/tst-ctype.cc
+++ b/tests/tst-ctype.cc
@@ -47,6 +47,8 @@ namespace utf = boost::unit_test;
 static constexpr int kMin = -1;
 static constexpr int kMax = 256;
 
+static locale_t c_locale = newlocale(LC_ALL, "C.UTF-8", nullptr);
+
 TEST(ctype, isalnum) {
   for (int i = kMin; i < kMax; ++i) {
 if ((i >= '0' && i <= '9') ||
@@ -64,9 +66,9 @@ TEST(ctype, isalnum_l) {
 if ((i >= '0' && i <= '9') ||
 (i >= 'A' && i <= 'Z') ||
 (i >= 'a' && i <= 'z')) {
-  EXPECT_TRUE(isalnum_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isalnum_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isalnum_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isalnum_l(i, c_locale));
 }
   }
 }
@@ -86,9 +88,9 @@ TEST(ctype, isalpha_l) {
   for (int i = kMin; i < kMax; ++i) {
 if ((i >= 'A' && i <= 'Z') ||
 (i >= 'a' && i <= 'z')) {
-  EXPECT_TRUE(isalpha_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isalpha_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isalpha_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isalpha_l(i, c_locale));
 }
   }
 }
@@ -116,9 +118,9 @@ TEST(ctype, isblank) {
 TEST(ctype, isblank_l) {
   for (int i = kMin; i < kMax; ++i) {
 if (i == '\t' || i == ' ') {
-  EXPECT_TRUE(isblank_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isblank_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isblank_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isblank_l(i, c_locale));
 }
   }
 }
@@ -136,9 +138,9 @@ TEST(ctype, iscntrl) {
 TEST(ctype, iscntrl_l) {
   for (int i = kMin; i < kMax; ++i) {
 if ((i >= 0 && i < ' ') || i == 0x7f) {
-  EXPECT_TRUE(iscntrl_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(iscntrl_l(i, c_locale));
 } else {
-  EXPECT_FALSE(iscntrl_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(iscntrl_l(i, c_locale));
 }
   }
 }
@@ -156,9 +158,9 @@ TEST(ctype, isdigit) {
 TEST(ctype, isdigit_l) {
   for (int i = kMin; i < kMax; ++i) {
 if (i >= '0' && i <= '9') {
-  EXPECT_TRUE(isdigit_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isdigit_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isdigit_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isdigit_l(i, c_locale));
 }
   }
 }
@@ -176,9 +178,9 @@ TEST(ctype, isgraph) {
 TEST(ctype, isgraph_l) {
   for (int i = kMin; i < kMax; ++i) {
 if (i >= '!' && i <= '~') {
-  EXPECT_TRUE(isgraph_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isgraph_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isgraph_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isgraph_l(i, c_locale));
 }
   }
 }
@@ -196,9 +198,9 @@ TEST(ctype, islower) {
 TEST(ctype, islower_l) {
   for (int i = kMin; i < kMax; ++i) {
 if (i >= 'a' && i <= 'z') {
-  EXPECT_TRUE(islower_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(islower_l(i, c_locale));
 } else {
-  EXPECT_FALSE(islower_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(islower_l(i, c_locale));
 }
   }
 }
@@ -216,9 +218,9 @@ TEST(ctype, isprint) {
 TEST(ctype, isprint_l) {
   for (int i = kMin; i < kMax; ++i) {
 if (i >= ' ' && i <= '~') {
-  EXPECT_TRUE(isprint_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isprint_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isprint_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isprint_l(i, c_locale));
 }
   }
 }
@@ -242,9 +244,9 @@ TEST(ctype, ispunct_l) {
 (i >= ':' && i <= '@') ||
 (i >= '[' && i <= '`') ||
 (i >= '{' && i <= '~')) {
-  EXPECT_TRUE(ispunct_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(ispunct_l(i, c_locale));
 } else {
-  EXPECT_FALSE(ispunct_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(ispunct_l(i, c_locale));
 }
   }
 }
@@ -262,9 +264,9 @@ TEST(ctype, isspace) {
 TEST(ctype, isspace_l) {
   for (int i = kMin; i < kMax; ++i) {
 if ((i >= '\t' && i <= '\r') || i == ' ') {
-  EXPECT_TRUE(isspace_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_TRUE(isspace_l(i, c_locale));
 } else {
-  EXPECT_FALSE(isspace_l(i, LC_GLOBAL_LOCALE));
+  EXPECT_FALSE(isspace_l(i, c_locale));
 }
   }
 }
@@ -282,9 +284,9 @@ TEST(ctype, isupper) {
 TEST(ctype, isupper_l) {
   for (int i = kMin; i < 

[osv-dev] [COMMIT osv master] tests-with-linux-ld: add misc-loadbalance.cc

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests-with-linux-ld: add misc-loadbalance.cc

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/tests/misc-loadbalance.cc b/tests/misc-loadbalance.cc
--- a/tests/misc-loadbalance.cc
+++ b/tests/misc-loadbalance.cc
@@ -210,7 +210,9 @@ int main()
 concurrent_loops(looplen, 4, secs, 2.0);
 concurrent_loops(looplen, 3, secs, 1.5);
 
+#ifdef __OSV__
 concurrent_loops_priority(looplen, secs);
+#endif
 
 std::cout << "\nStarting intermittent background thread:\n";
 // Estimate the loop length required for taking 1ms.

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/0474a7060c84e423%40google.com.


[osv-dev] [COMMIT osv master] tests-with-linux-ld: use OSv specific names where necessary

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests-with-linux-ld: use OSv specific names where necessary

Most unit tests that are part of tests-with-linux-ld should be
compiled without __OSV__ macro enabled to make them runnable
on both OSv and Linux.

However, in a handful of cases we need to pass __OSV__ so
that the correct OSv-specific name (console, block device, NIC)
is used. Those tests have to be recompiled without -D__OSV__
to make them runnable on Linux, of course.

As a result fewer unit tests can be ignored when running:

scripts/test.py -v --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d tst-ctype \
  -d tst-dlfcn \
  -d tst-futimesat \
  -d tst-ifaddrs \
  -d tst-kill \
  -d tst-mmap \
  -d tst-net_if_test \
  -d tst-netlink \
  -d tst-pthread-barrier \
  -d tst-pthread-clock \
  -d tst-realloc \
  -d tst-reloc \
  -d tst-semaphore \
  -d tst-shm \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-string \
  -d tst-symlink \
  -d tst-symlink-rofs \
  -d tst-time \
  -d tst-truncate \
  -d tst-ttyname \
  -d tst-utimensat \
  -d tst-utimes \
  -d tst-wctype

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -53,6 +53,12 @@ $(out)/tests-with-linux-ld/rofs/tst-readdir.o: 
COMMON:=-Wno-deprecated-declarati
 $(out)/tests-with-linux-ld/tst-tcp-listen.o: COMMON:=-I $(src)/include 
$(COMMON)
 
 $(out)/tests-with-linux-ld/misc-fsx.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-console.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-fallocate.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-fread.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-net_if_test.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-pipe.o: COMMON:=-D__OSV__ $(COMMON)
+$(out)/tests-with-linux-ld/tst-uio.o: COMMON:=-D__OSV__ $(COMMON)
 
 #
 # These 4 tests are compiled from the same source files but passed in 
READ_ONLY_FS
diff --git a/tests/tst-uio.cc b/tests/tst-uio.cc
--- a/tests/tst-uio.cc
+++ b/tests/tst-uio.cc
@@ -73,9 +73,9 @@ int main()
 // keeps track of how much it copied. Unbelievably, we had such a bug
 // in OSv and didn't notice it for over a year.
 #ifdef __OSV__
-const char* fn = "/tests/tst-regex.so";   // A file roughly 200KB in size.
+const char* fn = "/tests/libdlext_test.so";   // A file roughly 200KB in 
size.
 #else
-const char* fn = "build/release/tests/tst-regex.so";
+const char* fn = "build/release/tests/libdlext_test.so";
 #endif
 int fd;
 expect_success(fd, open(fn, O_RDONLY));

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/f093ef060c84e364%40google.com.


[osv-dev] [COMMIT osv master] tests: enhance test.py to run tests with Linux dynamic linker

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: enhance test.py to run tests with Linux dynamic linker

This patch modifies scripts/test.py to support new option '--linux_ld'.
This new option instructs test.py to launch unit tests with Linux dynamic linker
by prepending '/lib64/ld-linux-x86-64.so.2 ' to the command line.

In order to run a test with Linux dynamic linker one has to build
the image tests-with-linux-ld as explained in the previous patch
and run test.py like so:

scripts/test.py -m modules/tests-with-linux-ld/usr.manifest --linux_ld -n 
tst-seek

Please note that some tests do not pass when running with Linux dynamic linker
for all kinds of reasons which will be addressed by following commits.

This is the command to run all unit tests that pass:

dl=linux ./scripts/build image=tests-with-linux-ld fs=rofs

scripts/test.py -v --linux_ld -m modules/tests-with-linux-ld/usr.manifest \
  -d tst-ctype \
  -d tst-console \
  -d tst-dlfcn \
  -d tst-dns-resolver \
  -d tst-fread \
  -d tst-futimesat \
  -d tst-ifaddrs \
  -d tst-kill \
  -d tst-mmap \
  -d tst-net_if_test \
  -d tst-netlink \
  -d tst-pipe \
  -d tst-pthread-barrier \
  -d tst-pthread-clock \
  -d tst-realloc \
  -d tst-reloc \
  -d tst-semaphore \
  -d tst-shm \
  -d tst-sigaction \
  -d tst-sigwait \
  -d tst-stdio-rofs \
  -d tst-string \
  -d tst-symlink \
  -d tst-symlink-rofs \
  -d tst-time \
  -d tst-truncate \
  -d tst-ttyname \
  -d tst-uio \
  -d tst-utimensat \
  -d tst-utimes \
  -d tst-wctype

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/java-tests/Makefile b/modules/java-tests/Makefile
--- a/modules/java-tests/Makefile
+++ b/modules/java-tests/Makefile
@@ -70,8 +70,8 @@ java_isolated_cmd := 'java_isolated: /java_isolated.so -cp 
/tests/java/tests.jar
 -Disolates.jar=/tests/java/isolates.jar org.junit.runner.JUnitCore 
io.osv.AllTestsThatTestIsolatedApp'
 java_non_isolated_cmd := 'java_non_isolated: /java.so -cp 
/tests/java/tests.jar:/tests/java/isolates.jar \
 -Disolates.jar=/tests/java/isolates.jar org.junit.runner.JUnitCore 
io.osv.AllTestsThatTestNonIsolatedApp'
-java_no_wrapper_cmd := 'java_no_wrapper: /usr/bin/java -cp 
/tests/java/tests.jar org.junit.runner.JUnitCore io.osv.BasicTests !'
-java_perms_cmd := 'java-perms: /usr/bin/java -cp /tests/java/tests.jar 
io.osv.TestDomainPermissions !'
+java_no_wrapper_cmd := 'java_no_wrapper: /usr/lib/jvm/java/bin/java -cp 
/tests/java/tests.jar org.junit.runner.JUnitCore io.osv.BasicTests !'
+java_perms_cmd := 'java-perms: /usr/lib/jvm/java/bin/java -cp 
/tests/java/tests.jar io.osv.TestDomainPermissions !'
 
 .PHONY: test_commands
 
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -171,7 +171,7 @@ usr.manifest: build_all_tests $(lastword $(MAKEFILE_LIST)) 
usr.manifest.skel FOR
@cat $@.skel > $@
@case "$(CROSS_PREFIX)" in \
"aarch64"*) ../tests/add_aarch64_boost_libraries.sh $(OSV_BASE) 
>> $@ ;; \
-   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/lib\/x86_64-linux-gnu\/\1: \2/' | sort | 
uniq >> $@ ;; \
+   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
esac
@echo $(all_tests) | tr ' ' '\n' | grep -v 
"tests-with-linux-ld/rofs/tst-.*" | awk '{print "/" $$0 ": ./" $$0}' | sed 
's/^\/tests-with-linux-ld/\/tests/' >> $@
@echo $(all_tests) | tr ' ' '\n' | grep 
"tests-with-linux-ld/rofs/tst-.*" | awk 'BEGIN { FS = "/" } ; { print "/tests/" 
$$3 "-rofs: ./tests-with-linux-ld/" $$2 "/" $$3 ""}' >> $@
@@ -185,7 +185,7 @@ common.manifest: build_all_tests $(lastword 
$(MAKEFILE_LIST)) usr.manifest.skel
@cat usr.manifest.skel > $@
@case "$(CROSS_PREFIX)" in \
"aarch64"*) ../tests/add_aarch64_boost_libraries.sh $(OSV_BASE) 
>> $@ ;; \
-   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/lib\/x86_64-linux-gnu\/\1: \2/' | sort | 
uniq >> $@ ;; \
+   *) LD_LIBRARY_PATH=$(boost-lib-dir) ldd $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-tests)) $(addprefix 
$(out)/tests-with-linux-ld/,$(boost-program-options-tests)) | grep libboost | 
sed 's/ *[^ ] *\(.*\) => \(.*\) .*/\/usr/\/lib64\/\1: \2/' | sort | uniq >> $@ 
;; \
esac
@echo $(common-tests) | tr ' ' '\n' | awk '{print "/tests/" 

[osv-dev] [COMMIT osv master] tests with linux ld: add misc-loadbalance and misc-scheduler

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests with linux ld: add misc-loadbalance and misc-scheduler

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -65,7 +65,7 @@ zfs-only-tests := tst-readdir tst-fallocate 
tst-concurrent-read
 specific-fs-tests := $($(fs_type)-only-tests)
 
 tests := tst-pthread misc-ramdisk tst-vblk \
-   misc-console misc-readbench misc-mmap-anon-perf \
+   misc-console misc-readbench misc-mmap-anon-perf misc-loadbalance \
tst-mmap-file misc-mmap-big-file tst-mmap tst-elf-permissions \
tst-queue-mpsc tst-af-local tst-pipe tst-yield \
misc-ctxsw tst-read tst-symlink tst-openat \
@@ -93,7 +93,7 @@ tests := tst-pthread misc-ramdisk tst-vblk \
libtls.so libtls_gold.so tst-tls tst-tls-gold \
tst-sigaction tst-syscall tst-ifaddrs tst-getdents \
tst-netlink tst-pthread-create misc-futex-perf \
-   misc-syscall-perf tst-reloc misc-vdso-perf
+   misc-syscall-perf tst-reloc misc-vdso-perf misc-scheduler
 
 ifeq ($(arch),x64)
 tests += tst-mmx-fpu

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/dc3800060c84e359%40google.com.


[osv-dev] [COMMIT osv master] tests: add new tests-with-linux-ld module

2023-12-14 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: add new tests-with-linux-ld module

This patch adds new module tests-with-linux-ld intended to allow
building and running unit tests with Linux dynamic linker on OSv.

The new module is similar to the original tests one except its
makefile is written to compile and link the unit test programs
with headers and shared libraries from Linux host. The resulting
executables should be runnable on Linux host as well as on OSv.
Please note that only the subset of the original tests that do not
use OSv internal APIs and can be run on Linux is part of this module.

Besides new module, this patch also makes some necessary changes
(mostly #ifdef) to allow compiling with headers from host.

One can build new test image like so:

dl=linux ./scripts/build image=tests-with-linux-ld fs=rofs

Prepending with 'dl=linux' forces the relevant scripts downstream
to add standard Linux libraries from host:

/lib64/ld-linux-x86-64.so.2: /lib64/ld-linux-x86-64.so.2
/lib64/libc.so.6: /lib64/libc.so.6
/lib64/libgcc_s.so.1: /lib64/libgcc_s.so.1
/lib64/libm.so.6: /lib64/libm.so.6
/lib64/libstdc++.so.6: /lib64/libstdc++.so.6

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests-with-linux-ld/.gitignore 
b/modules/tests-with-linux-ld/.gitignore
--- a/modules/tests-with-linux-ld/.gitignore
+++ b/modules/tests-with-linux-ld/.gitignore
@@ -0,0 +1,3 @@
+usr.manifest
+fs.manifest
+common.manifest
diff --git a/modules/tests-with-linux-ld/Makefile 
b/modules/tests-with-linux-ld/Makefile
--- a/modules/tests-with-linux-ld/Makefile
+++ b/modules/tests-with-linux-ld/Makefile
@@ -0,0 +1,203 @@
+module: usr.manifest common.manifest fs.manifest build_all_tests
+
+include ../common.gmk
+
+# Ask make to not delete "intermediate" results, such as the .o in the chain
+# .cc -> .o -> . Otherwise, during the first build, make considers the .o
+# to be intermediate, and deletes it, but the newly-created ".d" files lists
+# the ".o" as a target - so it needs to be created again on the second make.
+# See commit fac05c95 for a longer explanation.
+.SECONDARY:
+
+COMMON = -g -O2 -fpie -DBOOST_TEST_DYN_LINK \
+   -U _FORTIFY_SOURCE -Wall -Wformat=0 -Wno-pointer-arith
+
+INCLUDES =
+
+LIBS = $(libgcc_s_dir)/libgcc_s.so.1
+
+CXXFLAGS = -std=gnu++11 $(COMMON)
+CFLAGS = -std=gnu99 $(COMMON)
+
+$(out)/tests-with-linux-ld/%.o: $(src)/tests/%.cc
+   $(makedir)
+   $(call quiet, $(CXX) $(CXXFLAGS) -c -o $@ $<, CXX $*.cc)
+$(out)/tests-with-linux-ld/%.o: $(src)/tests/%.c
+   $(makedir)
+   $(call quiet, $(CC) $(CFLAGS) -c -o $@ $< , CC $*.c)
+$(out)/tests-with-linux-ld/rofs/%.o: $(src)/tests/%.cc
+   $(makedir)
+   $(call quiet, $(CXX) $(CXXFLAGS) -DREAD_ONLY_FS -c -o $@ $<, CXX $*.cc)
+$(out)/tests-with-linux-ld/rofs/%.o: $(src)/tests/%.c
+   $(makedir)
+   $(call quiet, $(CC) $(CFLAGS) -DREAD_ONLY_FS -c -o $@ $< , CC $*.c)
+$(out)/tests-with-linux-ld/%: $(out)/tests-with-linux-ld/%.o
+   $(call quiet, $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $< $(LIBS), LD $*)
+$(out)/%.so: $(out)/%.o
+   $(call quiet, $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -o $@ $< $(LIBS), 
LD $*.so)
+
+$(out)/tests-with-linux-ld/tst-non-fpic.o: CXXFLAGS:=$(subst -fpie,-fno-pic 
-mcmodel=large,$(CXXFLAGS))
+
+$(out)/tests-with-linux-ld/tst-non-pie: CXXFLAGS:=$(subst 
-fpie,-no-pie,$(CXXFLAGS))
+$(out)/tests-with-linux-ld/tst-non-pie: $(src)/tests/tst-non-pie.cc
+   $(call quiet, $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $< $(LIBS), LD 
tests-with-linux-ld/tst-non-pie)
+
+$(out)/tests-with-linux-ld/tst-reloc.o: CFLAGS:=$(subst -fPIC,-fpie,$(CFLAGS))
+$(out)/tests-with-linux-ld/tst-reloc: $(src)/tests/tst-reloc.c
+   $(call quiet, $(CC) $(CFLAGS) $(LDFLAGS) -pie -o $@ $< $(LIBS), LD 
tests-with-linux-ld/tst-reloc)
+
+$(out)/tests-with-linux-ld/tst-bitset-iter.o: COMMON:=-I $(src)/include 
$(COMMON)
+$(out)/tests-with-linux-ld/tst-queue-mpsc.o: COMMON:=-I $(src)/include -DLINUX 
$(COMMON)
+$(out)/tests-with-linux-ld/tst-poll.o: COMMON:=-I $(src)/include $(COMMON)
+$(out)/tests-with-linux-ld/rofs/tst-readdir.o: 
COMMON:=-Wno-deprecated-declarations $(COMMON)
+$(out)/tests-with-linux-ld/tst-tcp-listen.o: COMMON:=-I $(src)/include 
$(COMMON)
+
+$(out)/tests-with-linux-ld/misc-fsx.o: COMMON:=-D__OSV__ $(COMMON)
+
+#
+# These 4 tests are compiled from the same source files but passed in 
READ_ONLY_FS
+# to switch relevant logic in those tests to exercise scenarios applicable
+# to read-only filesystem
+rofs-only-tests := rofs/tst-chdir rofs/tst-symlink rofs/tst-readdir 
rofs/tst-concurrent-read
+
+zfs-only-tests := tst-readdir tst-fallocate tst-concurrent-read
+
+specific-fs-tests := $($(fs_type)-only-tests)
+
+tests := tst-pthread misc-ramdisk tst-vblk \
+   misc-console misc-readbench misc-mmap-anon-perf \
+   tst-mmap-file misc-mmap-big-file tst-mmap tst-elf-permissions \
+   tst-queue-mpsc tst-af-local tst-pipe tst-yield \
+   misc-ctxsw tst-read tst-symlink 

[osv-dev] [COMMIT osv master] perf: generate $(out)/osv.kallsyms from osv.syms

2023-12-07 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

perf: generate $(out)/osv.kallsyms from osv.syms

One can easily run perf top/record/report against running OSv:

```
perf kvm --guest --guestkallsyms=build/last/osv.kallsyms top -e "cycles:ppp" -p 
$(pgrep qemu-system)

perf kvm --guest --guestkallsyms=build/last/osv.kallsyms record -e "cycles:ppp" 
-p $(pgrep qemu-system) -v
perf kvm --guest --guestkallsyms=build/last/osv.kallsyms report --stdio
```

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -2139,6 +2139,9 @@ $(out)/loader.elf: $(stage1_targets) 
arch/$(arch)/loader.ld $(out)/bootfs.o $(ou
@# rule because that caused bug #545.
@readelf --dyn-syms --wide $(out)/loader.elf > $(out)/osv.syms
@scripts/libosv.py $(out)/osv.syms $(out)/libosv.ld 
`scripts/osv-version.sh` | $(CC) -c -o $(out)/osv.o -x assembler -
+   @echo ' T _text' > $(out)/osv.kallsyms
+   @echo ' T _stext' >> $(out)/osv.kallsyms
+   @grep ': ' $(out)/osv.syms | grep -v 'NOTYPE' | awk '{ print $$2 " 
T " $$8 }' | c++filt >> $(out)/osv.kallsyms
$(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so 
-T $(out)/libosv.ld, LIBOSV.SO)
 
 $(out)/zfs_builder.elf: $(stage1_targets) arch/$(arch)/loader.ld 
$(out)/zfs_builder_bootfs.o $(out)/libvdso-content.o $(loader_options_dep) 
$(version_script_file)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/f0c05e060bf28a7d%40google.com.


[osv-dev] [COMMIT osv master] aws: add new script to easily deploy to and run OSv on EC2

2023-11-30 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

aws: add new script to easily deploy to and run OSv on EC2

This patch adds new script deploy_to_aws.sh which streamlines
the process of uploading OSv image (usr.img) to AWS as a snapshot,
creating AMI out of it and finally instantiating simple stack
with a single EC2 instance.

To run the scripts you need to have AWS cli installed and configured
to work with your account. In addition, you also need to clone
the snapshot tool https://github.com/awslabs/flexible-snapshot-proxy
and adjust deploy_to_aws.sh accordingly to point to it on your local
filesystem.

The workflow is this:

1. Build your desired OSv image, for example (use fs_size_mb to limit
   the image size):
   ./scripts/build image=golang-pie-httpserver,httpserver-monitoring-api 
fs_size_mb=72

2. Run the image locally or use imgedit.py to set/change desired boot command
   line:
   ./scripts/imgedit.py setargs build/release/usr.img ""

3. Run deploy_to_aws.sh to upload the image to AWS and create the stack:
   ./scripts/deploy_to_aws.sh 

Behind the scenes, deploy_to_aws.sh converts usr.img to usr.raw, then
uploads usr.raw to AWS as a snapshot using flexible-snapshot-proxy tool
and then creates an AMI out of it (this patch adjusts ec2-make-ami.py to
support creating an AMI out of a pre-created snapshot). Finally, it uses
aws cli to create a stack with a single EC2 instance based on the new AMI.

Before you use deploy_to_aws.sh make sure to fill in the values of your
account VPC ID and subnet ID in scripts/aws/instance-parameters.json.
Also change instance type to the one you desire (the t2-* instances use
Xen and t3, t4, etc use Nitro).

The new process is much simpler and faster. You no longer need to run
ec2-make-ami.py on a separate EC2 tool instance to build the OSv AMI, which
also takes a long time. Instead, you can build and deploy your OSv image
locally in under 30 seconds.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/aws/instance-parameters.json 
b/scripts/aws/instance-parameters.json
--- a/scripts/aws/instance-parameters.json
+++ b/scripts/aws/instance-parameters.json
@@ -0,0 +1,22 @@
+[
+  {
+"ParameterKey": "pVpcId",
+"ParameterValue": "vpc-"
+  },
+  {
+"ParameterKey": "pSubnetId",
+"ParameterValue": "subnet-"
+  },
+  {
+"ParameterKey": "pInstanceName",
+"ParameterValue": "INSTANCE_NAME"
+  },
+  {
+"ParameterKey": "pInstanceType",
+"ParameterValue": "t2.nano"
+  },
+  {
+"ParameterKey": "pImageId",
+"ParameterValue": "AMI_ID"
+  }
+]
diff --git a/scripts/aws/instance.yaml b/scripts/aws/instance.yaml
--- a/scripts/aws/instance.yaml
+++ b/scripts/aws/instance.yaml
@@ -0,0 +1,70 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: Create EC2 Instance
+Parameters:
+  pVpcId:
+Description: ID of the VPC
+Type: AWS::EC2::VPC::Id
+Default: '-'
+  pSubnetId:
+Description: Subnet ID
+Type: AWS::EC2::Subnet::Id
+Default: '-'
+  pInstanceName:
+Description: Instance Name
+Type: String
+Default: '-'
+  pInstanceType:
+Description: Size of the Instance
+Type: String
+AllowedValues:
+- t2.nano
+- t2.micro
+- t2.small
+- t3.nano
+- t3.micro
+- t3.small
+Default: t2.nano
+  pImageId:
+Description: AMI for the instances
+Type: AWS::EC2::Image::Id
+Default: '-'
+Resources:
+  InstanceSecurityGroup:
+Type: AWS::EC2::SecurityGroup
+Properties:
+  GroupDescription: Allow external addresses to access to management 
console
+  SecurityGroupIngress:
+  - IpProtocol: tcp
+FromPort: '9000'
+ToPort: '9000'
+CidrIp: '0.0.0.0/0'
+  - IpProtocol: tcp
+FromPort: '8000'
+ToPort: '8000'
+CidrIp: '0.0.0.0/0'
+  VpcId:
+Ref: pVpcId
+  Instance:
+Type: AWS::EC2::Instance
+Properties:
+  ImageId:
+Ref: pImageId
+  InstanceType:
+Ref: pInstanceType
+  SecurityGroupIds:
+- Ref: InstanceSecurityGroup
+  SubnetId:
+Ref: pSubnetId
+  Tags:
+- Key: Name
+  Value:
+Ref: pInstanceName
+Outputs:
+  PublicDnsName:
+Value:
+  Fn::GetAtt:
+  - Instance
+  - PublicDnsName
+
+  InstanceID:
+Value: !Ref Instance
diff --git a/scripts/deploy_to_aws.sh b/scripts/deploy_to_aws.sh
--- a/scripts/deploy_to_aws.sh
+++ b/scripts/deploy_to_aws.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+
+NAME=$1
+
+qemu-img convert -O raw build/release/usr.img build/release/usr.raw
+echo "Converted to raw image"
+
+snapshot_id=$(python3 ~/projects/flexible-snapshot-proxy/src/main.py upload 
build/release/usr.raw | tail -n 1)
+echo "Created snapshot: $snapshot_id"
+
+ami_id=$(./scripts/ec2-make-ami.py -n "$NAME" -s "$snapshot_id" | grep '^ami' 
| tail -n 1)
+echo "Created AMI: $ami_id"
+
+cat scripts/aws/instance-parameters.json | sed -e "s/INSTANCE_NAME/$NAME/" | 
sed -e "s/AMI_ID/$ami_id/" > 

[osv-dev] [COMMIT osv master] build: do not strip symlinks

2023-11-29 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

build: do not strip symlinks

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/manifest_common.py b/scripts/manifest_common.py
--- a/scripts/manifest_common.py
+++ b/scripts/manifest_common.py
@@ -69,7 +69,7 @@ def to_strip(filename):
ff.startswith(osvdir + "/apps")
 
 stripped_filename = filename
-if filename.endswith(".so") and to_strip(filename):
+if filename.endswith(".so") and not filename.startswith("->") and 
to_strip(filename):
 stripped_filename = filename[:-3] + "-stripped.so"
 if not os.path.exists(stripped_filename) \
 or (os.path.getmtime(stripped_filename) < \

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5e8beb060b58143b%40google.com.


[osv-dev] [COMMIT osv master] build: create ramfs images with empty partition table

2023-11-29 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

build: create ramfs images with empty partition table

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/build b/scripts/build
--- a/scripts/build
+++ b/scripts/build
@@ -401,7 +401,11 @@ rofs_with_zfs)
 ramfs|virtiofs)
# No need to create extra fs like above: ramfs is already created (as 
the
# bootfs) and virtio-fs is specified with virtiofsd at run time
-   qemu-img convert -f raw -O qcow2 loader.img usr.img ;;
+   image_size=$((partition_offset))
+   cp $bare $raw_disk.raw
+   "$SRC"/scripts/imgedit.py setpartition "-f raw ${raw_disk}.raw" 2 
$partition_offset 0
+   qemu-img resize ${raw_disk}.raw ${image_size}b >/dev/null 2>&1
+   qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img ;;
 esac
 # Prepend the root fs type option to the command line (preserved by run.py)
 cmdline=$(cat cmdline)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/4a5807060b581462%40google.com.


[osv-dev] [COMMIT osv master] Add missing syscall tracepoints

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

Add missing syscall tracepoints

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -696,6 +696,14 @@ TRACEPOINT(trace_syscall_timerfd_settime, "%d <= %d %d %p 
%p", int, int, int, co
 TRACEPOINT(trace_syscall_timerfd_gettime, "%d <= %d %p", int, int, struct 
itimerspec*);
 TRACEPOINT(trace_syscall_chmod, "%d <= \"%s\" %d", int, const char *, mode_t);
 TRACEPOINT(trace_syscall_fchmod, "%d <= %d %d", int, int, mode_t);
+TRACEPOINT(trace_syscall_arch_prctl, "0x%x <= %d 0x%x", long, int, unsigned 
long);
+TRACEPOINT(trace_syscall_sys_set_robust_list, "%d <= %p %lu", long, struct 
robust_list_head *, size_t);
+TRACEPOINT(trace_syscall_sys_set_tid_address, "%d <= %p", long, int *);
+#ifdef __x86_64__
+TRACEPOINT(trace_syscall_sys_clone, "%d <= 0x%x 0x%x %p %p %lu", int, unsigned 
long, void *, int *, int *, unsigned long);
+TRACEPOINT(trace_syscall_sys_clone3, "%d <= %p %lu", int, struct clone_args *, 
size_t);
+#endif
+TRACEPOINT(trace_syscall_prlimit64, "%d <= %u %d %p %p", int, pid_t, int, 
const struct rlimit *, struct rlimit *);
 
 OSV_LIBC_API long syscall(long number, ...)
 {

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/88bfa2060957310d%40google.com.


[osv-dev] [COMMIT osv master] add syscall tracepoints

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

add syscall tracepoints

This patch adds tracepoints for the syscalls implemented in linux.cc.
The new tracepoints capture both the arguments and return value.
The format is somewhat similar to Linux strace where the return value
is first like so:

TRACEPOINT(trace_syscall_open, "%d <= \"%s\" 0x%x", int, const char *, int);

Having both input and output captured by one single tracepoint instead of
two makes it easier to observe things using new OSv strace capability.

With strace enabled one can observe what syscalls application makes
like in this example:

./scripts/run.py -e '--strace --trace=syscall\* /httpserver.so'
OSv v0.57.0-76-g76e30467
eth0: 192.168.122.15
Booted up in 134.48 ms
Cmdline: /httpserver.so
/httpserver.so0  0.099321648 syscall_sys_sched_getaffinity(8 <= 0 8192 
0x201fef10)
/httpserver.so0  0.099337794 syscall_openat(-1 <= -100 
"/sys/kernel/mm/transparent_hugepage/hpage_pmd_siz" 00 0)
/httpserver.so0  0.103745244 syscall_rt_sigprocmask(0 <= 2 0 0x10491e40 
8)
/httpserver.so0  0.104619488 syscall_sigaltstack(0 <= 0 0x20200e80)
/httpserver.so0  0.104619950 syscall_sigaltstack(0 <= 0x20200e40 0)
/httpserver.so0  0.104622744 syscall_rt_sigprocmask(0 <= 2 0x20200e90 0 
8)
/httpserver.so0  0.104623578 syscall_gettid(45 <=)
>/httpserver.so   0  0.104778424 syscall_sigaltstack(0 <= 0 0x200027010ea8)
>/httpserver.so   0  0.104778871 syscall_sigaltstack(0 <= 0x200027010e68 0)
>/httpserver.so   0  0.104782909 syscall_rt_sigprocmask(0 <= 2 0x200027010eb8 0 
>8)
>/httpserver.so   0  0.104783155 syscall_gettid(46 <=)
>/httpserver.so   0  0.105711348 syscall_nanosleep(0 <= 0x200027010e70 0)
>/httpserver.so   0  0.105731443 syscall_getpid(2 <=)
>/httpserver.so   0  0.105732262 syscall_tgkill(-1 <= 2 45 23)
Go version: go1.18.1, listening on port 8000 ...
>/httpserver.so   0  0.105758384 syscall_nanosleep(0 <= 0x200027010e70 0)
>/httpserver.so   0  0.105783045 syscall_nanosleep(0 <= 0x200027010e70 0)
>/httpserver.so   0  0.105806418 syscall_nanosleep(0 <= 0x200027010e70 0)
>/httpserver.so   0  0.105830684 syscall_nanosleep(0 <= 0x200027010e70 0)

Fixes #1261

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -1550,7 +1550,7 @@ char *getcwd(char *path, size_t size)
 }
 
 TRACEPOINT(trace_vfs_dup, "%d", int);
-TRACEPOINT(trace_vfs_dup_ret, "\"%s\"", int);
+TRACEPOINT(trace_vfs_dup_ret, "%d", int);
 TRACEPOINT(trace_vfs_dup_err, "%d", int);
 /*
  * Duplicate a file descriptor
@@ -1645,7 +1645,7 @@ int dup2(int oldfd, int newfd)
 #define SETFL (O_APPEND | O_ASYNC | O_DIRECT | O_NOATIME | O_NONBLOCK)
 
 TRACEPOINT(trace_vfs_fcntl, "%d %d 0x%x", int, int, int);
-TRACEPOINT(trace_vfs_fcntl_ret, "\"%s\"", int);
+TRACEPOINT(trace_vfs_fcntl_ret, "%d", int);
 TRACEPOINT(trace_vfs_fcntl_err, "%d", int);
 
 extern "C" OSV_LIBC_API
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -196,8 +197,8 @@ static long set_mempolicy(int policy, unsigned long *nmask,
 // As explained in the sched_getaffinity(2) manual page, the interface of the
 // sched_getaffinity() function is slightly different than that of the actual
 // system call we need to implement here.
-#define __NR_sched_getaffinity_syscall __NR_sched_getaffinity
-static int sched_getaffinity_syscall(
+#define __NR_sys_sched_getaffinity __NR_sched_getaffinity
+static int sys_sched_getaffinity(
 pid_t pid, unsigned len, unsigned long *mask)
 {
 int ret = sched_getaffinity(
@@ -216,8 +217,8 @@ static int sched_getaffinity_syscall(
 return ret;
 }
 
-#define __NR_sched_setaffinity_syscall __NR_sched_setaffinity
-static int sched_setaffinity_syscall(
+#define __NR_sys_sched_setaffinity __NR_sched_setaffinity
+static int sys_sched_setaffinity(
 pid_t pid, unsigned len, unsigned long *mask)
 {
 return sched_setaffinity(
@@ -231,16 +232,18 @@ long long_mmap(void *addr, size_t length, int prot, int 
flags, int fd, off_t off
 #define __NR_long_mmap __NR_mmap
 
 
-#define SYSCALL0(fn) case (__NR_##fn): return fn()
+#define SYSCALL0(fn) case (__NR_##fn): do { long ret = fn(); 
trace_syscall_##fn(ret); return ret; } while (0)
 
-#define SYSCALL1(fn, __t1)  \
-case (__NR_##fn): do {  \
-va_list args;   \
-__t1 arg1;  \
-va_start(args, number); \
-arg1 = va_arg(args, __t1);  \
-va_end(args);   \
-return fn(arg1);\
+#define SYSCALL1(fn, __t1) \
+case (__NR_##fn): do { \
+va_list args;  \
+__t1 arg1; \
+va_start(args, number);\
+arg1 = va_arg(args, __t1); \
+ 

[osv-dev] [COMMIT osv master] Implement prlimit syscall

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

Implement prlimit syscall

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/exported_symbols/osv_ld-musl.so.1.symbols 
b/exported_symbols/osv_ld-musl.so.1.symbols
--- a/exported_symbols/osv_ld-musl.so.1.symbols
+++ b/exported_symbols/osv_ld-musl.so.1.symbols
@@ -740,6 +740,8 @@ pread64
 preadv
 preadv64
 printf
+prlimit
+prlimit64
 __progname
 __progname_full
 program_invocation_name
diff --git a/exported_symbols/osv_libc.so.6.symbols 
b/exported_symbols/osv_libc.so.6.symbols
--- a/exported_symbols/osv_libc.so.6.symbols
+++ b/exported_symbols/osv_libc.so.6.symbols
@@ -610,6 +610,8 @@ __pread64_chk
 preadv
 preadv64
 printf
+prlimit
+prlimit64
 __printf_chk
 __progname
 __progname_full
diff --git a/libc/libc.cc b/libc/libc.cc
--- a/libc/libc.cc
+++ b/libc/libc.cc
@@ -102,8 +102,28 @@ int setrlimit(int resource, const struct rlimit *rlim)
 // osv - no limits
 return 0;
 }
+
+int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct 
rlimit *old_limit)
+{
+if (pid != getpid() && pid != 0) {
+errno = EINVAL;
+return -1;
+}
+
+if (old_limit && getrlimit(resource, old_limit)) {
+return -1;
+}
+
+if (new_limit && setrlimit(resource, new_limit)) {
+return -1;
+}
+
+return 0;
+}
 LFS64(getrlimit);
 LFS64(setrlimit);
+#undef prlimit64
+LFS64(prlimit);
 
 uid_t geteuid()
 {
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -699,6 +699,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL5(sys_clone, unsigned long, void *, int *, int *, unsigned long);
 SYSCALL2(sys_clone3, struct clone_args *, size_t);
 #endif
+SYSCALL4(prlimit64, pid_t, int, const struct rlimit *, struct rlimit *);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/b0a95c0609570724%40google.com.


[osv-dev] [COMMIT osv master] implement clone/3, set_robust_list and set_tid_address syscalls

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

implement clone/3, set_robust_list and set_tid_address syscalls

This PR implements clone, clone3, set_robust_list, and set_tid_address
syscalls needed to support running multi-threaded static executables on
OSv.

Bulk of this patch is implementation of the clone and its clone3 variant.
More specifically the sys_clone() implements only the tiny subset of
what the Linux manual describes - handling of CLONE_THREAD - which
is what is used by glibc to implement pthread_create().

In essence, the sys_clone() creates new thread, sets application TCB
if present, and then when started new thread executes code implemented in 
assembly
to restore most of the registers and jump to the instruction where the
parent thread calling clone would execute next. So effectively a thread
calling the clone syscall would "clone" itself by creating new child thread
that resumes in the same place in code right after the syscall instruction
which is held in the RCX register. All the registers to be restored in the child
thread are copied from the frame of the parent thread syscall stack.
The detailed comments explaining the implementation of clone() can be found
intertwined with the code of sys_clone() in clone.cc.

This patch also implements two other related syscalls - set_robust_list and
set_tid_address - which are mostly described here - 
https://www.kernel.org/doc/Documentation/robust-futexes.txt.

With this patch, the following simple example compiled as a static
executable runs fine on OSv:

void* secondary(void *ignore)
{
printf("secondary thread\n");
}

void main() {
pthread_t threads[10];
for (int i = 0; i < 10; i++)
   pthread_create(&threads[i], NULL, secondary, NULL);

printf("Created 10 threads\n");

for (int i = 0; i < 10; i++)
   pthread_join(threads[i], NULL);
printf("Joined 10 threads\n");
}

Fixes #1139

Signed-off-by: Waldemar Kozaczuk 

Reverse futex/clear_id cleanup

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1013,6 +1013,7 @@ objects += arch/x64/apic.o
 objects += arch/x64/apic-clock.o
 objects += arch/x64/entry-xen.o
 objects += arch/x64/prctl.o
+objects += arch/x64/clone.o
 objects += arch/x64/vmlinux.o
 objects += arch/x64/vmlinux-boot64.o
 objects += arch/x64/pvh-boot.o
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -360,6 +360,11 @@ void thread::free_syscall_stack()
 }
 }
 
+void* thread::get_syscall_stack_top()
+{
+return _state._syscall_stack_descriptor.stack_top;
+}
+
 void thread_main_c(thread* t)
 {
 arch::irq_enable();
diff --git a/arch/x64/clone.cc b/arch/x64/clone.cc
--- a/arch/x64/clone.cc
+++ b/arch/x64/clone.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include "arch.hh"
+#include 
+#include 
+#include "tls-switch.hh"
+
+#define CLONE_THREAD   0x0001
+#define CLONE_SETTLS   0x0008
+#define CLONE_CHILD_SETTID 0x0100
+#define CLONE_PARENT_SETTID0x0010
+#define CLONE_CHILD_CLEARTID   0x0020
+
+static constexpr size_t CHILD_FRAME_OFFSET = 136;
+static constexpr size_t PARENT_FRAME_OFFSET = 120;
+static constexpr size_t FRAME_SIZE = 120;
+static constexpr size_t RSP_OFFSET = 8;
+static constexpr size_t RAX_OFFSET = 16;
+
+int sys_clone(unsigned long flags, void *child_stack, int *ptid, int *ctid, 
unsigned long newtls)
+{   //
+//We only support "cloning" of threads so fork() would fail but 
pthread_create() should
+//succeed
+if (!(flags & CLONE_THREAD)) {
+   errno = ENOSYS;
+   return -1;
+}
+//
+//Validate we have non-empty stack
+if (!child_stack) {
+   errno = EINVAL;
+   return -1;
+}
+//
+//Validate ptid and ctid which we would be setting down if requested by 
these flags
+if (((flags & CLONE_PARENT_SETTID) && !ptid) ||
+((flags & CLONE_CHILD_SETTID) && !ctid) ||
+((flags & CLONE_SETTLS) && !newtls)) {
+   errno = EFAULT;
+   return -1;
+}
+//
+//If the parent thread is pinned we should make new thread inherit this
+auto parent_pinned_cpu = sched::thread::current()->pinned() ? 
sched::cpu::current() : nullptr;
+//
+//Create new child thread
+auto t = sched::thread::make([=] {
+   //
+   //Switch to app TCB if one specified
+   u64 app_tcb = sched::thread::current()->get_app_tcb();
+   if (app_tcb) {
+   arch::set_fsbase(app_tcb);
+   }
+   //
+   //Restore registers from the syscall stack and jump to the caller
+   //We are restoring the registers based on how they were saved
+   //on the syscall stack of the parent
+   const size_t frame_offset = CHILD_FRAME_OFFSET;
+   asm volatile
+ ("movq 

[osv-dev] [COMMIT osv master] Implement arch_prctl syscall to support TLS in statically linked executables

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

Implement arch_prctl syscall to support TLS in statically linked executables

Even simplest executables need thread local storage (TLS) and a good example
of it is errno which is a thread local variable. The OSv kernel itself uses
many thread local variables and when running dynamically linked executables
it shares the TLS memory block with the application. In this case OSv fully
controls the setup of TLS and stores the pointer to TCB (Thread Control Block)
as part of a thread state and updates the FS register on every context switch.

On the other hand, statically linked executables set up their TLS and register
it with the kernel by executing the arch_prctl syscall on x86_64. In order to support
it in OSv, we need to implement the arch_prctl syscall and modify some key
places in kernel code - syscall handler, exception handlers and VDSO -
to switch from application TLS to the kernel one and back.

The newly implemented arch_prctl syscall on ARCH_SET_FS stores the application
TCB address in the new field app_tcb added to the thread_control_block
structure.
At the same time, we modify the following places to support switching between the
application and kernel TCB if necessary (app_tcb != 0):

The exception handler assembly in entry.S is modified to detect if on entry
the current FS register points to the kernel TCB and if not it switches to the 
kernel
one; on exit from exception, it switches back to the application TCB. To make 
this
possible we "duplicate" the current thread kernel TCB address and store it in 
the new
field _current_thread_kernel_tcb of the cpu_arch structure which is updated
on every context switch and can be accessed in assembly as gs:16. The first
8 bytes field self of the thread control block holds the address to itself
so we can easily compare fs:0 with gs:16 to know if FS register points to the
kernel TCB or not. Please note this scheme is simpler and faster than one of
the original version relying on extra counter that also required the interrupts
to be disabled. It also works in nested scenarios - for example a page fault
interrupted during a sleep.

Similarly, we also change the syscall handler and VDSO code where we use simple
RAII utility - tls_switch - to detect if current thread has non-zero application
TCB and if so to switch to the kernel one before the code in scope and switch
back to the application one after. This scheme is a little different from the
exception handler because both syscall and VDSO functions are only executed
on application threads which could have the FS register pointing to the 
application
TCB for example when running statically linked executables and we do not need to
deal with nesting.

In addition, the VDSO code is changed to C++ to allow using the C++ RAII utility
described above.

In essence, this PR makes it possible to launch simple statically linked
executables
like "Hello World" on OSv:

gcc -static -o hello-static apps/native-example/hello.c

./scripts/run.py -e /hello-static
OSv v0.57.0-74-g2a835078
Booted up in 142.76 ms
Cmdline: /hello-static
WARNING: Statically linked executables are only supported to limited extent!
syscall(): unimplemented system call 218
syscall(): unimplemented system call 273
syscall(): unimplemented system call 334
syscall(): unimplemented system call 302
Hello from C code

Please note, that the code changes touch some critical places of the kernel
functionality - context switching, syscall handling, exception handling, and 
VDSO
implementation - and thus may slightly affect their performance.

As far as context switching goes, this patch adds only a single memory write
operation that does not seem to affect it in any measurable way based on what
the misc-ctxsw.cc indicates.

On the other hand, one could see the syscall handling cost go up by 2-5 ns 
(3-5% of
the total cost ~100ns based on what misc-syscall-perf.cc measures) when 
executing statically
linked executables due to the fact we need to switch the fsbase from the app 
TCB to the kernel
one and back. The good news is that the syscall handling does not seem to be 
affected
in any significant way when running dynamically linked executables.

The VDSO function calls are affected the most - by 7-10 ns (from 23 to 30 ns) - even though
the VDSO code
uses the same exact tls_switch RAII utility and seems to get inlined in a similar
way
as above in the syscall handler.

Finally, I did not measure the impact of changes to the exception handling 
(interrupts, page faults, etc)
but I think it should be similar to syscall handling. The interrupts are in 
general relatively expensive in
the virtualized environment (guest/host) as this email by Avi Kivity explains - 
https://groups.google.com/g/osv-dev/c/w_fuxsYla-M/m/WxpRZTXQ-twJ.
On top of this, the FPU saving/restoring takes ~60ns which is far more
expensive than switching fsbase.

Fixes #1137

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile

[osv-dev] [COMMIT osv master] Support executing dynamically-linked executables via Linux dynamic linker

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

Support executing dynamically-linked executables via Linux dynamic linker

This PR enhances OSv to be able to launch dynamically linked executables(PIEs 
or non-PIEs)
using the Linux dynamic linker - ld-linux-x86-64.so.2 on x86_64 or 
ld-linux-aarch64.so.1
on aarch64 like so:

./scripts/run.py -e '/lib64/ld-linux-x86-64.so.2 /hello'

The motivation is to allow running dynamically linked executables that depend 
on libc symbols
not implemented by the OSv kernel. In this case, the executables launched this 
way would
interact only through system calls just like the statically linked ones do. 
Actually,
my experiments so far seem to show that there are no extra changes needed 
beyond the same
code enhancements to support statically linked executables.

When executing dynamically linked executable, Linux reads the PT_INTERP segment 
to
determine the program interpreter to launch with (typically 
ld-linux-x86-64.so.2), probably
unmaps the executable, loads the program interpreter and delegates further
execution by
jumping to its entry point.

On OSv we could implement similar logic but even though not complicated it would
involve some not insignificant coding. So this patch takes a simpler approach
where we assume a user would explicitly run the Linux program interpreter and 
pass
the path of the executable as its parameter which by the way is a valid thing 
to do on Linux.
To that end we enhance OSv dynamic linker to process ld-linux-x86-64.so.2 in 
the same
way we process a statically linked executable (in reality ld-linux-x86-64.so.2 
is
a statically linked shared library with non-zero entry point):

- do not relocate
- do not call INIT and FINI functions
- launch by jumping to the entry point

We detect if given ELF is a Linux program interpreter by comparing its soname
to the architecture specific string - ld-linux-x86-64.so.2 on x86_64 and 
ld-linux-aarch64.so.1
on aarch64.

Fixes #1266

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/app.cc b/core/app.cc
--- a/core/app.cc
+++ b/core/app.cc
@@ -217,7 +217,7 @@ application::application(const std::string& command,
 throw launch_error("Failed to load object: " + command);
 }
 
-if (_lib->is_statically_linked_executable()) {
+if (_lib->is_statically_linked_executable() || _lib->is_linux_dl()) {
 //Augment auxiliary vector with extra entries like AT_PHDR, AT_ENTRY, 
etc
 //that are necessary by a static executable to bootstrap itself
 augment_auxv();
@@ -326,7 +326,7 @@ void application::main()
 elf::get_program()->init_library(_args.size(), _argv.get());
 sched::thread::current()->set_name(_command);
 
-if (_lib->is_statically_linked_executable()) {
+if (_lib->is_statically_linked_executable() || _lib->is_linux_dl()) {
 run_entry_point(_lib->entry_point(), _args.size(), _argv.get(), 
_argv_size);
 } else {
 if (_main) {
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -1169,7 +1169,7 @@ std::string object::pathname()
 // Run the object's static constructors or similar initialization
 void object::run_init_funcs(int argc, char** argv)
 {
-if (is_statically_linked_executable()) {
+if (is_statically_linked_executable() || is_linux_dl()) {
 return;
 }
 // Invoke any init functions if present and pass in argc and argv
@@ -1197,7 +1197,7 @@ void object::run_init_funcs(int argc, char** argv)
 // Run the object's static destructors or similar finalization
 void object::run_fini_funcs()
 {
-if (is_statically_linked_executable()) {
+if (is_statically_linked_executable() || is_linux_dl()) {
 return;
 }
 if(!_init_called){
@@ -1323,11 +1323,11 @@ program::program(void* addr)
 // Our kernel already supplies the features of a bunch of traditional
 // shared libraries:
 static const auto supplied_modules = {
+  linux_dl_soname,
   "libresolv.so.2",
   "libc.so.6",
   "libm.so.6",
 #ifdef __x86_64__
-  "ld-linux-x86-64.so.2",
   "libc.musl-x86_64.so.1",
   // As noted in issue #1040 Boost version 1.69.0 and above is
   // compiled with hidden visibility, so even if the kernel uses
@@ -1340,7 +1340,6 @@ program::program(void* addr)
 #endif
 #endif /* __x86_64__ */
 #ifdef __aarch64__
-  "ld-linux-aarch64.so.1",
 #if BOOST_VERSION < 106900
 #if HIDE_SYMBOLS < 1
   "libboost_system-mt.so.1.55.0",
@@ -1478,7 +1477,7 @@ program::load_object(std::string name, 
std::vector extra_path,
 //Do not relocate static executables as they are linked with its own
 //dynamic linker. Also do not try to load any dependant libraries
 //as they do not apply to statically linked executables.
-if (!ef->is_statically_linked_executable()) {
+if (!ef->is_statically_linked_executable() && !ef->is_linux_dl()) {
 

[osv-dev] [COMMIT osv master] signals: make signal mask layout match Linux

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

signals: make signal mask layout match Linux

In theory, it would not matter how we encode the information about
which signals are masked as long as our implementation of it stays
completely internal. But in reality, when we implement rt_sigprocmask
and other related syscalls, the users of those like glibc rely on
the format of the bitmask used by the Linux kernel. In Linux, the least
significant bit (the rightmost one) represents the mask of the signal
number 1, the 2nd bit represents the mask of the signal number 2, etc
all the way to the maximum number 64. So we must ensure that our internal
sigset structure can fit all these 64 bits and nothing more as some
runtimes like golang assume the oldset argument of the rt_sigprocmask
can be as small as NSIG/8 (8 bytes).

To get or set a specific signal mask bit, we simply subtract 1
from the signal number - sigidx = signum - 1.

This patch in effect reverts the commit 0e13562f8ebb90f66fb1f24324099526f87d9dfc
and changes the nsignals constant back to the value 64. And besides
adjusting the code that accesses signal mask to match that of Linux (signo -1),
it also changes code that accesses signal_actions and waiters to
use the similar access pattern (signo - 1) to make all things consistent
and sane.

Fixes #1255

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/libc/signal.cc b/libc/signal.cc
--- a/libc/signal.cc
+++ b/libc/signal.cc
@@ -69,6 +69,8 @@ inline bool is_sig_ign(const struct sigaction ) {
 return (!(sa.sa_flags & SA_SIGINFO) && sa.sa_handler == SIG_IGN);
 }
 
+//Similar to signal actions and mask, list of "waiters" for a given signal
+//with number "signo" is stored at the index "signo - 1"
 typedef std::list thread_list;
 static std::array waiters;
 mutex waiters_mutex;
@@ -78,29 +80,30 @@ int wake_up_signal_waiters(int signo)
 SCOPE_LOCK(waiters_mutex);
 int woken = 0;
 
-for (auto& t: waiters[signo]) {
+unsigned sigidx = signo - 1;
+for (auto& t: waiters[sigidx]) {
 woken++;
 t->remote_thread_local_var(thread_pending_signal) = signo;
 t->wake();
 }
 return woken;
 }
 
-void wait_for_signal(int signo)
+void wait_for_signal(unsigned int sigidx)
 {
 SCOPE_LOCK(waiters_mutex);
-waiters[signo].push_front(sched::thread::current());
+waiters[sigidx].push_front(sched::thread::current());
 }
 
-void unwait_for_signal(sched::thread *t, int signo)
+void unwait_for_signal(sched::thread *t, unsigned int sigidx)
 {
 SCOPE_LOCK(waiters_mutex);
-waiters[signo].remove(t);
+waiters[sigidx].remove(t);
 }
 
-void unwait_for_signal(int signo)
+void unwait_for_signal(unsigned int sigidx)
 {
-unwait_for_signal(sched::thread::current(), signo);
+unwait_for_signal(sched::thread::current(), sigidx);
 }
 
 void __attribute__((constructor)) signals_register_thread_notifier()
@@ -120,7 +123,8 @@ void __attribute__((constructor)) 
signals_register_thread_notifier()
 
 void generate_signal(siginfo_t , exception_frame* ef)
 {
-if (pthread_self() && thread_signals()->mask[siginfo.si_signo]) {
+unsigned sigidx = siginfo.si_signo - 1;
+if (pthread_self() && thread_signals()->mask[sigidx]) {
 // FIXME: need to find some other thread to deliver
 // FIXME: the signal to.
 //
@@ -129,11 +133,11 @@ void generate_signal(siginfo_t , exception_frame* 
ef)
 // needs to be running to generate them. So definitely not waiting.
 abort();
 }
-if (is_sig_dfl(signal_actions[siginfo.si_signo])) {
+if (is_sig_dfl(signal_actions[sigidx])) {
 // Our default is to abort the process
 abort();
-} else if(!is_sig_ign(signal_actions[siginfo.si_signo])) {
-arch::build_signal_frame(ef, siginfo, 
signal_actions[siginfo.si_signo]);
+} else if(!is_sig_ign(signal_actions[sigidx])) {
+arch::build_signal_frame(ef, siginfo, signal_actions[sigidx]);
 }
 }
 
@@ -166,21 +170,36 @@ int sigfillset(sigset_t *sigset)
 OSV_LIBC_API
 int sigaddset(sigset_t *sigset, int signum)
 {
-from_libc(sigset)->mask.set(signum);
+if (signum < 1 || signum > (int)nsignals) {
+errno = EINVAL;
+return -1;
+}
+unsigned sigidx = signum - 1;
+from_libc(sigset)->mask.set(sigidx);
 return 0;
 }
 
 OSV_LIBC_API
 int sigdelset(sigset_t *sigset, int signum)
 {
-from_libc(sigset)->mask.reset(signum);
+if (signum < 1 || signum > (int)nsignals) {
+errno = EINVAL;
+return -1;
+}
+unsigned sigidx = signum - 1;
+from_libc(sigset)->mask.reset(sigidx);
 return 0;
 }
 
 OSV_LIBC_API
 int sigismember(const sigset_t *sigset, int signum)
 {
-return from_libc(sigset)->mask.test(signum);
+if (signum < 1 || signum > (int)nsignals) {
+errno = EINVAL;
+return -1;
+}
+unsigned sigidx = signum - 1;
+return from_libc(sigset)->mask.test(sigidx);
 }
 
 OSV_LIBC_API
@@ -231,15 +250,16 @@ 

[osv-dev] [COMMIT osv master] Add strace-like functionality

2023-11-04 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

Add strace-like functionality

This PR implements simple strace-like mechanism described by #1263.

In essence, it adds a new structure trace_log that holds a fixed size (64K)
array of pointers to trace_records and acts as a ring buffer between threads
capturing active tracepoints (producers) and new thread strace (single consumer)
that prints them to the console. The write pointer - write_offset - is an atomic
value that
rolls over every 64K and the strace thread follows it and tries to print
each trace (see print_trace).

The strace can be activated by adding "--strace" kernel command line argument.

./scripts/firecracker.py -e '--trace=vfs* --strace /hello'
OSv v0.57.0-75-gb7150705
init  0  0.001666500 vfs_mkdir("/dev" 0755)
init  0  0.001685758 vfs_mkdir_ret()
init  0  0.001698722 vfs_dup(0)
init  0  0.001699344 vfs_dup_ret(1)
init  0  0.001699520 vfs_dup(0)
init  0  0.001699644 vfs_dup_ret(2)
>init 0  0.006818073 vfs_mkdir("/rofs" 0755)
>init 0  0.006821609 vfs_mkdir_ret()
Booted up in 13.77 ms
Cmdline: /hello
>init 0  0.008424749 vfs_open("/usr/lib/fs" 0x0 00)
Hello from C code
>init 0  0.008429978 vfs_open_err(2)
>init 0  0.008435063 vfs_open("/etc/fstab" 0x0 00)
>init 0  0.008438018 vfs_open_ret(3)
>init 0  0.008531832 vfs_open("/dev" 0x0 00)
>init 0  0.008534193 vfs_open_ret(4)
>init 0  0.008534456 vfs_close(4)
>init 0  0.008537971 vfs_close_ret()
>init 0  0.008554489 vfs_open("/proc" 0x0 00)
>init 0  0.008556158 vfs_open_ret(4)
>init 0  0.008556252 vfs_close(4)
>init 0  0.008556763 vfs_close_ret()
>init 0  0.008583356 vfs_open("/sys" 0x0 00)
>init 0  0.008585037 vfs_open_ret(4)
>init 0  0.008585171 vfs_close(4)
>init 0  0.008585676 vfs_close_ret()
>init 0  0.008605026 vfs_open("/tmp" 0x0 00)
>init 0  0.008606443 vfs_open_ret(4)
>init 0  0.008606579 vfs_close(4)
>init 0  0.008607093 vfs_close_ret()
>init 0  0.008619345 vfs_close(3)
>init 0  0.008620198 vfs_close_ret()
>init 0  0.008634389 vfs_ioctl(1 0x5401)
>init 0  0.008634974 vfs_ioctl_ret()
>init 0  0.008635317 vfs_pwritev(1 0x20501690 0x2 0x)
>init 0  0.008953023 vfs_pwritev_ret(0x16)
>init 0  0.008954858 vfs_pwritev(1 0x20501660 0x2 0x)
>init 0  0.009209918 vfs_pwritev_ret(0x11)
>init 0  0.009235297 vfs_open("/init" 0x0 00)
>init 0  0.009237413 vfs_open_err(2)
>init 0  0.009259309 vfs_lstat(pathname=/hello, stat=0x204fe5e0)
>init 0  0.009265126 vfs_lstat_ret()
>init 0  0.009265804 vfs_open("/hello" 0x0 00)
>init 0  0.009267528 vfs_open_ret(3)
>init 0  0.009267772 vfs_close(3)
>init 0  0.009268057 vfs_close_ret()
/hello0  0.009949201 vfs_pwritev(1 0x20601e90 0x2 0x)
/hello0  0.011528630 vfs_pwritev_ret(0x12)

Fixes #1263

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1045,6 +1045,7 @@ objects += core/mmio.o
 objects += core/kprintf.o
 objects += core/trace.o
 objects += core/trace-count.o
+objects += core/strace.o
 objects += core/callstack.o
 objects += core/poll.o
 objects += core/select.o
diff --git a/core/strace.cc b/core/strace.cc
--- a/core/strace.cc
+++ b/core/strace.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include 
+#include 
+#include "drivers/console.hh"
+
+trace_log* _trace_log = nullptr;
+
+static void print_trace(trace_record* tr) {
+char msg[512];
+auto tp = tr->tp;
+float time = tr->time;
+std::string thread_name = tr->thread_name.data();
+
+auto len = snprintf(msg, 512, "%-15s %3d %12.9f %s(", thread_name.c_str(), 
tr->cpu, time / 10, tp->name);
+auto left = 512 - len;
+auto m = msg + len;
+
+auto fmt = tp->format;
+//Copy all up to 1st '%'
+while (*fmt && *fmt != '%' && left > 0) {
+   *m++ = *fmt++;
+   left--;
+   len++;
+}
+
+auto buf = tr->buffer;
+auto sig = tr->tp->sig;
+int written = 0;
+
+if (tr->backtrace) {
+buf += tracepoint_base::backtrace_len * sizeof(void*);
+}
+
+while (*sig != 0 && left > 2) {
+//Copy fragment of tp->format up to next '%'
+char _fmt[128];
+int i = 0;
+do {
+   _fmt[i++] = *fmt++;
+} while (*fmt && *fmt != '%');
+_fmt[i] = 0;
+
+//Detect type of data, deserialize and print to the msg
+   

[osv-dev] [COMMIT osv master] Fix a typo in README.md

2023-10-30 Thread Commit Bot
From: Alex 
Committer: WALDEMAR KOZACZUK 
Branch: master

Fix a typo in README.md

Fix a typo in README.md

---
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ route. For introduction please read this
 [crash 
course](https://github.com/cloudius-systems/osv/wiki/Build-and-run-apps-on-OSv-using-Capstan).
 For more details about *capstan* please read 
 this more detailed 
[documentation](https://github.com/cloudius-systems/capstan#documentation). 
Pre-built OSv kernel files
-(`ovs-loader.qemu`) can be automatically downloaded by *capstan* from 
+(`osv-loader.qemu`) can be automatically downloaded by *capstan* from 
 the [OSv regular releases 
page](https://github.com/cloudius-systems/osv/releases) or manually from 
 the [nightly releases 
repo](https://github.com/osvunikernel/osv-nightly-releases/releases/tag/ci-master-latest).
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/4df6d20608f06f7e%40google.com.


[osv-dev] [COMMIT osv master] tests: add misc-vdso-perf.c to measure performance of VDSO

2023-10-15 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

tests: add misc-vdso-perf.c to measure performance of VDSO

This patch adds a simple test program intended to measure performance of VDSO
based functions like clock_gettime(), gettimeofday() and time().
It will be useful to evaluate overhead of future changes to support
running statically linked executables on OSv.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -139,7 +139,8 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so \
-   misc-futex-perf.so misc-syscall-perf.so tst-brk.so tst-reloc.so
+   misc-futex-perf.so misc-syscall-perf.so tst-brk.so tst-reloc.so \
+   misc-vdso-perf.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
 #  tst-f128.so \
 
diff --git a/tests/misc-vdso-perf.c b/tests/misc-vdso-perf.c
--- a/tests/misc-vdso-perf.c
+++ b/tests/misc-vdso-perf.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+/* Simple test program intended to measure performance of VDSO
+   based functions like clock_gettime(), gettimeofday() and time().
+   It will be useful to evaluate overhead of future changes to support
+   running statically linked executables on OSv.
+
+ - build as static executable:
+   gcc -o misc-vdso-perf-static tests/misc-vdso-perf.c -static
+
+ - build as static PIE:
+   gcc -o misc-vdso-perf-static-pie tests/misc-vdso-perf.c -pie -static-pie
+*/
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+uint64_t nstime()
+{
+struct timeval tv;
+gettimeofday(&tv, NULL);
+uint64_t mul = 1000000000, mul2 = 1000;
+return tv.tv_sec * mul + tv.tv_usec * mul2;
+}
+
+int main(int argc, char **argv)
+{
+long count = 5000;
+long loop = count;
+uint64_t start = nstime();
+
+struct timespec ts1;
+while (loop--) {
+assert(0 == clock_gettime(CLOCK_MONOTONIC, &ts1));
+}
+
+uint64_t end = nstime();
+
+long average_syscall_duration = (end - start) / count;
+printf("%lu ns (elapsed %.2f sec) %s\n", average_syscall_duration, (end - 
start) / 1000000000.0, ": average clock_gettime duration");
+}
+

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5c75cc0607c447d3%40google.com.


[osv-dev] [COMMIT osv master] syscalls: add 40 syscalls needed to run statically linked executables

2023-10-12 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscalls: add 40 syscalls needed to run statically linked executables

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -40,7 +40,15 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 
 #include 
 
@@ -606,6 +614,46 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL4(mknodat, int, const char *, mode_t, dev_t);
 SYSCALL5(statx, int, const char *, int, unsigned int, struct statx *);
 SYSCALL3(sys_getcpu, unsigned int *, unsigned int *, void *);
+SYSCALL1(dup, int);
+SYSCALL2(dup2, unsigned int , unsigned int);
+SYSCALL3(mprotect, void *, size_t, int);
+SYSCALL2(access, const char *, int);
+SYSCALL3(writev, int, const struct iovec *, int);
+SYSCALL3(readlink, const char *, char *, size_t);
+SYSCALL0(geteuid);
+SYSCALL0(getegid);
+SYSCALL2(gettimeofday, struct timeval *, struct timezone *);
+SYSCALL3(poll, struct pollfd *, nfds_t, int);
+SYSCALL0(getppid);
+SYSCALL1(epoll_create, int);
+SYSCALL1(sysinfo, struct sysinfo *);
+SYSCALL1(time, time_t *);
+SYSCALL4(sendfile, int, int, off_t *, size_t);
+SYSCALL4(socketpair, int, int, int, int *);
+SYSCALL2(shutdown, int, int);
+SYSCALL1(unlink, const char *);
+SYSCALL3(readv, unsigned long, const struct iovec *, unsigned long);
+SYSCALL2(getrusage, int, struct rusage *);
+SYSCALL3(accept, int, struct sockaddr *, socklen_t *);
+SYSCALL1(fchdir, unsigned int);
+SYSCALL1(pipe, int*);
+SYSCALL2(fstatfs, unsigned int, struct statfs *);
+SYSCALL1(umask, mode_t);
+SYSCALL5(prctl, int, unsigned long, unsigned long, unsigned long, unsigned 
long);
+SYSCALL1(chdir, const char *);
+SYSCALL4(faccessat, int, const char *, int, int);
+SYSCALL2(kill, pid_t, int);
+SYSCALL1(alarm, unsigned int);
+SYSCALL4(utimensat, int, const char *, const struct timespec*, int);
+SYSCALL2(symlink, const char *, const char *);
+SYSCALL1(rmdir, const char *);
+SYSCALL2(sethostname, char *, int);
+SYSCALL2(creat, const char *, mode_t);
+SYSCALL2(timerfd_create, int, int);
+SYSCALL4(timerfd_settime, int, int, const struct itimerspec *, struct 
itimerspec *);
+SYSCALL2(timerfd_gettime, int, struct itimerspec*);
+SYSCALL2(chmod, const char *, mode_t);
+SYSCALL2(fchmod, int, mode_t);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/0c5d4206078fae59%40google.com.


[osv-dev] [COMMIT osv master] syscalls: add getcpu

2023-10-12 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

syscalls: add getcpu

The implementation assumes a single NUMA node.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -382,6 +382,20 @@ static long sys_getcwd(char *buf, unsigned long size)
 return strlen(ret) + 1;
 }
 
+#define __NR_sys_getcpu __NR_getcpu
+static long sys_getcpu(unsigned int *cpu, unsigned int *node, void *tcache)
+{
+if (cpu) {
+*cpu = sched::cpu::current()->id;
+}
+
+if (node) {
+   *node = 0;
+}
+
+return 0;
+}
+
 #define __NR_sys_ioctl __NR_ioctl
 //
 // We need to define explicit sys_ioctl that takes these 3 parameters to 
conform
@@ -591,6 +605,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL4(clock_nanosleep, clockid_t, int, const struct timespec *, struct 
timespec *);
 SYSCALL4(mknodat, int, const char *, mode_t, dev_t);
 SYSCALL5(statx, int, const char *, int, unsigned int, struct statx *);
+SYSCALL3(sys_getcpu, unsigned int *, unsigned int *, void *);
 }
 
 debug_always("syscall(): unimplemented system call %d\n", number);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/f6f82406078fad3f%40google.com.


[osv-dev] [COMMIT osv master] syscall: use GS register to store syscall stack address

2023-10-12 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

syscall: use GS register to store syscall stack address

The original syscall implementation utilized the TCB (Thread Control Block)
to store the syscall stack address. On each context switch, the
FS segment register would be reset to point to the thread specific
TCB where syscall handler would fetch its stack address from.

In order to support statically linked executables which allocate and use
their own TCB, OSv needs to be able to switch the FS register between
the user and kernel address when handling syscalls. The syscall handler
can no longer fetch its stack address from kernel TCB because the FS
register points to the app TCB. In order to break this dependency,
this patch changes all relevant code to move the syscall stack address
and syscall caller stack pointer to the per-CPU memory area addressed
by the GS segment register.

To that end, we define a new structure - syscall_stack_descriptor - that
describes a syscall stack: its top and SYSCALL caller stack pointer.
Then, we add a new field '_syscall_stack_descriptor' to the thread_state
to store each thread's allocated syscall stack information.

In addition, we add new field '_current_syscall_stack_descriptor' to
the per-cpu structure arch_cpu and initialize each cpu GS register
to point to it. Finally, the thread::switch_to() is changed
to update the stack_top and caller_stack_pointer fields of
'_current_syscall_stack_descriptor' on each context switch so that
the syscall handler can fetch syscall stack address using GS segment.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -164,6 +164,10 @@ void thread::free_tcb()
 free(_tcb);
 }
 
+void thread::free_syscall_stack()
+{
+}
+
 void thread_main_c(thread* t)
 {
 arch::irq_enable();
diff --git a/arch/x64/arch-cpu.hh b/arch/x64/arch-cpu.hh
--- a/arch/x64/arch-cpu.hh
+++ b/arch/x64/arch-cpu.hh
@@ -13,6 +13,8 @@
 #include "cpuid.hh"
 #include "osv/pagealloc.hh"
 #include 
+#include "syscall.hh"
+#include "msr.hh"
 
 struct init_stack {
 char stack[4096] __attribute__((aligned(16)));
@@ -46,6 +48,13 @@ struct arch_cpu {
 u32 apic_id;
 u32 acpi_id;
 u64 gdt[nr_gdt];
+// This field holds a syscall stack descriptor of a current thread
+// which is updated on every context switch (see arch-switch.hh).
+// We keep this field in this per-cpu structure and initialize GS register
+// of the corresponding cpu to point to it (see init_on_cpu() down below),
+// in order to make it possible to access it in assembly code through
+// a known offset at %gs:0.
+syscall_stack_descriptor _current_syscall_stack_descriptor;
 void init_on_cpu();
 void set_ist_entry(unsigned ist, char* base, size_t size);
 char* get_ist_entry(unsigned ist);
@@ -181,6 +190,8 @@ inline void arch_cpu::init_on_cpu()
 processor::init_fpu();
 
 processor::init_syscall();
+
+processor::wrmsr(msr::IA32_GS_BASE, 
reinterpret_cast(&_current_syscall_stack_descriptor.stack_top));
 }
 
 struct exception_guard {
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -37,10 +37,10 @@
 #define LARGE_SYSCALL_STACK_DEPTH (LARGE_SYSCALL_STACK_SIZE - 
SYSCALL_STACK_RESERVED_SPACE_SIZE)
 
 #define SET_SYSCALL_STACK_TYPE_INDICATOR(value) \
-*reinterpret_cast(_tcb->syscall_stack_top) = value;
+*reinterpret_cast(_state._syscall_stack_descriptor.stack_top) = value;
 
 #define GET_SYSCALL_STACK_TYPE_INDICATOR() \
-*reinterpret_cast(_tcb->syscall_stack_top)
+*reinterpret_cast(_state._syscall_stack_descriptor.stack_top)
 
 #define TINY_SYSCALL_STACK_INDICATOR 0l
 #define LARGE_SYSCALL_STACK_INDICATOR 1l
@@ -88,8 +88,14 @@ void thread::switch_to()
 barrier();
 auto c = _detached_state->_cpu;
 old->_state.exception_stack = c->arch.get_exception_stack();
+// save the old thread SYSCALL caller stack pointer in the syscall stack 
descriptor
+old->_state._syscall_stack_descriptor.caller_stack_pointer = 
c->arch._current_syscall_stack_descriptor.caller_stack_pointer;
 c->arch.set_interrupt_stack(&_arch);
 c->arch.set_exception_stack(_state.exception_stack);
+// set this cpu current thread syscall stack descriptor to the values 
copied from the new thread syscall stack descriptor
+// so that the syscall handler can reference the current thread syscall 
stack top using the GS register
+c->arch._current_syscall_stack_descriptor.caller_stack_pointer = 
_state._syscall_stack_descriptor.caller_stack_pointer;
+c->arch._current_syscall_stack_descriptor.stack_top = 
_state._syscall_stack_descriptor.stack_top;
 auto fpucw = processor::fnstcw();
 auto mxcsr = processor::stmxcsr();
 asm volatile
@@ -161,6 +167,25 @@ void thread::init_stack()
 _state.rip = reinterpret_cast(thread_main);
  

[osv-dev] [COMMIT osv master] core: support launching statically linked executables

2023-10-03 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

core: support launching statically linked executables

This patch enhances the dynamic linker to support launching statically
linked executables.

The dynamically linked executables or shared libraries are typically launched
by calling a "main" function which is exported and can be resolved using
the ELF symbol table. The statically linked executables do not export
such symbol and instead must be launched by jumping to the ELF entry
point specified in the header. To that end, this patch implements a new
function - run_entry_point() - for each architecture (x86_64 and
aarch64). Both variants fundamentally do the same thing: put argc, argv,
environment variables and the auxiliary vector on the stack and jump to the ELF
entry point.

In addition this patch enhances other parts of the dynamic linker logic
to do things a little bit differently for statically linked executables:

- do not relocate as the executable comes with its own code that does
  this

- do not call INIT and FINI function for the same reason as above

- do not try to load dependent libraries as there would be none

This patch also adds augment_auxv() to add extra auxiliary vector
entries needed by statically linked executables to bootstrap
themselves.

In addition, this 2nd version of the PR renames the method
is_statically_linked() to is_statically_linked_executable() and fixes
it by identifying if an ELF is not a shared library. So it should
fix the "liblua.so" problem reported by Nadav.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-elf.hh b/arch/aarch64/arch-elf.hh
--- a/arch/aarch64/arch-elf.hh
+++ b/arch/aarch64/arch-elf.hh
@@ -23,4 +23,38 @@ enum {
 
 #define ELF_KERNEL_MACHINE_TYPE 183
 
+static constexpr unsigned SAFETY_BUFFER = 256;
+#include 
+
+inline void run_entry_point(void* ep, int argc, char** argv, int argv_size)
+{
+//The layout of the stack and state of all relevant registers is similar
+//to how it looks for x86_64. The main difference (possibly for now)
+//is the inlined assembly
+int argc_plus_argv_stack_size = argv_size + 1;
+
+//Capture current stack pointer
+void *stack;
+asm volatile ("mov %0, sp" : "=r"(stack));
+
+//The code below puts argv and auxv vector onto the stack but it may
+//also end up using some of the stack. To make sure there is no collision
+//let us leave some space - SAFETY_BUFFER - between current stack pointer
+//and the position on the stack we will be writing to.
+stack -= (SAFETY_BUFFER + argc_plus_argv_stack_size * sizeof(char*));
+
+//According to the document above the stack pointer should be 16-bytes 
aligned
+stack = align_down(stack, 16);
+
+*reinterpret_cast(stack) = argc;
+memcpy(stack + sizeof(char*), argv, argv_size * sizeof(char*));
+
+//Set stack pointer and jump to the ELF entry point
+asm volatile (
+"mov sp, %1\n\t" //set stack
+"blr %0\n\t"
+:
+: "r"(ep), "r"(stack));
+}
+
 #endif /* ARCH_ELF_HH */
diff --git a/arch/x64/arch-elf.hh b/arch/x64/arch-elf.hh
--- a/arch/x64/arch-elf.hh
+++ b/arch/x64/arch-elf.hh
@@ -42,4 +42,45 @@ enum {
 
 #define ELF_KERNEL_MACHINE_TYPE 62
 
+static constexpr unsigned SAFETY_BUFFER = 256;
+#include 
+
+inline void run_entry_point(void* ep, int argc, char** argv, int argv_size)
+{
+//The layout of the stack and state of all relevant registers is described
+//in detail in the section 3.4 (Process Initialization) of the System V 
Application
+//Binary Interface AMD64 Architecture Processor Supplement Draft Version 
0.95
+//(see https://refspecs.linuxfoundation.org/elf/x86_64-abi-0.95.pdf)
+int argc_plus_argv_stack_size = argv_size + 1;
+
+//Capture current stack pointer
+void *stack;
+asm volatile ("movq %%rsp, %0" : "=r"(stack));
+
+//The code below puts argv and auxv vector onto the stack but it may
+//also end up using some of the stack. To make sure there is no collision
+//let us leave some space - SAFETY_BUFFER - between current stack pointer
+//and the position on the stack we will be writing to.
+stack -= (SAFETY_BUFFER + argc_plus_argv_stack_size * sizeof(char*));
+
+//According to the document above the stack pointer should be 16-bytes 
aligned
+stack = align_down(stack, 16);
+
+//... and it should start with argc, followed by argv, environment 
pointers and
+//auxiliary vector entries. For details look at application::prepare_argv()
+*reinterpret_cast(stack) = argc;
+memcpy(stack + sizeof(char*), argv, argv_size * sizeof(char*));
+
+//TODO: Reset SSE2 and floating point registers and RFLAGS as the "Special 
Registers"
+//  paragraph of the section of 3.4 (Process Initialization) of the 
"System V Application
+//  Binary Interface" document states
+//Set stack pointer, reset rdx and jump to the ELF entry point
+asm volatile (
+"movq %1, 

[osv-dev] [COMMIT osv master] pselect6: implement the case when sigmask is not NULL

2023-10-03 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

pselect6: implement the case when sigmask is not NULL

This patch completes the implementation of the pselect6
syscall to handle the case when sigmask is not NULL.
It does so by calling pthread_sigmask() and select() as explained
by the manual page https://linux.die.net/man/2/pselect6.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -423,21 +423,44 @@ static int sys_ioctl(unsigned int fd, unsigned int 
command, unsigned long arg)
 }
 }
 
+struct sys_sigset {
+const sigset_t *ss; /* Pointer to signal set */
+size_t  ss_len; /* Size (in bytes) of object pointed to by 'ss' */
+};
+
 static int pselect6(int nfds, fd_set *readfds, fd_set *writefds,
-   fd_set *exceptfds, const struct timespec *timeout_ts,
-   void *sig)
+   fd_set *exceptfds, struct timespec *timeout_ts,
+   sys_sigset* sigmask)
 {
 // As explained in the pselect(2) manual page, the system call pselect 
accepts
 // pointer to a structure holding pointer to sigset_t and its size which 
is different
-// the glibc version of pselect(). For now we are delaying implementation 
of this call
-// scenario and raising an error when such call happens.
-if(sig) {
-WARN_ONCE("pselect6(): unimplemented with not-null sigmask\n");
-errno = ENOSYS;
-return -1;
+// from the glibc version of pselect().
+// On top of this, the Linux pselect6() system call modifies its timeout 
argument
+// unlike the glibc pselect() function. Our implementation below is to 
great extent
+// similar to that of pselect() in core/select.cc
+sigset_t origmask;
+struct timeval timeout;
+
+if (timeout_ts) {
+timeout.tv_sec = timeout_ts->tv_sec;
+timeout.tv_usec = timeout_ts->tv_nsec / 1000;
+}
+
+if (sigmask) {
+sigprocmask(SIG_SETMASK, sigmask->ss, &origmask);
 }
 
-return pselect(nfds, readfds, writefds, exceptfds, timeout_ts, NULL);
+auto ret = select(nfds, readfds, writefds, exceptfds,
+timeout_ts == NULL? NULL : &timeout);
+if (sigmask) {
+sigprocmask(SIG_SETMASK, &origmask, NULL);
+}
+
+if (timeout_ts) {
+timeout_ts->tv_sec = timeout.tv_sec;
+timeout_ts->tv_nsec = timeout.tv_usec * 1000;
+}
+return ret;
 }
 
 static int tgkill(int tgid, int tid, int sig)
@@ -536,7 +559,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL2(flock, int, int);
 SYSCALL4(pwrite64, int, const void *, size_t, off_t);
 SYSCALL1(fdatasync, int);
-SYSCALL6(pselect6, int, fd_set *, fd_set *, fd_set *, const struct 
timespec *, void *);
+SYSCALL6(pselect6, int, fd_set *, fd_set *, fd_set *, struct timespec *, 
sys_sigset*);
 SYSCALL3(fcntl, int, int, int);
 SYSCALL4(pread64, int, void *, size_t, off_t);
 SYSCALL2(ftruncate, int, off_t);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/5d96720606d403d6%40google.com.


[osv-dev] [COMMIT osv master] syscalls: make exit terminate current thread

2023-10-01 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

syscalls: make exit terminate current thread

Unlike the libc exit() function, the exit syscall in Linux
is supposed to terminate the current thread. This patch fixes
the incorrect implementation that delegates to exit().

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -357,7 +357,7 @@ int rt_sigprocmask(int how, sigset_t * nset, sigset_t * 
oset, size_t sigsetsize)
 
 static int sys_exit(int ret)
 {
-exit(ret);
+sched::thread::current()->exit();
 return 0;
 }
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/7903ad0606b0a449%40google.com.


[osv-dev] [COMMIT osv master] libc: added named semaphore functions to the list of exported symbols

2023-09-13 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

libc: added named semaphore functions to the list of exported symbols

---
diff --git a/exported_symbols/osv_ld-musl.so.1.symbols 
b/exported_symbols/osv_ld-musl.so.1.symbols
--- a/exported_symbols/osv_ld-musl.so.1.symbols
+++ b/exported_symbols/osv_ld-musl.so.1.symbols
@@ -907,11 +907,14 @@ secure_getenv
 seed48
 seekdir
 select
+sem_close
 sem_destroy
 sem_init
+sem_open
 sem_post
 sem_timedwait
 sem_trywait
+sem_unlink
 sem_wait
 send
 sendfile
diff --git a/exported_symbols/osv_libc.so.6.symbols 
b/exported_symbols/osv_libc.so.6.symbols
--- a/exported_symbols/osv_libc.so.6.symbols
+++ b/exported_symbols/osv_libc.so.6.symbols
@@ -837,6 +837,7 @@ strerror_l
 strerror_r
 strfmon
 strfmon_l
+strfromf128
 strftime
 __strftime_l
 strftime_l
@@ -865,6 +866,7 @@ strtod_l
 strtof
 __strtof_l
 strtof_l
+strtof128
 strtoimax
 strtok
 __strtok_r
diff --git a/exported_symbols/osv_libpthread.so.0.symbols 
b/exported_symbols/osv_libpthread.so.0.symbols
--- a/exported_symbols/osv_libpthread.so.0.symbols
+++ b/exported_symbols/osv_libpthread.so.0.symbols
@@ -94,11 +94,14 @@ read
 recv
 recvfrom
 recvmsg
+sem_close
 sem_destroy
 sem_init
+sem_open
 sem_post
 sem_timedwait
 sem_trywait
+sem_unlink
 sem_wait
 send
 sendmsg
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -55,7 +55,7 @@ OSV_LIBC_API
 int sem_init(sem_t* s, int pshared, unsigned val)
 {
 static_assert(sizeof(indirect_semaphore) <= sizeof(*s), "sem_t overflow");
-posix_semaphore *sem = new posix_semaphore(val, 1, false); 
+posix_semaphore *sem = new posix_semaphore(val, 1, false);
 new (s) indirect_semaphore(sem);
 return 0;
 }
@@ -114,7 +114,7 @@ sem_t *sem_open(const char *name, int oflag, ...)
 {
 SCOPE_LOCK(named_semaphores_mutex);
 auto iter = named_semaphores.find(std::string(name));
-
+
 if (iter != named_semaphores.end()) {
 //opening already named semaphore
 if (oflag & O_EXCL && oflag & O_CREAT) {
@@ -136,13 +136,13 @@ sem_t *sem_open(const char *name, int oflag, ...)
 errno = EINVAL;
 return SEM_FAILED;
 }
-
+
 indirect_semaphore *indp = new std::unique_ptr(
 new posix_semaphore(value, 1, true));
 named_semaphores.emplace(std::string(name), indp);
 return reinterpret_cast(indp);
 }
-
+
 errno = ENOENT;
 return SEM_FAILED;
 }
@@ -160,7 +160,7 @@ int sem_unlink(const char *name)
 named_semaphores.erase(iter);
 return 0;
 }
-
+
 errno = ENOENT;
 return -1;
 }
diff --git a/tests/tst-semaphore.c b/tests/tst-semaphore.c
--- a/tests/tst-semaphore.c
+++ b/tests/tst-semaphore.c
@@ -66,7 +66,7 @@ int main(void) {
 
 //Can't create a new named semaphore without O_CREAT
 assert(sem_open("other", 0, 0777, 1) == SEM_FAILED);
-assert(sem_open("other", O_EXCL | O_SYNC, 0777, 1) == SEM_FAILED); 
+assert(sem_open("other", O_EXCL | O_SYNC, 0777, 1) == SEM_FAILED);
 
 //Any other flags should have no effect if the named semaphore does not 
exist
 sem_t *named_sem3 = sem_open("other", O_EXCL | O_CREAT | O_SYNC, 0777, 1);

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/7ee4170605449565%40google.com.


[osv-dev] [COMMIT osv master] libc: fix bug in sem_unlink()

2023-09-13 Thread Commit Bot
From: Nadav Har'El 
Committer: Waldemar Kozaczuk 
Branch: master

libc: fix bug in sem_unlink()

The function sem_unlink() should remove the name of a named semaphore,
but it should still be usable until its last user sem_close()es it.

Our implementation of this was broken... The named_semaphore hash table
held unique_ptr, which means that as soon as an item
was removed from this hash table (when it was sem_unlink()ed), the
unique_ptr was destroyed leading to the destruction of the semaphore
itself.

The solution is for the hash table to hold pointers to indirect_semaphore,
not the indirect_semaphore itself. Before this patch, tst-semaphore.so
crashed, and after it it passes.

We could have perhaps achieved the same thing in a simpler way by
using std::shared_ptr instead of std::unique_ptr, which already holds
its reference count, and sitting in the named_semaphore hash table
would just be yet another reference. Or alternatively, don't use
unique_ptr at all and just use raw pointers. But let's leave such
cleanups, to the future, if we want to do them at all.

Fixes #1258

Signed-off-by: Nadav Har'El 

Closes #1259

---
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -106,7 +106,7 @@ int sem_trywait(sem_t* s)
 return 0;
 }
 
-static std::unordered_map named_semaphores;
+static std::unordered_map named_semaphores;
 static mutex named_semaphores_mutex;
 
 OSV_LIBC_API
@@ -122,8 +122,8 @@ sem_t *sem_open(const char *name, int oflag, ...)
 return SEM_FAILED;
 }
 
-iter->second->add_reference();
-return reinterpret_cast(&(iter->second));
+(*iter->second)->add_reference();
+return reinterpret_cast(iter->second);
 }
 else if (oflag & O_CREAT) {
 //creating new semaphore
@@ -137,9 +137,10 @@ sem_t *sem_open(const char *name, int oflag, ...)
 return SEM_FAILED;
 }
 
-named_semaphores.emplace(std::string(name),
-std::unique_ptr(new posix_semaphore(value, 1, 
true)));
-return reinterpret_cast(_semaphores[std::string(name)]);
+indirect_semaphore *indp = new std::unique_ptr(
+new posix_semaphore(value, 1, true));
+named_semaphores.emplace(std::string(name), indp);
+return reinterpret_cast(indp);
 }
 
 errno = ENOENT;
@@ -152,9 +153,9 @@ int sem_unlink(const char *name)
 SCOPE_LOCK(named_semaphores_mutex);
 auto iter = named_semaphores.find(std::string(name));
 if (iter != named_semaphores.end()) {
-iter->second->unlink();
-if (iter->second->not_referenced()) {
-sem_destroy(reinterpret_cast(>second));
+(*iter->second)->unlink();
+if ((*iter->second)->not_referenced()) {
+sem_destroy(reinterpret_cast(iter->second));
 }
 named_semaphores.erase(iter);
 return 0;

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/84917e0605446699%40google.com.


[osv-dev] [COMMIT osv master] libc: fix sem_open() signature

2023-09-13 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Nadav Har'El 
Branch: master

libc: fix sem_open() signature

The signature of the sem_open is different in musl header than it is in the
sem.cc (as a matter of fact the last 2 arguments are only required with
O_CREAT), resulting in sem_open() not being "extern C" and not being
exported to applications. The test tests/tst-semaphore.so couldn't run
because of this. This patch fixes this.

---
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -110,7 +110,7 @@ static std::unordered_map 
named_semaphores;
 static mutex named_semaphores_mutex;
 
 OSV_LIBC_API
-sem_t *sem_open(const char *name, int oflag, mode_t mode, unsigned int value)
+sem_t *sem_open(const char *name, int oflag, ...)
 {
 SCOPE_LOCK(named_semaphores_mutex);
 auto iter = named_semaphores.find(std::string(name));
@@ -127,6 +127,11 @@ sem_t *sem_open(const char *name, int oflag, mode_t mode, 
unsigned int value)
 }
 else if (oflag & O_CREAT) {
 //creating new semaphore
+va_list ap;
+va_start(ap, oflag);
+va_arg(ap, mode_t);
+unsigned value = va_arg(ap, unsigned);
+va_end(ap);
 if (value > SEM_VALUE_MAX) {
 errno = EINVAL;
 return SEM_FAILED;

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/73fc78060537baf2%40google.com.


[osv-dev] [COMMIT osv master] Merge 'Fix build for Fedora 38' from Nadav Har'El

2023-09-12 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

Merge 'Fix build for Fedora 38' from Nadav Har'El

In this small series we fix what needs to be fixed to build OSv on Fedora 38:
1. Update setup.py to allow Fedora 38
2. Add two stub functions that new libstdc++ started to need (but it's fine 
that they are stubs)
3. Add a missing include directive

After this patch, the OSv kernel builds and also some images (e.g., "rogue"). 
The default (Lua shell) build builds but doesn't run and will need to be fixed 
later.

Fixes #1238.

Closes #1257

* https://github.com/cloudius-systems/osv:
  libc: fix build on C++11 mode of recent gcc
  tests: fix missing include
  libc: add stub strfromf128(), strtof128()
  setup.py: support Fedora 38

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1721,6 +1721,7 @@ $(out)/libc/stdlib/qsort_r.o: COMMON += 
-Wno-dangling-pointer
 libc += stdlib/strtol.o
 libc += stdlib/strtod.o
 libc += stdlib/wcstol.o
+libc += stdlib/unimplemented.o
 
 libc += string/__memcpy_chk.o
 libc += string/explicit_bzero.o
diff --git a/include/api/stdlib.h b/include/api/stdlib.h
--- a/include/api/stdlib.h
+++ b/include/api/stdlib.h
@@ -22,12 +22,18 @@ double atof (const char *);
 float strtof (const char *__restrict, char **__restrict);
 double strtod (const char *__restrict, char **__restrict);
 long double strtold (const char *__restrict, char **__restrict);
+__float128 strtof128 (const char *__restrict, char **__restrict);
 
 long strtol (const char *__restrict, char **__restrict, int);
 unsigned long strtoul (const char *__restrict, char **__restrict, int);
 long long strtoll (const char *__restrict, char **__restrict, int);
 unsigned long long strtoull (const char *__restrict, char **__restrict, int);
 
+int strfromd (char *__restrict, size_t, const char *__restrict, double);
+int strfromf (char *__restrict, size_t, const char *__restrict, float);
+int strfromld (char *__restrict, size_t, const char *__restrict, long double);
+int strfromf128 (char *__restrict, size_t, const char *__restrict, __float128);
+
 int rand (void);
 void srand (unsigned);
 
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -132,7 +132,8 @@ sem_t *sem_open(const char *name, int oflag, mode_t mode, 
unsigned int value)
 return SEM_FAILED;
 }
 
-named_semaphores.emplace(std::string(name), 
std::make_unique(value, 1, true)); 
+named_semaphores.emplace(std::string(name),
+std::unique_ptr(new posix_semaphore(value, 1, 
true)));
 return reinterpret_cast(_semaphores[std::string(name)]);
 }
 
@@ -168,4 +169,4 @@ int sem_close(sem_t *sem)
 sem_destroy(sem);
 }
 return 0;
-}
\ No newline at end of file
+}
diff --git a/libc/stdlib/unimplemented.cc b/libc/stdlib/unimplemented.cc
--- a/libc/stdlib/unimplemented.cc
+++ b/libc/stdlib/unimplemented.cc
@@ -0,0 +1,23 @@
+/* Based on recent addition to Musl, see
+ */
+
+#include 
+#include 
+
+// We are missing an implementation of the new C23 functions strfrom[fdl]
+// and eventually we can get such an implementation from Musl (see a
+// proposal in https://www.openwall.com/lists/musl/2023/05/31/28), but
+// for now we'll just leave these functions missing - and applications that
+// try to use them will report the missing function.
+//
+// But for strfromf128() we need a stub now, because recent versions of
+// libstdc++ started to use them. It's fine that the implementation is just
+// a stub - whatever code uses the new C++ feature should fail reporting
+// the unimplemented feature.
+// Later, when we implement this function, we already have a test for it
+// in tests/tst-f128.cc.
+UNIMPL(int strfromf128(char *, size_t, const char *, __float128))
+
+// Similarly, recent versions of libstdc++ need strtof128, but don't actually
+// use it until the user really uses the __float128 type.
+UNIMPL(__float128 strtof128(const char *, char **))
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -141,6 +141,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so \
misc-futex-perf.so misc-syscall-perf.so tst-brk.so tst-reloc.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
+#  tst-f128.so \
 
 ifeq ($(arch),x64)
 tests += tst-mmx-fpu.so
diff --git a/scripts/setup.py b/scripts/setup.py
--- a/scripts/setup.py
+++ b/scripts/setup.py
@@ -150,8 +150,15 @@ class Fedora_37(object):
 ec2_post_install = None
 version = '37'
 
+class Fedora_38(object):
+packages = []
+ec2_packages = []
+test_packages = []
+ec2_post_install = None
+version = '38'
+
 versions = [
-Fedora_27, Fedora_28, Fedora_29, Fedora_30, Fedora_31, Fedora_32, 
Fedora_33, Fedora_34, Fedora_35, Fedora_37
+

[osv-dev] [COMMIT osv master] libc: fix build on C++11 mode of recent gcc

2023-09-12 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

libc: fix build on C++11 mode of recent gcc

A recently-added named-semaphore feature used std::make_unique<>().
It's hard to remember now, but std::make_unique() wasn't a part of the
original C++11, and was only added later, in C++14. Recent versions of
gcc (such as 13.2.1) started warning when it is used in C++11 compilation
mode, so the build failed.

Eventually, we should switch OSv to the C++14 (or even later) standards,
but I don't want to do that hastily now just to fix the build. So in
this patch I just do what people used to do before the advent of
std::make_unique (and we do many times in OSv): std::unique_ptr(new ...).

Signed-off-by: Nadav Har'El 

---
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -132,7 +132,8 @@ sem_t *sem_open(const char *name, int oflag, mode_t mode, 
unsigned int value)
 return SEM_FAILED;
 }
 
-named_semaphores.emplace(std::string(name), 
std::make_unique(value, 1, true)); 
+named_semaphores.emplace(std::string(name),
+std::unique_ptr(new posix_semaphore(value, 1, 
true)));
 return reinterpret_cast(_semaphores[std::string(name)]);
 }
 
@@ -168,4 +169,4 @@ int sem_close(sem_t *sem)
 sem_destroy(sem);
 }
 return 0;
-}
\ No newline at end of file
+}

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/d2feb406052f979a%40google.com.


[osv-dev] [COMMIT osv master] tests: fix missing include

2023-09-12 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

tests: fix missing include

The test tests/tst-align.cc used intptr_t but forgot to include the
appropriate header file (stdint.h). It got implicitly included by
older versions of the libstdc++ header files, but that is no longer the
case in Fedora 38, so let's include the missing header.

Signed-off-by: Nadav Har'El 

---
diff --git a/tests/tst-align.cc b/tests/tst-align.cc
--- a/tests/tst-align.cc
+++ b/tests/tst-align.cc
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include 
 
 static int tests = 0, fails = 0;
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/bf0c7b06052f97bc%40google.com.


[osv-dev] [COMMIT osv master] libc: add stub strfromf128(), strtof128()

2023-09-12 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

libc: add stub strfromf128(), strtof128()

Recent versions of libstdc++ (such as the one that comes with Fedora 38)
started using the functions strtof128() and strfromf128() when some C++
template uses the _Float128 type.

It's not trivial to implement these functions: I created in
tests/tst-f128.cc a test of what these functions need to do, based on
how they behave on glibc - the test currently passes only on glibc.
But luckily, we don't really need to implement them - it's enough
to stub them (and they'll abort if ever used by the application).

After this patch, the OSv kernel and various images (e.g., my favorite
"scripts/build image=rogue; scripts/run.py") work on Fedora 38, but
the default image "scripts/build" still doesn't work because of a
missing symbol in Lua.

Signed-off-by: Nadav Har'El 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1721,6 +1721,7 @@ $(out)/libc/stdlib/qsort_r.o: COMMON += 
-Wno-dangling-pointer
 libc += stdlib/strtol.o
 libc += stdlib/strtod.o
 libc += stdlib/wcstol.o
+libc += stdlib/unimplemented.o
 
 libc += string/__memcpy_chk.o
 libc += string/explicit_bzero.o
diff --git a/include/api/stdlib.h b/include/api/stdlib.h
--- a/include/api/stdlib.h
+++ b/include/api/stdlib.h
@@ -22,12 +22,18 @@ double atof (const char *);
 float strtof (const char *__restrict, char **__restrict);
 double strtod (const char *__restrict, char **__restrict);
 long double strtold (const char *__restrict, char **__restrict);
+__float128 strtof128 (const char *__restrict, char **__restrict);
 
 long strtol (const char *__restrict, char **__restrict, int);
 unsigned long strtoul (const char *__restrict, char **__restrict, int);
 long long strtoll (const char *__restrict, char **__restrict, int);
 unsigned long long strtoull (const char *__restrict, char **__restrict, int);
 
+int strfromd (char *__restrict, size_t, const char *__restrict, double);
+int strfromf (char *__restrict, size_t, const char *__restrict, float);
+int strfromld (char *__restrict, size_t, const char *__restrict, long double);
+int strfromf128 (char *__restrict, size_t, const char *__restrict, __float128);
+
 int rand (void);
 void srand (unsigned);
 
diff --git a/libc/stdlib/unimplemented.cc b/libc/stdlib/unimplemented.cc
--- a/libc/stdlib/unimplemented.cc
+++ b/libc/stdlib/unimplemented.cc
@@ -0,0 +1,23 @@
+/* Based on recent addition to Musl, see
+ */
+
+#include 
+#include 
+
+// We are missing an implementation of the new C23 functions strfrom[fdl]
+// and eventually we can get such an implementation from Musl (see a
+// proposal in https://www.openwall.com/lists/musl/2023/05/31/28), but
+// for now we'll just leave these functions missing - and applications that
+// try to use them will report the missing function.
+//
+// But for strfromf128() we need an stub now, because recent versions of
+// libstdc++ started to use them. It's fine that the implementation is just
+// a stub - whatever code uses the new C++ feature should fail reporting
+// the unimplemented feature.
+// Later, when we implement this function, we already have a test for it
+// in tests/tst-f128.cc.
+UNIMPL(int strfromf128(char *, size_t, const char *, __float128))
+
+// Similarly, recent versions of libstdc++ need strtof128, but don't actually
+// use it until the user really uses the __float128 type.
+UNIMPL(__float128 strtof128(const char *, char **))
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -141,6 +141,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
tst-netlink.so misc-zfs-io.so misc-zfs-arc.so tst-pthread-create.so \
misc-futex-perf.so misc-syscall-perf.so tst-brk.so tst-reloc.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
+#  tst-f128.so \
 
 ifeq ($(arch),x64)
 tests += tst-mmx-fpu.so
diff --git a/tests/tst-f128.cc b/tests/tst-f128.cc
--- a/tests/tst-f128.cc
+++ b/tests/tst-f128.cc
@@ -0,0 +1,59 @@
+// Tests for strfrom128() and strtof128() needed because of issue #1238.
+// This test should pass on both OSv and on Linux with recent glibc with
+// those two functions added.
+// This test does NOT currently pass on OSv - we only have a stub
+// implementation of these functions.
+
+#include 
+#include 
+#include 
+
+unsigned int tests_total = 0, tests_failed = 0;
+
+void report(const char* name, bool passed)
+{
+   static const char* status[] = {"FAIL", "PASS"};
+   printf("%s: %s\n", status[passed], name);
+   tests_total += 1;
+   tests_failed += !passed;
+}
+
+int main(void)
+{
+printf("Starting strfromf128()/strtof128() test\n");
+// It appears that gcc truncates floating literals to 64 bit, and
+// with "L" suffix, to 80 bits. To really get 128 bits, the "f128" suffix
+// is needed.
+__float128 pi = 3.14159265358979323846264338327950288419716939937510f128;
+// Successful path for 

[osv-dev] [COMMIT osv master] setup.py: support Fedora 38

2023-09-12 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

setup.py: support Fedora 38

Add to scripts/setup.py support for Fedora 38. It needs the same thing as
Fedora 37 and all previous Fedora releases.

The build still doesn't work on Fedora 38 - the next patches will fix it.

Signed-off-by: Nadav Har'El 

---
diff --git a/scripts/setup.py b/scripts/setup.py
--- a/scripts/setup.py
+++ b/scripts/setup.py
@@ -150,8 +150,15 @@ class Fedora_37(object):
 ec2_post_install = None
 version = '37'
 
+class Fedora_38(object):
+packages = []
+ec2_packages = []
+test_packages = []
+ec2_post_install = None
+version = '38'
+
 versions = [
-Fedora_27, Fedora_28, Fedora_29, Fedora_30, Fedora_31, Fedora_32, 
Fedora_33, Fedora_34, Fedora_35, Fedora_37
+Fedora_27, Fedora_28, Fedora_29, Fedora_30, Fedora_31, Fedora_32, 
Fedora_33, Fedora_34, Fedora_35, Fedora_37, Fedora_38
 ]
 
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/96ac6c06052f97cb%40google.com.


[osv-dev] [COMMIT osv master] Added POSIX named semaphore implementation

2023-09-10 Thread Commit Bot
From: Landon Johnson 
Committer: Nadav Har'El 
Branch: master

Added POSIX named semaphore implementation

Signed-off-by: Landon Johnson 

Closes #1232

---
diff --git a/libc/sem.cc b/libc/sem.cc
--- a/libc/sem.cc
+++ b/libc/sem.cc
@@ -6,18 +6,46 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include "libc.hh"
+#include 
 
 // FIXME: smp safety
 
-struct indirect_semaphore : std::unique_ptr {
-explicit indirect_semaphore(unsigned units)
-: std::unique_ptr(new semaphore(units)) {}
+struct posix_semaphore : semaphore {
+private:
+int references;
+bool named;
+public:
+posix_semaphore(int units, int refs, bool named)
+: semaphore(units), references(refs), named(named) {}
+
+void add_reference(){
+references++;
+}
+
+void remove_reference(){
+references--;
+}
+
+bool not_referenced(){
+return references <= 0;
+}
+
+void unlink(){
+named = false;
+}
+
+bool linked(){
+return named;
+}
 };
 
+using indirect_semaphore = std::unique_ptr;
+
 indirect_semaphore& from_libc(sem_t* p)
 {
 return *reinterpret_cast(p);
@@ -27,7 +55,8 @@ OSV_LIBC_API
 int sem_init(sem_t* s, int pshared, unsigned val)
 {
 static_assert(sizeof(indirect_semaphore) <= sizeof(*s), "sem_t overflow");
-new (s) indirect_semaphore(val);
+posix_semaphore *sem = new posix_semaphore(val, 1, false); 
+new (s) indirect_semaphore(sem);
 return 0;
 }
 
@@ -76,3 +105,67 @@ int sem_trywait(sem_t* s)
 return libc_error(EAGAIN);
 return 0;
 }
+
+static std::unordered_map named_semaphores;
+static mutex named_semaphores_mutex;
+
+OSV_LIBC_API
+sem_t *sem_open(const char *name, int oflag, mode_t mode, unsigned int value)
+{
+SCOPE_LOCK(named_semaphores_mutex);
+auto iter = named_semaphores.find(std::string(name));
+
+if (iter != named_semaphores.end()) {
+//opening already named semaphore
+if (oflag & O_EXCL && oflag & O_CREAT) {
+errno = EEXIST;
+return SEM_FAILED;
+}
+
+iter->second->add_reference();
+return reinterpret_cast(&(iter->second));
+}
+else if (oflag & O_CREAT) {
+//creating new semaphore
+if (value > SEM_VALUE_MAX) {
+errno = EINVAL;
+return SEM_FAILED;
+}
+
+named_semaphores.emplace(std::string(name), 
std::make_unique(value, 1, true)); 
+return reinterpret_cast(_semaphores[std::string(name)]);
+}
+
+errno = ENOENT;
+return SEM_FAILED;
+}
+
+OSV_LIBC_API
+int sem_unlink(const char *name)
+{
+SCOPE_LOCK(named_semaphores_mutex);
+auto iter = named_semaphores.find(std::string(name));
+if (iter != named_semaphores.end()) {
+iter->second->unlink();
+if (iter->second->not_referenced()) {
+sem_destroy(reinterpret_cast(>second));
+}
+named_semaphores.erase(iter);
+return 0;
+}
+
+errno = ENOENT;
+return -1;
+}
+
+OSV_LIBC_API
+int sem_close(sem_t *sem)
+{
+SCOPE_LOCK(named_semaphores_mutex);
+indirect_semaphore _sem = from_libc(sem);
+named_sem->remove_reference();
+if (!named_sem->linked() && named_sem->not_referenced()) {
+sem_destroy(sem);
+}
+return 0;
+}
\ No newline at end of file
diff --git a/tests/tst-semaphore.c b/tests/tst-semaphore.c
--- a/tests/tst-semaphore.c
+++ b/tests/tst-semaphore.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define THREAD_NUMBER 10
 
@@ -55,5 +56,46 @@ int main(void) {
 assert(sem_destroy(_sync) == 0);
 assert(sem_destroy(_done) == 0);
 
+///Named sempahore test
+
+//Create and open two handles to a named semaphore
+sem_t *named_sem1 = sem_open("name", O_CREAT, 0777, 1);
+assert(named_sem1 != SEM_FAILED);
+sem_t *named_sem2 = sem_open("name", O_EXCL, 0, 0);
+assert(named_sem1 == named_sem2);
+
+//Can't create a new named semaphore without O_CREAT
+assert(sem_open("other", 0, 0777, 1) == SEM_FAILED);
+assert(sem_open("other", O_EXCL | O_SYNC, 0777, 1) == SEM_FAILED); 
+
+//Any other flags should have no effect if the named semaphore does not 
exist
+sem_t *named_sem3 = sem_open("other", O_EXCL | O_CREAT | O_SYNC, 0777, 1);
+assert(named_sem3 != SEM_FAILED);
+assert(sem_unlink("other") == 0);
+assert(sem_close(named_sem3) == 0);
+
+//Close both handles to the semaphore without removing the name
+assert(sem_close(named_sem1) == 0);
+assert(sem_close(named_sem2) == 0);
+
+//Open two more handles to the named sempahore
+named_sem1 = sem_open("name", 0);
+assert(named_sem1 != SEM_FAILED);
+named_sem2 = sem_open("name", 0);
+assert(named_sem1 == named_sem2);
+
+//Can't open existing semaphore with O_CREAT and O_EXCL set
+assert(sem_open("name", O_CREAT | O_EXCL, 0777, 1) == 

[osv-dev] [COMMIT osv master] Merge 'Dynamic linker: support loading and processing statically linked executables' from WALDEMAR KOZACZUK

2023-08-31 Thread Commit Bot
From: Nadav Har'El 
Committer: Nadav Har'El 
Branch: master

Merge 'Dynamic linker: support loading and processing statically linked 
executables' from WALDEMAR KOZACZUK

The two commits provide necessary modifications to the OSv dynamic linker to 
support loading and processing statically linked executables.

Please note these changes are NOT enough to make OSv run statically linked 
executables.

Closes #1253

* github.com:cloudius-systems/osv:
  dynamic linker: support loading and processing static ELF
  dynamic linker: rename is_executable to is_dynamically_linked_executable

---
diff --git a/arch/aarch64/arch-elf.cc b/arch/aarch64/arch-elf.cc
--- a/arch/aarch64/arch-elf.cc
+++ b/arch/aarch64/arch-elf.cc
@@ -66,7 +66,7 @@ bool object::arch_relocate_rela(u32 type, u32 sym, void *addr,
 if (sym) {
 auto sm = symbol(sym);
 ulong tls_offset;
-if (sm.obj->is_executable()) {
+if (sm.obj->is_dynamically_linked_executable()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -119,7 +119,7 @@ void object::arch_relocate_tls_desc(u32 sym, void *addr, 
Elf64_Sxword addend)
 ulong tls_offset;
 if (sym) {
 auto sm = symbol(sym);
-if (sm.obj->is_executable() || sm.obj->is_core()) {
+if (sm.obj->is_dynamically_linked_executable() || sm.obj->is_core()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -163,7 +163,7 @@ void object::prepare_initial_tls(void* buffer, size_t size,
 
 void object::prepare_local_tls(std::vector& offsets)
 {
-if (!_static_tls && !is_executable()) {
+if (!_static_tls && !is_dynamically_linked_executable()) {
 return;
 }
 
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -120,7 +120,7 @@ void thread::setup_tcb()
 assert(obj);
 user_tls_size = obj->initial_tls_size();
 user_tls_data = obj->initial_tls();
-if (obj->is_executable()) {
+if (obj->is_dynamically_linked_executable()) {
executable_tls_size = obj->get_tls_size();
 }
 }
diff --git a/arch/x64/arch-elf.cc b/arch/x64/arch-elf.cc
--- a/arch/x64/arch-elf.cc
+++ b/arch/x64/arch-elf.cc
@@ -138,7 +138,7 @@ bool object::arch_relocate_rela(u32 type, u32 sym, void 
*addr,
 if (sym) {
 auto sm = symbol(sym);
 ulong tls_offset;
-if (sm.obj->is_executable()) {
+if (sm.obj->is_dynamically_linked_executable()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -202,7 +202,7 @@ void object::prepare_initial_tls(void* buffer, size_t size,
 
 void object::prepare_local_tls(std::vector& offsets)
 {
-if (!_static_tls && !is_executable()) {
+if (!_static_tls && !is_dynamically_linked_executable()) {
 return;
 }
 
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -210,7 +210,7 @@ void thread::setup_tcb()
 assert(obj);
 user_tls_size = obj->initial_tls_size();
 user_tls_data = obj->initial_tls();
-if (obj->is_executable()) {
+if (obj->is_dynamically_linked_executable()) {
executable_tls_size = obj->get_tls_size();
aligned_executable_tls_size = obj->get_aligned_tls_size();
 }
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -123,7 +123,7 @@ object::object(program& prog, std::string pathname)
 , _initial_tls_size(0)
 , _dynamic_table(nullptr)
 , _module_index(_prog.register_dtv(this))
-, _is_executable(false)
+, _is_dynamically_linked_executable(false)
 , _init_called(false)
 , _eh_frame(0)
 , _visibility_thread(nullptr)
@@ -249,9 +249,6 @@ const char * object::symbol_name(const Elf64_Sym * sym) {
 }
 
 void* object::entry_point() const {
-if (!_is_executable) {
-return nullptr;
-}
 return _base + _ehdr.e_entry;
 }
 
@@ -366,13 +363,13 @@ void object::set_base(void* base)
   [](const Elf64_Phdr* a, const Elf64_Phdr* b)
   { return a->p_vaddr < b->p_vaddr; });
 
-if (!is_core() && is_non_pie_executable()) {
-// Verify non-PIE executable does not collide with the kernel
+if (!is_core() && !is_pic()) {
+// Verify non-PIC executable ((aka position dependent)) does not 
collide with the kernel
 if (intersects_with_kernel(p->p_vaddr) || 

[osv-dev] [COMMIT osv master] dynamic linker: support loading and processing static ELF

2023-08-31 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

dynamic linker: support loading and processing static ELF

This patch enhances the dynamic linker to support loading and
processing statically linked executables. This does not mean
OSv will now be able to run such ELFs: there are still many key
elements missing, which will be added in subsequent patches.

This patch however modifies dynamic linker to properly detect
statically linked PIEs and non-PIEs and skip/adjust relevant parts
of the processing logic where necessary:

- adjusts object::set_base() to set _base to 0 for any position
  dependent ELF (non PIC), either statically or dynamically linked

- modifies object::fix_permissions() to skip RELRO processing
  for statically linked executables as it does not apply (at least for now)

- adjusts the lookup_symbol_*() and lookup_addr() methods to return early
  if DT_SYMTAB or DT_STRTAB, respectively, is not present

- adjusts load_needed() to skip loading dependent objects if DT_NEEDED
  is not present

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -249,9 +249,6 @@ const char * object::symbol_name(const Elf64_Sym * sym) {
 }
 
 void* object::entry_point() const {
-if (!_is_dynamically_linked_executable) {
-return nullptr;
-}
 return _base + _ehdr.e_entry;
 }
 
@@ -366,13 +363,13 @@ void object::set_base(void* base)
   [](const Elf64_Phdr* a, const Elf64_Phdr* b)
   { return a->p_vaddr < b->p_vaddr; });
 
-if (!is_core() && is_non_pie_executable()) {
-// Verify non-PIE executable does not collide with the kernel
+if (!is_core() && !is_pic()) {
+// Verify non-PIC executable ((aka position dependent)) does not 
collide with the kernel
 if (intersects_with_kernel(p->p_vaddr) || 
intersects_with_kernel(q->p_vaddr + q->p_memsz)) {
-abort("Non-PIE executable [%s] collides with kernel: [%p-%p] !\n",
+abort("Non-PIC executable [%s] collides with kernel: [%p-%p] !\n",
 pathname().c_str(), p->p_vaddr, q->p_vaddr + q->p_memsz);
 }
-// Override the passed in value as the base for non-PIEs (Position 
Dependant Executables)
+// Override the passed in value as the base for non-PICs (Position 
Dependant Executables)
 // needs to be set to 0 because all the addresses in it are absolute
 _base = 0x0;
 } else {
@@ -536,8 +533,8 @@ void object::process_headers()
 abort("Unknown p_type in executable %s: %d\n", pathname(), 
phdr.p_type);
 }
 }
-if (!is_core() && _ehdr.e_type == ET_EXEC && 
!_is_dynamically_linked_executable) {
-abort("Statically linked executables are not supported!\n");
+if (!is_core() && is_statically_linked()) {
+abort("Statically linked executables are not supported yet!\n");
 }
 if (_is_dynamically_linked_executable && _tls_segment) {
 auto app_tls_size = get_aligned_tls_size();
@@ -600,6 +597,12 @@ void object::fix_permissions()
 make_text_writable(false);
 }
 
+//Full RELRO applies to dynamically linked executables only
+if (is_statically_linked()) {
+return;
+}
+
+//Process GNU_RELRO segments only to make GOT and others read-only
 for (auto&& phdr : _phdrs) {
 if (phdr.p_type != PT_GNU_RELRO)
 continue;
@@ -888,6 +891,9 @@ constexpr Elf64_Versym old_version_symbol_mask = 
Elf64_Versym(1) << 15;
 
 Elf64_Sym* object::lookup_symbol_old(const char* name)
 {
+if (!dynamic_exists(DT_SYMTAB)) {
+return nullptr;
+}
 auto symtab = dynamic_ptr(DT_SYMTAB);
 auto strtab = dynamic_ptr(DT_STRTAB);
 auto hashtab = dynamic_ptr(DT_HASH);
@@ -917,6 +923,9 @@ dl_new_hash(const char *s)
 
 Elf64_Sym* object::lookup_symbol_gnu(const char* name, bool self_lookup)
 {
+if (!dynamic_exists(DT_SYMTAB)) {
+return nullptr;
+}
 auto symtab = dynamic_ptr(DT_SYMTAB);
 auto strtab = dynamic_ptr(DT_STRTAB);
 auto hashtab = dynamic_ptr(DT_GNU_HASH);
@@ -1019,6 +1028,9 @@ dladdr_info object::lookup_addr(const void* addr)
 if (addr < _base || addr >= _end) {
 return ret;
 }
+if (!dynamic_exists(DT_STRTAB)) {
+return ret;
+}
 ret.fname = _pathname.c_str();
 ret.base = _base;
 auto strtab = dynamic_ptr(DT_STRTAB);
@@ -1068,6 +1080,9 @@ static std::string dirname(std::string path)
 
 void object::load_needed(std::vector>& loaded_objects)
 {
+if (!dynamic_exists(DT_NEEDED)) {
+return;
+}
 std::vector rpath;
 
 std::string rpath_str;
@@ -1462,7 +1477,7 @@ program::load_object(std::string name, 
std::vector extra_path,
 osv::rcu_dispose(old_modules);
 ef->load_segments();
 ef->process_headers();
-if (!ef->is_non_pie_executable())
+if (ef->is_pic())
_next_alloc = ef->end();
 

[osv-dev] [COMMIT osv master] dynamic linker: rename is_executable to is_dynamically_linked_executable

2023-08-31 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

dynamic linker: rename is_executable to is_dynamically_linked_executable

The object::process_headers() sets _is_executable to true when
it encounters a PT_INTERP header. In essence, it means the ELF
being processed is a dynamically linked executable (and not a shared
library).

In the context of the upcoming changes to support statically linked
executables, the name "is_executable()" is unfortunately too general.
Therefore this patch changes it to "is_dynamically_linked_executable()"
to hopefully make it somewhat easier to understand the dynamic linker
code once we support statically linked executables.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/arch/aarch64/arch-elf.cc b/arch/aarch64/arch-elf.cc
--- a/arch/aarch64/arch-elf.cc
+++ b/arch/aarch64/arch-elf.cc
@@ -66,7 +66,7 @@ bool object::arch_relocate_rela(u32 type, u32 sym, void *addr,
 if (sym) {
 auto sm = symbol(sym);
 ulong tls_offset;
-if (sm.obj->is_executable()) {
+if (sm.obj->is_dynamically_linked_executable()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -119,7 +119,7 @@ void object::arch_relocate_tls_desc(u32 sym, void *addr, 
Elf64_Sxword addend)
 ulong tls_offset;
 if (sym) {
 auto sm = symbol(sym);
-if (sm.obj->is_executable() || sm.obj->is_core()) {
+if (sm.obj->is_dynamically_linked_executable() || sm.obj->is_core()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -163,7 +163,7 @@ void object::prepare_initial_tls(void* buffer, size_t size,
 
 void object::prepare_local_tls(std::vector& offsets)
 {
-if (!_static_tls && !is_executable()) {
+if (!_static_tls && !is_dynamically_linked_executable()) {
 return;
 }
 
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -120,7 +120,7 @@ void thread::setup_tcb()
 assert(obj);
 user_tls_size = obj->initial_tls_size();
 user_tls_data = obj->initial_tls();
-if (obj->is_executable()) {
+if (obj->is_dynamically_linked_executable()) {
executable_tls_size = obj->get_tls_size();
 }
 }
diff --git a/arch/x64/arch-elf.cc b/arch/x64/arch-elf.cc
--- a/arch/x64/arch-elf.cc
+++ b/arch/x64/arch-elf.cc
@@ -138,7 +138,7 @@ bool object::arch_relocate_rela(u32 type, u32 sym, void 
*addr,
 if (sym) {
 auto sm = symbol(sym);
 ulong tls_offset;
-if (sm.obj->is_executable()) {
+if (sm.obj->is_dynamically_linked_executable()) {
 // If this is an executable (pie or position-dependant one)
 // then the variable is located in the reserved slot of the TLS
 // right where the kernel TLS lives
@@ -202,7 +202,7 @@ void object::prepare_initial_tls(void* buffer, size_t size,
 
 void object::prepare_local_tls(std::vector& offsets)
 {
-if (!_static_tls && !is_executable()) {
+if (!_static_tls && !is_dynamically_linked_executable()) {
 return;
 }
 
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -210,7 +210,7 @@ void thread::setup_tcb()
 assert(obj);
 user_tls_size = obj->initial_tls_size();
 user_tls_data = obj->initial_tls();
-if (obj->is_executable()) {
+if (obj->is_dynamically_linked_executable()) {
executable_tls_size = obj->get_tls_size();
aligned_executable_tls_size = obj->get_aligned_tls_size();
 }
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -123,7 +123,7 @@ object::object(program& prog, std::string pathname)
 , _initial_tls_size(0)
 , _dynamic_table(nullptr)
 , _module_index(_prog.register_dtv(this))
-, _is_executable(false)
+, _is_dynamically_linked_executable(false)
 , _init_called(false)
 , _eh_frame(0)
 , _visibility_thread(nullptr)
@@ -249,7 +249,7 @@ const char * object::symbol_name(const Elf64_Sym * sym) {
 }
 
 void* object::entry_point() const {
-if (!_is_executable) {
+if (!_is_dynamically_linked_executable) {
 return nullptr;
 }
 return _base + _ehdr.e_entry;
@@ -484,7 +484,7 @@ void object::process_headers()
 _dynamic_table = reinterpret_cast(_base + 
phdr.p_vaddr);
 break;
 case PT_INTERP:
-_is_executable = true;
+_is_dynamically_linked_executable = true;
 break;
 case PT_NOTE: {
 if (phdr.p_memsz < 16) {
@@ -536,10 

[osv-dev] [COMMIT osv master] manifest_from_host.sh: handle dl=linux to not filter libraries

2023-08-28 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

manifest_from_host.sh: handle dl=linux to not filter libraries

Normally, the manifest_from_host.sh filters out the libraries
provided by OSv kernel like libm.so.6, libc.so.6, libstdc++.so.6, etc.

This patch provides a way to disable it by setting the dl environment
variable to linux like so:

dl=linux ./scripts/manifest_from_host.sh /bin/ls

/ls: /usr/bin/ls
/lib64/ld-linux-x86-64.so.2: /lib64/ld-linux-x86-64.so.2
/lib64/libcap.so.2: /lib64/libcap.so.2
/lib64/libc.so.6: /lib64/libc.so.6
/lib64/libpcre2-8.so.0: /lib64/libpcre2-8.so.0
/lib64/libselinux.so.1: /lib64/libselinux.so.1
/etc/ld.so.cache: /etc/ld.so.cache

This is going to be useful to run statically linked executables and
dynamically linked executables under Linux's dynamic linker (aka
ld-linux-*) on OSv.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/scripts/manifest_from_host.sh b/scripts/manifest_from_host.sh
--- a/scripts/manifest_from_host.sh
+++ b/scripts/manifest_from_host.sh
@@ -64,16 +64,21 @@ find_library()
 
 output_manifest()
 {
-   local so_path="$1"
+   local so_files="$1"
+   local so_filter="$2"
echo "# " | tee -a $OUTPUT
-   echo "# Dependencies" | tee -a $OUTPUT
+   echo "# Dependencies" | tee -a $OUTPUT
echo "# " | tee -a $OUTPUT
-   if [[ $conf_hide_symbols == 1 ]]; then
-   lddtree $so_path | grep -v "not found" | grep -v "$so_path" | 
grep -v "ld-linux-${MACHINE}" | \
+   if [[ $dl == "linux" ]]; then
+   lddtree $so_files | grep -v "not found" | grep -v "$so_filter" 
| \
+   sed 's/.*=> //' | awk '// { printf("%s: %s\n", $0, $0); 
}' | sort | uniq | tee -a $OUTPUT
+   echo "/etc/ld.so.cache: /etc/ld.so.cache" | tee -a $OUTPUT
+   elif [[ $conf_hide_symbols == 1 ]]; then
+   lddtree $so_files | grep -v "not found" | grep -v "$so_filter" 
| grep -v "ld-linux-${MACHINE}" | \
grep -Pv 
'lib(gcc_s|resolv|c|m|pthread|dl|rt|aio|xenstore|crypt|selinux)\.so([\d.]+)?' | 
\
sed 's/ =>/:/' | sed 's/^\s*lib/\/usr\/lib\/lib/' | 
sort | uniq | tee -a $OUTPUT
else
-   lddtree $so_path | grep -v "not found" | grep -v "$so_path" | 
grep -v "ld-linux-${MACHINE}" | \
+   lddtree $so_files | grep -v "not found" | grep -v "$so_filter" 
| grep -v "ld-linux-${MACHINE}" | \
grep -Pv 
'lib(gcc_s|resolv|c|m|pthread|dl|rt|stdc\+\+|aio|xenstore|crypt|selinux)\.so([\d.]+)?'
 | \
sed 's/ =>/:/' | sed 's/^\s*lib/\/usr\/lib\/lib/' | 
sort | uniq | tee -a $OUTPUT
fi
@@ -147,18 +152,7 @@ if [[ -d $NAME_OR_PATH ]]; then
echo "$GUEST_PATH_ROOT/$SUBDIRECTORY_PATH**: $(realpath 
$NAME_OR_PATH)/$SUBDIRECTORY_PATH**" | tee $OUTPUT
if [[ $RESOLVE == true ]]; then
SO_FILES=$(find $NAME_OR_PATH/$SUBDIRECTORY_PATH -type f -name 
\*so)
-   echo "# " | tee -a $OUTPUT
-   echo "# Dependencies" | tee -a $OUTPUT
-   echo "# " | tee -a $OUTPUT
-   if [[ $conf_hide_symbols == 1 ]]; then
-   lddtree $SO_FILES | grep -v "not found" | grep -v 
"$NAME_OR_PATH/$SUBDIRECTORY_PATH" | grep -v "ld-linux-${MACHINE}" | \
-   grep -Pv 
'lib(gcc_s|resolv|c|m|pthread|dl|rt|aio|xenstore|crypt|selinux)\.so([\d.]+)?' | 
\
-   sed 's/ =>/:/' | sed 
's/^\s*lib/\/usr\/lib\/lib/' | sort | uniq | tee -a $OUTPUT
-   else
-   lddtree $SO_FILES | grep -v "not found" | grep -v 
"$NAME_OR_PATH/$SUBDIRECTORY_PATH" | grep -v "ld-linux-${MACHINE}" | \
-   grep -Pv 
'lib(gcc_s|resolv|c|m|pthread|dl|rt|stdc\+\+|aio|xenstore|crypt|selinux)\.so([\d.]+)?'
 | \
-   sed 's/ =>/:/' | sed 
's/^\s*lib/\/usr\/lib\/lib/' | sort | uniq | tee -a $OUTPUT
-   fi
+   output_manifest "$SO_FILES" "$NAME_OR_PATH/$SUBDIRECTORY_PATH"
fi
exit 0
 fi
@@ -181,7 +175,7 @@ if [[ -f $NAME_OR_PATH ]]; then
fi
fi
REAL_PATH=$(realpath $NAME_OR_PATH)
-   output_manifest "$REAL_PATH"
+   output_manifest "$REAL_PATH" "$REAL_PATH"
else
echo "The $NAME_OR_PATH is not ELF" >&2
exit 1
@@ -194,7 +188,7 @@ else
if [[ "$so_path" != "" ]]; then
echo "# Shared library" | tee $OUTPUT
echo "/usr/lib/$so_name: $so_path" | tee -a 
$OUTPUT
-   output_manifest $so_path
+   output_manifest $so_path $so_path
 else
 exit 0
 fi
@@ 

[osv-dev] [COMMIT osv master] vdso: initialize it only once

2023-08-27 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

vdso: initialize it only once

The namespaces functionality requires that a new elf::program be instantiated
for every new namespace. That would trigger the vdso ELF to be re-initialized,
which is wrong and manifests itself as a failure of tst-namespace.cc
on aarch64:

0x000fc00ea06c in abort (fmt=fmt@entry=0xfc049be50 "Assertion failed: %s 
(%s: %s: %d)\n") at runtime.cc:145
0x000fc00ea094 in __assert_fail (expr=, file=, line=, func=) at runtime.cc:153
0x000fc0163a54 in elf::object::relocate_pltgot 
(this=this@entry=0x600040e8e810) at core/elf.cc:783
0x000fc0163ad4 in elf::object::relocate (this=0x600040e8e810) at 
core/elf.cc:866
0x000fc0163c1c in elf::program::initialize_libvdso 
(this=this@entry=0x600040d1ce00) at 
/usr/include/c++/11/bits/shared_ptr_base.h:1295
0x000fc01642ec in elf::program::program (this=this@entry=0x600040d1ce00, 
addr=addr@entry=0x1004) at core/elf.cc:1359
0x000fc0283b30 in osv::application::new_program (this=0x600040d1cc10) at 
core/app.cc:520

To prevent it, we change the vdso initialization logic to ensure
it only happens for the very initial program instance. Otherwise
we simply reference the vdso instance of the initial program
when constructing other new programs.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/elf.cc b/core/elf.cc
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -1361,12 +1361,16 @@ program::program(void* addr)
 
 void program::initialize_libvdso()
 {
-_libvdso = std::make_shared(*this, _start);
-_libvdso->set_base(_start);
-_libvdso->load_segments();
-_libvdso->process_headers();
-_libvdso->relocate();
-_libvdso->fix_permissions();
+if (!s_program) {
+_libvdso = std::make_shared(*this, _start);
+_libvdso->set_base(_start);
+_libvdso->load_segments();
+_libvdso->process_headers();
+_libvdso->relocate();
+_libvdso->fix_permissions();
+} else {
+_libvdso = s_program->_libvdso;
+}
 }
 
 void program::set_search_path(std::initializer_list path)

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/2ff8150603f2b21f%40google.com.


[osv-dev] [COMMIT osv master] vdso: make it work on aarch64

2023-08-27 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

vdso: make it work on aarch64

This patch fixes a couple of issues with vdso on aarch64.

Firstly, it fixes the compilation problem with the .align
directive, which is interpreted differently on aarch64 (#bytes vs bits).

Secondly, it corrects the names of the vdso symbols exported
by the library - "__kernel_*" vs "__vdso_*" - as well as which ones are exported.
New arch-specific version files have been added to properly
export those symbols.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -2161,7 +2161,7 @@ $(out)/libenviron.so: $(environ_sources)
 $(out)/libvdso.so: libc/vdso/vdso.c
$(makedir)
$(call quiet, $(CC) $(CFLAGS) -c -fPIC -o $(out)/libvdso.o 
libc/vdso/vdso.c, CC libvdso.o)
-   $(call quiet, $(LD) -shared -fPIC -z now -o $(out)/libvdso.so 
$(out)/libvdso.o -T libc/vdso/vdso.lds, LINK libvdso.so)
+   $(call quiet, $(LD) -shared -fPIC -z now -o $(out)/libvdso.so 
$(out)/libvdso.o -T libc/vdso/vdso.lds 
--version-script=libc/vdso/$(arch)/vdso.version, LINK libvdso.so)
 
 bootfs_manifest ?= bootfs.manifest.skel
 
diff --git a/libc/vdso/aarch64/vdso.version b/libc/vdso/aarch64/vdso.version
--- a/libc/vdso/aarch64/vdso.version
+++ b/libc/vdso/aarch64/vdso.version
@@ -0,0 +1,9 @@
+/* Pretend like we are Linux 2.6.39 */
+LINUX_2.6.39 {
+global:
+__kernel_gettimeofday;
+__kernel_clock_gettime;
+__kernel_clock_getres;
+local:
+*;
+};
diff --git a/libc/vdso/vdso.c b/libc/vdso/vdso.c
--- a/libc/vdso/vdso.c
+++ b/libc/vdso/vdso.c
@@ -2,6 +2,7 @@
 #include 
 #include 
 
+#ifdef __x86_64__
 __attribute__((__visibility__("default")))
 time_t __vdso_time(time_t *tloc)
 {
@@ -19,3 +20,24 @@ int __vdso_clock_gettime(clockid_t clk_id, struct timespec 
*tp)
 {
 return clock_gettime(clk_id, tp);
 }
+#endif
+
+#ifdef __aarch64__
+__attribute__((__visibility__("default")))
+int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+return gettimeofday(tv, tz);
+}
+
+__attribute__((__visibility__("default")))
+int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
+{
+return clock_gettime(clk_id, tp);
+}
+
+__attribute__((__visibility__("default")))
+int __kernel_clock_getres(clockid_t clk_id, struct timespec *tp)
+{
+return clock_getres(clk_id, tp);
+}
+#endif
diff --git a/libc/vdso/vdso.lds b/libc/vdso/vdso.lds
--- a/libc/vdso/vdso.lds
+++ b/libc/vdso/vdso.lds
@@ -25,16 +25,3 @@ PHDRS
 /*note PT_NOTE FLAGS(4);*/
 eh_frame_hdr PT_GNU_EH_FRAME FLAGS(4);
 }
-
-/* Pretend like we are Linux 2.6 */
-VERSION
-{
-LINUX_2.6 {
-global:
-__vdso_clock_gettime;
-__vdso_gettimeofday;
-__vdso_time;
-local:
-*;
-};
-}
diff --git a/libc/vdso/x64/vdso.version b/libc/vdso/x64/vdso.version
--- a/libc/vdso/x64/vdso.version
+++ b/libc/vdso/x64/vdso.version
@@ -0,0 +1,9 @@
+/* Pretend like we are Linux 2.6 */
+LINUX_2.6 {
+global:
+__vdso_clock_gettime;
+__vdso_gettimeofday;
+__vdso_time;
+local:
+*;
+};
diff --git a/libvdso-content.S b/libvdso-content.S
--- a/libvdso-content.S
+++ b/libvdso-content.S
@@ -1,5 +1,10 @@
 .pushsection .data
+#ifdef __x86_64__
 .align 4096
+#endif
+#ifdef __aarch64__
+.align 16
+#endif
 .global libvdso_start
 .hidden libvdso_start
 libvdso_start:

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/19a5100603f2b2f6%40google.com.


[osv-dev] [COMMIT osv master] auxv AT_RANDOM: rollback use of getrandom() for now

2023-08-24 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: Waldemar Kozaczuk 
Branch: master

auxv AT_RANDOM: rollback use of getrandom() for now

The commit 597a8b56ec83413c06b363fecdace646a297224a changed
prepare_argv() to add AT_RANDOM to the auxiliary vector.

However, when I ran the unit tests I noticed that they take in total
on average 20 seconds longer - a bump from ~200 to ~220 after this commit.
There are 142 unit tests run as part of the RoFS image, which means that
on average each test ran ~140ms slower after this commit.

Removing the call to getrandom() and simply using the
pseudo-random rand_r() fixed this time regression.

Ideally, we would like to improve the quality of the AT_RANDOM randomness, but
for that we have to use a different tool than getrandom() or fix its
issue.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/app.cc b/core/app.cc
--- a/core/app.cc
+++ b/core/app.cc
@@ -20,7 +20,6 @@
 #include 
 #include 
 #include "libc/pthread.hh"
-#include 
 
 using namespace boost::range;
 
@@ -363,13 +362,14 @@ void application::prepare_argv(elf::program *program)
 }
 
 // Initialize random bytes array so it can be passed as AT_RANDOM auxv 
vector
-if (getrandom(random_bytes, sizeof(random_bytes), 0) != 
sizeof(random_bytes)) {
-// Fall back to rand_r()
-auto d = osv::clock::wall::now().time_since_epoch();
-unsigned seed = 
std::chrono::duration_cast(d).count() % 10;
-for (unsigned idx = 0; idx < sizeof(random_bytes)/(sizeof(int)); 
idx++) {
-reinterpret_cast(random_bytes)[idx] = rand_r();
-}
+//TODO: Ideally we would love to use something better than the 
pseudo-random scheme below
+//based on the time since epoch and rand_r(). But the getrandom() at this 
points does
+//no always work and is also very slow. Once we figure out how to fix or 
improve it
+//we may refine it with a better solution. For now it will do.
+auto d = osv::clock::wall::now().time_since_epoch();
+unsigned seed = 
std::chrono::duration_cast(d).count() % 10;
+for (unsigned idx = 0; idx < sizeof(random_bytes)/(sizeof(int)); idx++) {
+reinterpret_cast(random_bytes)[idx] = rand_r();
 }
 
 int auxv_parameters_count = 4;

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/52c7790603ac0c3f%40google.com.


[osv-dev] [COMMIT osv master] libvdso: load it from kernel instead of filesystem

2023-08-23 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

libvdso: load it from kernel instead of filesystem

This patch changes how libvdso (Virtual Dynamic Shared Object)
is constructed and loaded.

Originally, libvdso support was added as part of the commit
e60339de2676b186c1ec957102a5a384506d127e to support Golang apps
that use libvdso library local functions to speed up execution
over corresponding syscalls. During discovery work to support
statically linked executables or executables loaded by Linux
dynamic linker on OSv, I noticed that glibc would use libvdso
as well (see https://man7.org/linux/man-pages/man7/vdso.7.html),
however it would fail to work with our current libvdso.
More specifically the issue had to do with the fact our
current libvdso contains 4 PT_LOAD segments and OSv loads
them in memory from filesystem in a way that is not compatible
with what glibc expects. The fragment from the linux vdso linker script
(https://elixir.bootlin.com/linux/v4.1.31/source/arch/x86/vdso/vdso-layout.lds.S)
has this comment next to the PHDRS tag:

"We must supply the ELF program headers explicitly to get just one
 PT_LOAD segment, and set the flags explicitly to make segments read-only."

So we want to make our libvdso.so look similar to what the Linux
one would look like.

To that end we define a new libvdso linker script that enforces
a single-segment layout and exports all symbols. Relatedly, we modify
the main makefile to build libvdso using the new linker script and with
eagerly resolvable symbols. On top of this we put the content of
libvdso.so into a libvdso.o object that becomes part of the kernel ELF
in a similar way to how the bootfs content does. We also remove libvdso.so from all
manifest files.

The beginning of the vdso in the kernel image - libvdso_start - is where
we load the vdso ELF object from as an object::memory_image at the end of
the kernel ELF initialization in the elf::program() constructor. The
mechanism of loading and processing libvdso.so is handled by the new
method program::initialize_libvdso().

The advantage of this approach is that loading of the vdso is no longer
dependent on the filesystem, it takes less memory (the object is loaded
only once), it is smaller - 2656 vs 13544 before - and it is probably
slightly faster (all is in memory already). The disadvantage is
that it makes the kernel 4K larger.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -2125,7 +2125,7 @@ def_symbols = --defsym=OSV_KERNEL_BASE=$(kernel_base) \
   --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift)
 endif
 
-$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o 
$(loader_options_dep) $(version_script_file)
+$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o 
$(out)/libvdso-content.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ $(def_symbols) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags 
-L$(out)/arch/$(arch) \
 $(patsubst 
%version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
@@ -2137,7 +2137,7 @@ $(out)/loader.elf: $(stage1_targets) 
arch/$(arch)/loader.ld $(out)/bootfs.o $(lo
@scripts/libosv.py $(out)/osv.syms $(out)/libosv.ld 
`scripts/osv-version.sh` | $(CC) -c -o $(out)/osv.o -x assembler -
$(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so 
-T $(out)/libosv.ld, LIBOSV.SO)
 
-$(out)/zfs_builder.elf: $(stage1_targets) arch/$(arch)/loader.ld 
$(out)/zfs_builder_bootfs.o $(loader_options_dep) $(version_script_file)
+$(out)/zfs_builder.elf: $(stage1_targets) arch/$(arch)/loader.ld 
$(out)/zfs_builder_bootfs.o $(out)/libvdso-content.o $(loader_options_dep) 
$(version_script_file)
$(call quiet, $(LD) -o $@ $(def_symbols) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags 
-L$(out)/arch/$(arch) \
 $(patsubst 
%version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
@@ -2161,7 +2161,7 @@ $(out)/libenviron.so: $(environ_sources)
 $(out)/libvdso.so: libc/vdso/vdso.c
$(makedir)
$(call quiet, $(CC) $(CFLAGS) -c -fPIC -o $(out)/libvdso.o 
libc/vdso/vdso.c, CC libvdso.o)
-   $(call quiet, $(LD) -shared -fPIC -o $(out)/libvdso.so $(out)/libvdso.o 
--version-script=libc/vdso/vdso.version, LINK libvdso.so)
+   $(call quiet, $(LD) -shared -fPIC -z now -o $(out)/libvdso.so 
$(out)/libvdso.o -T libc/vdso/vdso.lds, LINK libvdso.so)
 
 bootfs_manifest ?= bootfs.manifest.skel
 
@@ -2180,12 +2180,18 @@ libgcc_s_dir := ../../$(aarch64_gccbase)/lib64
 endif
 
 $(out)/bootfs.bin: scripts/mkbootfs.py $(bootfs_manifest) 
$(bootfs_manifest_dep) $(tools:%=$(out)/%) \
-   $(out)/libenviron.so $(out)/libvdso.so $(out)/libsolaris.so
+   $(out)/libenviron.so $(out)/libsolaris.so
$(call quiet, olddir=`pwd`; cd $(out); "$$olddir"/scripts/mkbootfs.py 
-o bootfs.bin 

[osv-dev] [COMMIT osv master] auxv: expose AT_MINSIGSTKSZ and AT_RANDOM

2023-08-23 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

auxv: expose AT_MINSIGSTKSZ and AT_RANDOM

This patch adds AT_MINSIGSTKSZ and AT_RANDOM to auxiliary vector.
These two values are needed to support static executables.

Signed-off-by: Waldemar Kozaczuk 

---
diff --git a/core/app.cc b/core/app.cc
--- a/core/app.cc
+++ b/core/app.cc
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include "libc/pthread.hh"
+#include 
 
 using namespace boost::range;
 
@@ -333,6 +334,8 @@ void application::main()
 // _entry_point() doesn't return
 }
 
+static u64 random_bytes[2];
+
 void application::prepare_argv(elf::program *program)
 {
 // Prepare program_* variable used by the libc
@@ -360,13 +363,23 @@ void application::prepare_argv(elf::program *program)
 }
 
 // Load vdso library if available
-int auxv_parameters_count = 2;
+int auxv_parameters_count = 4;
 _libvdso = program->get_library("libvdso.so");
 if (!_libvdso) {
 auxv_parameters_count--;
 WARN_ONCE("application::prepare_argv(): missing libvdso.so -> may 
prevent shared libraries specifically Golang ones from functioning\n");
 }
 
+// Initialize random bytes array so it can be passed as AT_RANDOM auxv 
vector
+if (getrandom(random_bytes, sizeof(random_bytes), 0) != 
sizeof(random_bytes)) {
+// Fall back to rand_r()
+auto d = osv::clock::wall::now().time_since_epoch();
+unsigned seed = 
std::chrono::duration_cast(d).count() % 10;
+for (unsigned idx = 0; idx < sizeof(random_bytes)/(sizeof(int)); 
idx++) {
+reinterpret_cast(random_bytes)[idx] = rand_r();
+}
+}
+
 // Allocate the continuous buffer for argv[] and envp[]
 _argv.reset(new char*[_args.size() + 1 + envcount + 1 + 
sizeof(Elf64_auxv_t) * (auxv_parameters_count + 1)]);
 
@@ -388,7 +401,6 @@ void application::prepare_argv(elf::program *program)
 }
 contig_argv[_args.size() + 1 + envcount] = nullptr;
 
-
 // Pass the VDSO library to the application.
 Elf64_auxv_t* _auxv =
 reinterpret_cast(_argv[_args.size() + 1 + 
envcount + 1]);
@@ -401,6 +413,12 @@ void application::prepare_argv(elf::program *program)
 _auxv[auxv_idx].a_type = AT_PAGESZ;
 _auxv[auxv_idx++].a_un.a_val = sysconf(_SC_PAGESIZE);
 
+_auxv[auxv_idx].a_type = AT_MINSIGSTKSZ;
+_auxv[auxv_idx++].a_un.a_val = sysconf(_SC_MINSIGSTKSZ);
+
+_auxv[auxv_idx].a_type = AT_RANDOM;
+_auxv[auxv_idx++].a_un.a_val = reinterpret_cast(random_bytes);
+
 _auxv[auxv_idx].a_type = AT_NULL;
 _auxv[auxv_idx].a_un.a_val = 0;
 }

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/6aaf8c0603a03e5e%40google.com.


[osv-dev] [COMMIT osv master] vfs: add mknodat

2023-08-22 Thread Commit Bot
From: Waldemar Kozaczuk 
Committer: WALDEMAR KOZACZUK 
Branch: master

vfs: add mknodat

Signed-off-by: Waldemar Kozaczuk 

Fix signature

---
diff --git a/exported_symbols/osv_ld-musl.so.1.symbols 
b/exported_symbols/osv_ld-musl.so.1.symbols
--- a/exported_symbols/osv_ld-musl.so.1.symbols
+++ b/exported_symbols/osv_ld-musl.so.1.symbols
@@ -646,6 +646,7 @@ mkdtemp
 mkfifo
 mkfifoat
 mknod
+mknodat
 mkostemp
 mkostemp64
 mkostemps
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -315,6 +315,13 @@ int mknod(const char *pathname, mode_t mode, dev_t dev)
 return __xmknod(0, pathname, mode, );
 }
 
+OSV_LIBC_API
+int mknodat(int dirfd, const char *pathname, mode_t mode, dev_t dev)
+{
+return vfs_fun_at2(dirfd, pathname, [mode, dev](const char *path) {
+return mknod(path, mode, dev);
+});
+}
 
 TRACEPOINT(trace_vfs_lseek, "%d 0x%x %d", int, off_t, int);
 TRACEPOINT(trace_vfs_lseek_ret, "0x%x", off_t);
diff --git a/linux.cc b/linux.cc
--- a/linux.cc
+++ b/linux.cc
@@ -566,6 +566,7 @@ OSV_LIBC_API long syscall(long number, ...)
 SYSCALL4(renameat, int, const char *, int, const char *);
 SYSCALL1(sys_brk, void *);
 SYSCALL4(clock_nanosleep, clockid_t, int, const struct timespec *, struct 
timespec *);
+SYSCALL4(mknodat, int, const char *, mode_t, dev_t);
 SYSCALL5(statx, int, const char *, int, unsigned int, struct statx *);
 }
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/f472bb0603865e0a%40google.com.


  1   2   3   4   5   6   7   8   9   10   >