Bug#1042842: network interface names wrong in domU (>10 interfaces)

2024-03-19 Thread Valentin Kleibel

Hello,

I'm currently in the process of cleaning up and i noticed this bug i 
reported is still open.


Based on what we know now i think the bug can be closed.

To summarize:
* the ordering of network interfaces (and therefore the ethn names) was 
never meant to be stable and changed with Xen 4.17
* domUs starting with bookworm use enXn by default and their order 
matches the config
* bullseye domUs on a bookworm dom0 with more than 10 network interfaces 
need a workaround:

  * use udev from bullseye-backports and enXn naming scheme
  * use custom udev rules to get fixed interface names

Thanks for your help with this issue,
Valentin



Bug#1063338: [regression 6.1.76] dlm: cannot start dlm midcomms -97 after backport of e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")

2024-02-09 Thread Valentin Kleibel

Hi


Would you be able to confirm that the attached patch fixes your issue as well?


Yes it does.

@debian maintainers: is it possible to include this patch in the next 
point release?


Thank you for your work,
Valentin



Bug#1063338: [regression 6.1.76] dlm: cannot start dlm midcomms -97 after backport of e9cdebbe23f1 ("dlm: use kernel_connect() and kernel_bind()")

2024-02-08 Thread Valentin Kleibel

Hi Jordan, hi all


Just a quick look comparing dlm_tcp_listen_bind between the latest 6.1
and 6.6 stable branches,
it looks like there is a mismatch here with the dlm_local_addr[0] parameter.

6.1


static int dlm_tcp_listen_bind(struct socket *sock)
{
int addr_len;

/* Bind to our port */
make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
addr_len);
}

6.6

static int dlm_tcp_listen_bind(struct socket *sock)
{
int addr_len;

/* Bind to our port */
make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
addr_len);
}

6.6 contains commit c51c9cd8 (fs: dlm: don't put dlm_local_addrs on heap) which
changed

static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];

to

static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT];

It looks like kernel_bind() in 6.1 needs to be modified to match.


We tried to apply commit c51c9cd8 (fs: dlm: don't put dlm_local_addrs on 
heap) to the debian kernel 6.1.76 and came up with the attached patch. 
Besides the different offsets there is a slight change in dlm_tcp_bind() 
where in 6.1.76 kernel_bind() is used instead of sock->ops->bind() in 
the original commit.


This patch solves the issue we experienced.

Thanks for your help,
Valentin--- a/fs/dlm/lowcomms.c	2024-02-08 10:42:19.328861479 +0100
+++ b/fs/dlm/lowcomms.c	2024-02-08 10:57:22.900463149 +0100
@@ -174,7 +174,7 @@
 static DEFINE_SPINLOCK(dlm_node_addrs_spin);
 
 static struct listen_connection listen_con;
-static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
+static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT];
 static int dlm_local_count;
 int dlm_allow_conn;
 
@@ -398,7 +398,7 @@
 	if (!sa_out)
 		return 0;
 
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
+	if (dlm_local_addr[0].ss_family == AF_INET) {
 		struct sockaddr_in *in4  = (struct sockaddr_in *) &sas;
 		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
@@ -727,7 +727,7 @@
 static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 			  int *addr_len)
 {
-	saddr->ss_family =  dlm_local_addr[0]->ss_family;
+	saddr->ss_family =  dlm_local_addr[0].ss_family;
 	if (saddr->ss_family == AF_INET) {
 		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
 		in4_addr->sin_port = cpu_to_be16(port);
@@ -1167,7 +1167,7 @@
 	int i, addr_len, result = 0;
 
 	for (i = 0; i < dlm_local_count; i++) {
-		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
+		memcpy(&localaddr, &dlm_local_addr[i], sizeof(localaddr));
 		make_sockaddr(&localaddr, port, &addr_len);
 
 		if (!i)
@@ -1187,7 +1187,7 @@
 /* Get local addresses */
 static void init_local(void)
 {
-	struct sockaddr_storage sas, *addr;
+	struct sockaddr_storage sas;
 	int i;
 
 	dlm_local_count = 0;
@@ -1195,21 +1195,10 @@
 		if (dlm_our_addr(&sas, i))
 			break;
 
-		addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS);
-		if (!addr)
-			break;
-		dlm_local_addr[dlm_local_count++] = addr;
+		memcpy(&dlm_local_addr[dlm_local_count++], &sas, sizeof(sas));
 	}
 }
 
-static void deinit_local(void)
-{
-	int i;
-
-	for (i = 0; i < dlm_local_count; i++)
-		kfree(dlm_local_addr[i]);
-}
-
 static struct writequeue_entry *new_writequeue_entry(struct connection *con)
 {
 	struct writequeue_entry *entry;
@@ -1575,7 +1564,7 @@
 	}
 
 	/* Create a socket to communicate with */
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+	result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family,
   SOCK_STREAM, dlm_proto_ops->proto, &sock);
 	if (result < 0)
 		goto socket_err;
@@ -1786,7 +1775,6 @@
 	foreach_conn(free_conn);
 	srcu_read_unlock(&connections_srcu, idx);
 	work_stop();
-	deinit_local();
 
 	dlm_proto_ops = NULL;
 }
@@ -1803,7 +1791,7 @@
 	if (result < 0)
 		return result;
 
-	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+	result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family,
   SOCK_STREAM, dlm_proto_ops->proto, &sock);
 	if (result < 0) {
 		log_print("Can't create comms socket: %d", result);
@@ -1842,7 +1830,7 @@
 	/* Bind to our cluster-known address connecting to avoid
 	 * routing problems.
 	 */
-	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
+	memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr));
 	make_sockaddr(&src_addr, 0, &addr_len);
 
 	result = kernel_bind(sock, (struct sockaddr *)&src_addr,
@@ -1899,7 +1887,7 @@
 	int addr_len;
 
 	/* Bind to our port */
-	make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
+	make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
 	return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
 			   addr_len);
 }
@@ -1992,7 +1980,7 @@
 
 	error = work_start();
 	if (error)
-		goto fail_local;
+		goto fail;
 
 	dlm_allow_conn = 1;
 
@@ -2022,8 +2010,6 @@
 fail_proto_ops:
 	dlm_allow_conn = 0;
 	work_stop();
-fail_local:
-	deinit_local();
 fail:
 	return error;
 }


Bug#1063338: dlm: cannot start dlm midcomms -97

2024-02-06 Thread Valentin Kleibel

Package: linux-image-amd64
Version: 6.1.76+1
Source: linux
Source-Version: 6.1.76+1
Severity: important
Control: notfound -1 6.6.15-2

Dear Maintainers,

We discovered a bug affecting dlm that prevents any tcp communications 
by dlm when booted with debian kernel 6.1.76-1.


Dlm startup works (corosync-cpgtool shows the dlm:controld group with 
all expected nodes) but as soon as we try to add a lockspace dmesg shows:

```
dlm: Using TCP for communications
dlm: cannot start dlm midcomms -97
```

It seems that commit "dlm: use kernel_connect() and kernel_bind()" 
(e9cdebbe) was merged to 6.1.


Checking the code it seems that the changed function 
dlm_tcp_listen_bind() fails with exit code 97 (EAFNOSUPPORT)

It is called from

dlm/lockspace.c: threads_start() -> dlm_midcomms_start()
dlm/midcomms.c: dlm_midcomms_start() -> dlm_lowcomms_start()
dlm/lowcomms.c: dlm_lowcomms_start() -> dlm_listen_for_all() -> 
dlm_proto_ops->listen_bind() = dlm_tcp_listen_bind()


The error code is returned all the way to threads_start() where the 
error message is emitted.


Booting with the unsigned kernel from testing (6.6.15-2), which also 
contains this commit, works without issues.


I'm not sure what additional changes are required to get this working or 
if rolling back this change is an option.


We'd be happy to test patches that might fix this issue.

Thanks for your help,
Valentin



Bug#1060394: network setup for bookworm uses eth0 instead of enX0

2024-01-10 Thread Valentin Kleibel

Package: xen-tools
Version: 4.9.2-1
Tags: patch

Dear Maintainers,

The default network interface naming scheme for bookworm domUs is 
enX[num] but the network setup script used to fill 
/etc/network/interfaces still assumes eth0 for the first network interface.


I think either the script 
/usr/share/xen-tools/common/40-setup-networking-deb should be changed or 
a changed copy should be used for 
/usr/share/xen-tools/bookworm.d/40-setup-networking instead of the symlink.


I've attached a simple patch that i used creating new bookworm domUs.

Thanks for your work,
Valentin--- /usr/share/xen-tools/common/40-setup-networking-deb.orig2024-01-09 18:22:08.130262212 +0100
+++ /usr/share/xen-tools/common/40-setup-networking-deb 2024-01-09 18:21:34.908959639 +0100
@@ -49,9 +49,9 @@
 iface lo inet loopback
 
 # The primary network interface
-auto eth0
-iface eth0 inet dhcp
-# post-up ethtool -K eth0 tx off
+auto enX0
+iface enX0 inet dhcp
+# post-up ethtool -K enX0 tx off
 
 #
 # The commented out line above will disable TCP checksumming which
@@ -105,14 +105,14 @@
 iface lo inet loopback
 
 # The primary network interface
-auto eth0
-iface eth0 inet static
+auto enX0
+iface enX0 inet static
  address ${ip1}
 ${gway}
  netmask ${netmask}
 ${bcast}
 ${point}
- # post-up  ethtool -K eth0 tx off
+ # post-up  ethtool -K enX0 tx off
 
 #
 # The commented out line above will disable TCP checksumming which
@@ -131,11 +131,11 @@
 logMessage Adding etho:${interface}
 
 cat <<E_O_STATIC >>${prefix}/etc/network/interfaces
-auto eth0:${interface}
-iface eth0:${interface} inet static
+auto enX0:${interface}
+iface enX0:${interface} inet static
  address ${value}
  netmask ${netmask}
- # post-up  ethtool -K eth0 tx off
+ # post-up  ethtool -K enX0 tx off
 E_O_STATIC
 count=`expr $count + 1`
 interface=`expr $interface + 1`


Bug#1042842: [Pkg-xen-devel] Bug#1042842: network interface names wrong in domU (>10 interfaces)

2023-08-08 Thread Valentin Kleibel
On [0], you can read "In both cases the device naming is subject to the 
usual guest or backend domain facilities for renaming network devices".

It says "naming/renaming", but you can assume "detecting".

I also checked which net_ids udev knows about and the only things that 
pop up are:

ID_NET_NAMING_SCHEME=v247
ID_NET_NAME_MAC=enx00163efd832b
ID_OUI_FROM_DATABASE=Xensource, Inc.


Is it from dom0 or domU ?
Are you using "net.ifnames=0" on the domU kernel command line ?
"v247" looks like systemd "predictive naming scheme" (eth -> enX).
 From bookworm on, domUs vifs get named enXN (enX0, enX1, ...).
Read on :
https://www.debian.org/releases/stable/i386/release-notes/ch-information.en.html#xen-network


This is from the domU, running bullseye with a bookworm dom0.

See how ethN interfaces get messed up, like in your setup, but 
predictable names would work, as you can see in "altname enXN" :

eth1 (:01) -> enX1
eth2 (:10) -> enX10
eth3 (:02) -> enX2


I could not get our bullseye domU to show the "predictable names" even 
though i tried installing the bullseye-backports kernel 6.1.
After you wrote this i installed udev 252.5 from backports and it now 
uses the correct enXn interface names, even with kernel 5.10.


So, my answer does not tell you if something changed in Xen itself, only 
in Debian.
But I guess it relates to what Xen devs told us : vifs detection order 
cannot be relied upon, that's why "predictable names" were invented.
The vif detection part is related to the domains kernels, not Xen itself 
(at least that's what I understood).


Using eth0 nowadays is a bit like using /dev/sda for hard drives, it's 
considered legacy as it may create problems in some setups, like yours 
(ie. for disks, it's recommended to use UUIDs or /dev/disk/by-*).


I hope this answers your question.


Thank you, yes it does.

In our case the dom0 was updated to bookworm while the domU is still 
running bullseye.

-> updated Xen so the vif detection order changed (which we relied on)
-> the predictable network names for Xen don't work with bullseye

So my new resolution for bullseye domUs on a bookworm dom0 is to install 
udev from backports and change the domUs network config to use the new 
enXn naming scheme instead of ethn.




Bug#1042842: network interface names wrong in domU (>10 interfaces)

2023-08-08 Thread Valentin Kleibel

I posted on xen-devel, you can follow from :
https://lists.xenproject.org/archives/html/xen-devel/2023-08/msg00244.html
(Unfortunately, the formatting is weird via html, split the IRC part on 
"- ").


Thank you for posting upstream.

Note that, at first sight, I was told this seems "not-a-Xen" bug (read 
the IRC excerpts).
All documentation i found on the Xen wiki suggests that interfaces 
are connected vifX.Y <-> ethY. [0] [1]
The only other way i know of for identifying the interfaces are MAC 
Addresses which can be randomly assigned if you don't configure them.


I also checked which net_ids udev knows about and the only things that 
pop up are:

ID_NET_NAMING_SCHEME=v247
ID_NET_NAME_MAC=enx00163efd832b
ID_OUI_FROM_DATABASE=Xensource, Inc.

Either i am missing the way you're supposed to do this, or there is a 
bug somewhere in the toolchain.
Unfortunately i'm not able to pinpoint the source of the issue, any help 
would be appreciated.


Valentin

[0] 
https://wiki.xenproject.org/wiki/Xen_Networking#Paravirtualised_Network_Devices
[1] 
https://wiki.xenproject.org/wiki/Network_Throughput_and_Performance_Guide#Technical_Overview




Bug#1042842: network interface names wrong in domU (>10 interfaces)

2023-08-02 Thread Valentin Kleibel

#xen-devel is the IRC Xen channel. I just pinged them, I'll wait.
Depending on their answers, I'll post on the xen-devel mailing list.


thanks for the clarification, looking forward to an answer.

Our current workaround is to edit the interface names in the domUs 
config to match the wrong sorting. And be extra careful that the domUs 
MACs match the ones we expect on that network.


Via udev (MAC matching) or /etc/network/interfaces ?
I ask because it may help others, while this gets resolved.


We just edited /etc/network/interfaces, as it only affects a few of our 
domUs.
i think udev rules matching the MAC would be a better solution. I just 
didn't take the time to write them and went for the quick and dirty 
solution.


Valentin



Bug#1042842: Acknowledgement (network interface names wrong in domU (>10 interfaces))

2023-08-02 Thread Valentin Kleibel

Hi,


the bug has been mentioned on #xen-devel, will keep you posted.


Thanks. I wasn't able to find such a report, could you link the archive 
or post the threads subject so i can find it?



Meanwhile, you may try to force the domU vif names with a letter


The sorting with letters doesn't work out as renaming the interface is a 
secondary step.


...
[53408.899507] vif vif-5-0 sort-a: renamed from vif5.0
...

Our current workaround is to edit the interface names in the domUs 
config to match the wrong sorting. And be extra careful that the domUs 
MACs match the ones we expect on that network.


Valentin



Bug#1042842: network interface names wrong in domU (>10 interfaces)

2023-08-01 Thread Valentin Kleibel

Package: xen-utils-4.17
Version: 4.17.1+2-gb773c48e36-1
Severity: important

Dear Maintainers,

On one of our domUs we discovered that the network interface names were 
wrongly assigned since recreating the domU after an upgrade to bookworm.


If over 10 network interfaces are configured the mapping (dom0) vifX.10 
<-> eth10 (domU) does not apply anymore.
Instead the interfaces on dom0 are sorted primarily by the leftmost 
digit. so for 11 interfaces we will end up with:

vifX.0 <> eth0
vifX.1 <> eth1
vifX.10 <> eth2
vifX.2 <> eth3
vifX.3 <> eth4


This was observed with linux-kernel versions 5.10.179-3 and 6.1.38-2 
(all combinations of domU and dom0) and xen 4.17.1+2-gb773c48e36-1.
You can find a config snippet and "xl network-list" + "ip a" command 
output demonstrating the issue below.
Booting the host with Xen 4.14.5+94-ge49571868d-1 restored the expected 
behaviour.


Looking for relevant changes i found commit fce6999 [0] which changes 
the way libxl__device_list works, but i'm not sure that's the cause of 
this issue.


Thanks for your help,
Valentin

[0] http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=fce6999

Sample vif configuration (ascending MACs):
vif = [
'mac=00:16:3e:fd:83:2f,bridge=lanbr',
'mac=00:16:3e:fd:83:30,bridge=lanbr',
'mac=00:16:3e:fd:83:31,bridge=lanbr',
'mac=00:16:3e:fd:83:32,bridge=lanbr',
'mac=00:16:3e:fd:83:33,bridge=lanbr',
'mac=00:16:3e:fd:83:34,bridge=lanbr',
'mac=00:16:3e:fd:83:35,bridge=lanbr',
'mac=00:16:3e:fd:83:36,bridge=lanbr',
'mac=00:16:3e:fd:83:37,bridge=lanbr',
'mac=00:16:3e:fd:83:38,bridge=lanbr',
'mac=00:16:3e:fd:83:39,bridge=lanbr',
  ]

dom0# xl network-list 3
Idx BE Mac Addr. handle state evt-ch   tx-/rx-ring-ref BE-path 

0   0  00:16:3e:fd:83:2f 0 4 -1-1/-1 
/local/domain/0/backend/vif/3/0
1   0  00:16:3e:fd:83:30 1 4 -1-1/-1 
/local/domain/0/backend/vif/3/1
10  0  00:16:3e:fd:83:3910 4 -1-1/-1 
/local/domain/0/backend/vif/3/10
2   0  00:16:3e:fd:83:31 2 4 -1-1/-1 
/local/domain/0/backend/vif/3/2
3   0  00:16:3e:fd:83:32 3 4 -1-1/-1 
/local/domain/0/backend/vif/3/3
4   0  00:16:3e:fd:83:33 4 4 -1-1/-1 
/local/domain/0/backend/vif/3/4
5   0  00:16:3e:fd:83:34 5 4 -1-1/-1 
/local/domain/0/backend/vif/3/5
6   0  00:16:3e:fd:83:35 6 4 -1-1/-1 
/local/domain/0/backend/vif/3/6
7   0  00:16:3e:fd:83:36 7 4 -1-1/-1 
/local/domain/0/backend/vif/3/7
8   0  00:16:3e:fd:83:37 8 4 -1-1/-1 
/local/domain/0/backend/vif/3/8
9   0  00:16:3e:fd:83:38 9 4 -1-1/-1 
/local/domain/0/backend/vif/3/9


domU# ip a
1: lo:  mtu 65536 qdisc noqueue state UNKNOWN 
group default qlen 1000

link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
   valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
   valid_lft forever preferred_lft forever
2: eth0:  mtu 1500 qdisc mq state UP 
group default qlen 1000

link/ether 00:16:3e:fd:83:2f brd ff:ff:ff:ff:ff:ff
inet X.X.X.X/16 brd X.X.X.X scope global eth0
   valid_lft forever preferred_lft forever
inet6 fe80::216:3eff:fefd:832f/64 scope link
   valid_lft forever preferred_lft forever
3: eth1:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:30 brd ff:ff:ff:ff:ff:ff
4: eth2:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:39 brd ff:ff:ff:ff:ff:ff
5: eth3:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:31 brd ff:ff:ff:ff:ff:ff
6: eth4:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:32 brd ff:ff:ff:ff:ff:ff
7: eth5:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:33 brd ff:ff:ff:ff:ff:ff
8: eth6:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:34 brd ff:ff:ff:ff:ff:ff
9: eth7:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:35 brd ff:ff:ff:ff:ff:ff
10: eth8:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:36 brd ff:ff:ff:ff:ff:ff
11: eth9:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:37 brd ff:ff:ff:ff:ff:ff
12: eth10:  mtu 1500 qdisc noop state DOWN group 
default qlen 1000

link/ether 00:16:3e:fd:83:38 brd ff:ff:ff:ff:ff:ff



Bug#1042080: cfengine3: sysv init script removed

2023-07-26 Thread Valentin Kleibel

Package: orphan-sysvinit-scripts
Version: 0.14
Severity: normal

Dear maintainers,

Unfortunately cfengine3 removed its sysv-init-script in 3.18.2-1. 
Version 3.15.7-1 still shipped an init script that is working fine with 
the current version 3.21.0-2.


I kindly ask you to add the cfengine3 script to orphan-sysvinit-scripts.

Thanks,
Valentin



Bug#1039590: cfengine3: mpf 3.21 will delete symlink /usr/bin/python on update

2023-06-27 Thread Valentin Kleibel

Package: cfengine3
Version: 3.21.0-2
Severity: important

Dear Maintainer.

The default update mechanism in the provided masterfiles will delete a 
/usr/bin/python symlink.
The issue is discussed in 
https://github.com/cfengine/masterfiles/pull/2591 and a fix suggested: 
https://github.com/cfengine/masterfiles/commit/c84ac354265ff123f05248c7345079db42ce5dff


alternatively lines 142-149 could be deleted.

Best regards,
Valentin



Bug#1039588: cfengine3: mpf 3.21 might fail to find python interpreter used for apt-get package module

2023-06-27 Thread Valentin Kleibel

Package: cfengine3
Version: 3.21.0-2

Dear Maintainer,

In the shipped masterfiles in cfe_internal/update/update_policy.cf 
cfengine does its own search for a python executable in $PATH to create 
a symlink with its preferred interpreter.
But $(sys.bindir) is excluded from the search and therefore cfengine will 
fail to find /usr/bin/python3 or other python interpreters residing in 
/usr/bin.

In turn package promises will fail as they are based on python.

The issue was reported upstream: 
https://github.com/cfengine/masterfiles/pull/2665


A patch to fix this behaviour is attached.

Best regards,
Valentindiff --git a/cfe_internal/update/update_policy.cf b/cfe_internal/update/update_policy.cf
index e71532b4..bba105da 100644
--- cfe_internal/update/update_policy.cf
+++ cfe_internal/update/update_policy.cf
@@ -90,9 +90,7 @@ bundle agent cfe_internal_setup_python_symlink(symlink_path)
 {
   vars:
   "path" string => getenv("PATH", 1024);
-  "path_folders" slist => filter("$(sys.bindir)",
- splitstring("$(path)", ":", 128),
- false, true, 128);
+  "path_folders" slist => splitstring("$(path)", ":", 128);
 
 windows::
   "abs_path_folders" -> {"CFE-2309"}


Bug#1026443: init scripts for lvm2

2022-12-20 Thread Valentin Kleibel

Package: lvm2
Version: 2.03.16-2
Source: lvm2
X-Debbugs-Cc: debian-init-divers...@chiark.greenend.org.uk

Dear Maintainers,

I kindly ask you to restore the init scripts removed in the commit 
"Remove remaining unused init scripts". [1]
I can report that they both are still in use and work fine except for 
bug #989345 [2] which has a patch available.


Thanks for your work,
Valentin

[1] 
https://salsa.debian.org/lvm-team/lvm2/-/commit/361969e6035ca57f584c06f348281e3e27d48351

[2] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=989345



Bug#1017944: grub-xen-host: 2.06-3 crashes PV guests in early boot

2022-09-06 Thread Valentin Kleibel

found 1017944 grub2/2.06-3~deb11u1
severity 1017944 serious
tags 1017944 patch

Dear Maintainers,

We can confirm that this bug affects all pv and pvh domUs that use pvgrub.
The commit responsible is 20239c28 "Bump debhelper from old 10 to 13." [1]
The relevant change in debhelper came with version v11: "The dh_strip 
and dh_shlibdeps tools no longer uses filename patterns to determine 
which files to process. Instead, they open the file and look for an ELF 
header to determine if a given file is an shared object or an ELF 
executable."

By choosing debhelper 13, this led to pv grub getting stripped.
A simple override to dh_strip mitigates the issue.

We assume that testing and unstable are affected as well but we do not 
have systems to test this.


Cheers,
Valentin

[1] 
https://salsa.debian.org/grub-team/grub/-/commit/20239c28e1e9ca3eba993e7702f5cb4da81dcf95--- a/debian/rules	2022-09-06 15:44:06.183081104 +0200
+++ b/debian/rules	2022-09-06 15:44:12.878341465 +0200
@@ -544,7 +544,7 @@
 	dh_bugfiles $(patsubst %,-N%,$(filter grub-efi-%-signed-template,$(BUILD_PACKAGES))) -A
 
 override_dh_strip:
-	dh_strip -X/usr/bin/grub-emu
+	dh_strip -X/usr/bin/grub-emu -X/usr/lib/grub-xen/grub-x86_64-xen.bin -X/usr/lib/grub-xen/grub-i386-xen_pvh.bin -X/usr/lib/grub-xen/grub-i386-xen.bin
 
 override_dh_shlibdeps:
 	dh_shlibdeps -X.module


Bug#1006645: aoe: removing aoe devices with flush (implicit in rmmod aoe) leads to page fault

2022-03-01 Thread Valentin Kleibel

Package: linux-image-amd64
Version: 5.10.92
Source: linux

Dear Maintainers,

while trying to fix #986837 we found another issue in the aoe driver:
Removal of an active aoe device leads to a page fault and inhibits the 
removal of the aoe module.
The issue affects all kernels from v4.20-rc1 up to v5.14-rc1 including 
5.10 currently in debian bullseye.
The code in freedev() calls blk_mq_free_tag_set() before running 
blk_cleanup_queue() which leads to this issue (drivers/block/aoedev.c 
L281ff).
The attached patch for affected kernel versions just changes the order 
of function calls to match the one introduced with blk_cleanup_disk() to 
mitigate this issue.

See also https://bugzilla.kernel.org/show_bug.cgi?id=215647

Cheers,
ValentinIndex: linux-5.10.92/drivers/block/aoe/aoedev.c
===
--- linux-5.10.92.orig/drivers/block/aoe/aoedev.c
+++ linux-5.10.92/drivers/block/aoe/aoedev.c
@@ -277,9 +277,9 @@ freedev(struct aoedev *d)
 	if (d->gd) {
 		aoedisk_rm_debugfs(d);
 		del_gendisk(d->gd);
+		blk_cleanup_queue(d->blkq);
 		put_disk(d->gd);
 		blk_mq_free_tag_set(&d->tag_set);
-		blk_cleanup_queue(d->blkq);
 	}
 	t = d->targets;
 	e = t + d->ntargets;


Bug#998014: cfengine3: recommend python3 instead of python

2021-10-28 Thread Valentin Kleibel

Package: cfengine3
Version: 3.15.2-3

Dear Maintainer,

I just noted that cfengine3 still recommends `python` for bullseye and 
upwards but this virtual package is no longer available.

Please change this to `Recommends: python3`.
This is especially important as without python the apt-get module of 
cfengine3 won't work.


Cheers,
Valentin



Bug#989345: lvm2-polld dies after 60s with sysvinit

2021-06-01 Thread Valentin Kleibel

Package: lvm2
Version: 2.03.11-2.1
Source: lvm2

Dear Maintainers,

I recently noticed that lvm2-polld started via sysvinit dies after 60s. 
This is due to the daemon arg "-t 60" which makes sense for a systemd 
socket but unfortunately not for sysvinit.
Removing the arg fixes the issue, see also the manpage for reference 
[1].

Attached you can find a patch.

Best regards,
Valentin

[1] https://manpages.debian.org/buster/lvm2/lvmpolld.8.en.html#OPTIONS
--- /etc/init.d/lvm2-lvmpolld	2021-06-01 16:24:33.649383568 +0200
+++ /etc/init.d/lvm2-lvmpolld.orig	2021-06-01 16:24:22.453383376 +0200
@@ -14,7 +14,7 @@
 
 DESC="LVM2 poll daemon"
 DAEMON=/sbin/lvmpolld
-DAEMON_ARGS=""
+DAEMON_ARGS="-t 60"
 PIDFILE=/run/lvmpolld.pid
 
 do_start_prepare() {


Bug#988788: cfengine3-augments: can't override mpf_update_policy_master_location

2021-05-19 Thread Valentin Kleibel

Package: cfengine3
Version: 3.12.1-2
Source: cfengine3

Dear Maintainers,

I noticed a bug in the mpf framework that prevents overriding the master 
policy location via augments.

It is well documented here: https://tracker.mender.io/browse/CFE-2953
I have also attached the patch.

Best regards,
Valentin
--- a/controls/update_def.cf
+++ b/controls/update_def.cf
@@ -145,7 +145,7 @@
   "mpf_update_policy_master_location" -> { "ENT-3692" }
 comment => "Directory where clients should get policy from.",
 string => "$(def.mpf_update_policy_master_location)",
-if => isvariable( $(def.mpf_update_policy_master_location) );
+if => isvariable( "def.mpf_update_policy_master_location" );
 
 # enable_cfengine_enterprise_hub_ha is defined below
 # Disabled by default


Bug#986837: aoe: kernel crash on blk_update_request: I/O error, BUG: scheduling while atomic

2021-05-17 Thread Valentin Kleibel

Hello,


Thanks for your help.
The bug has been reported upstream:
linux-kernel: https://lkml.org/lkml/2021/4/13/672
linux-block: 
https://lore.kernel.org/linux-block/b6aea08d-7190-e341-8780-13ba8e015...@vrvis.at/T/#u

kernel.org bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212665


After waiting some weeks, and additionally trying to reach out to 
supp...@coraid.com to find someone who might be willing to address this 
bug, I unfortunately have not received any response.
Do you have suggestions for what I could do next? Unfortunately I do not 
think I have the skills to fix the bug myself.


Regards, Valentin



Bug#986837: aoe: kernel crash on blk_update_request: I/O error, BUG: scheduling while atomic

2021-04-13 Thread Valentin Kleibel

Hi


$ ./scripts/get_maintainer.pl ./drivers/block/aoe/
Justin Sanders  (supporter:ATA OVER ETHERNET (AOE) DRIVER)
Jens Axboe  (maintainer:BLOCK LAYER)
linux-bl...@vger.kernel.org (open list:BLOCK LAYER)
linux-ker...@vger.kernel.org (open list)


Thanks for your help.
The bug has been reported upstream:
linux-kernel: https://lkml.org/lkml/2021/4/13/672
linux-block: 
https://lore.kernel.org/linux-block/b6aea08d-7190-e341-8780-13ba8e015...@vrvis.at/T/#u

kernel.org bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212665

Regards,
Valentin



Bug#986837: aoe: kernel crash on blk_update_request: I/O error, BUG: scheduling while atomic

2021-04-13 Thread Valentin Kleibel

Hi Salvatore,


Thanks for the report. I assume you can reproduce the issue as well
with 5.10.28-1 in unstable?


I did not test this before as the aoe driver code was not changed at all 
in the last 7 months. I can now report that the behavior is exactly the 
same running the kernel 5.10.0-6-amd64 #1 SMP Debian 5.10.28-1 from 
unstable.



Can you report the issue to upstream and loop in the bug?


Yes, but I have not done this before. From what I can find, I'd report to
the IO/Storage - Other section in bugzilla [1] and mail the listed 
maintainer "Justin Sanders". Is that the way to go 
or did I forget something?


Regards,
Valentin

[1] 
https://bugzilla.kernel.org/buglist.cgi?component=Other&product=IO%2FStorage&resolution=---




Bug#986837: aoe: kernel crash on blk_update_request: I/O error, BUG: scheduling while atomic

2021-04-12 Thread Valentin Kleibel
.623630] RIP: 0010:cpuidle_enter_state+0xc7/0x350
[  408.623632] Code: 8b 3d dd 5b b7 6b e8 d8 4f a2 ff 49 89 c5 0f 1f 44 
00 00 31 ff e8 69 5a a2 ff 45 84 ff 0f 85 fa 00 00 00 fb 66 0f 1f 44 00 
00 <45> 85 f6 0f 88 06 01 00 00 49 63 c6 4c 2b 2c 24 48 8d 14 40 48 8d

[  408.623634] RSP: 0018:ba890038fea8 EFLAGS: 0246
[  408.623636] RAX: 9c18afc2bc00 RBX: 0002 RCX: 
001f
[  408.623637] RDX:  RSI: 3677d46d RDI: 

[  408.623639] RBP: 9c28d634b000 R08: 005f23a8382d R09: 
0018
[  408.623641] R10: 0dd5 R11: 1169 R12: 
955b8fa0
[  408.623644] R13: 005f23a8382d R14: 0002 R15: 


[  408.623651]  ? cpuidle_enter_state+0xb7/0x350
[  408.623660]  cpuidle_enter+0x29/0x40
[  408.623668]  do_idle+0x1ef/0x2b0
[  408.623677]  cpu_startup_entry+0x19/0x20
[  408.623685]  secondary_startup_64_no_verify+0xb0/0xbb
[  408.623991] bad: scheduling from the idle thread!
[  408.624027] CPU: 16 PID: 0 Comm: swapper/16 Tainted: GW 
  5.10.0-5-amd64 #1 Debian 5.10.26-1
[  408.624028] Hardware name: Supermicro AS -2013S-C0R/H11SSL-C, BIOS 
2.1 02/21/2020

[  408.624029] Call Trace:
[  408.624030]  
[  408.624034]  dump_stack+0x6b/0x83
[  408.624036]  dequeue_task_idle+0x28/0x40
[  408.624038]  __schedule+0x3bf/0x870
[  408.624041]  schedule+0x46/0xb0
[  408.624043]  blk_mq_freeze_queue_wait+0x62/0x90
[  408.624047]  ? add_wait_queue_exclusive+0x70/0x70
[  408.624051]  aoedev_downdev+0x106/0x150 [aoe]
[  408.624054]  rexmit_timer+0x4ea/0x500 [aoe]
[  408.624058]  ? rexmit_deferred+0x380/0x380 [aoe]
[  408.624062]  call_timer_fn+0x29/0xf0
[  408.624064]  __run_timers.part.0+0x1d3/0x240
[  408.624066]  ? ktime_get+0x38/0xa0
[  408.624068]  ? lapic_next_event+0x1d/0x20
[  408.624070]  ? clockevents_program_event+0x8d/0xf0
[  408.624072]  run_timer_softirq+0x26/0x50
[  408.624075]  __do_softirq+0xc5/0x275
[  408.624077]  asm_call_irq_on_stack+0x12/0x20
[  408.624078]  
[  408.624080]  do_softirq_own_stack+0x37/0x40
[  408.624084]  irq_exit_rcu+0x8e/0xc0
[  408.624094]  sysvec_apic_timer_interrupt+0x36/0x80
[  408.624103]  asm_sysvec_apic_timer_interrupt+0x12/0x20
[  408.624112] RIP: 0010:cpuidle_enter_state+0xc7/0x350
[  408.624121] Code: 8b 3d dd 5b b7 6b e8 d8 4f a2 ff 49 89 c5 0f 1f 44 
00 00 31 ff e8 69 5a a2 ff 45 84 ff 0f 85 fa 00 00 00 fb 66 0f 1f 44 00 
00 <45> 85 f6 0f 88 06 01 00 00 49 63 c6 4c 2b 2c 24 48 8d 14 40 48 8d

[  408.624130] RSP: 0018:ba890038fea8 EFLAGS: 0246
[  408.624142] RAX: 9c18afc2bc00 RBX: 0002 RCX: 
001f
[  408.624150] RDX:  RSI: 3677d46d RDI: 

[  408.624158] RBP: 9c28d634b000 R08: 005f23a8382d R09: 
0018
[  408.624163] R10: 0dd5 R11: 1169 R12: 
955b8fa0
[  408.624172] R13: 005f23a8382d R14: 0002 R15: 


[  408.624182]  ? cpuidle_enter_state+0xb7/0x350
[  408.624189]  cpuidle_enter+0x29/0x40
[  408.624196]  do_idle+0x1ef/0x2b0
[  408.624200]  cpu_startup_entry+0x19/0x20
[  408.624203]  secondary_startup_64_no_verify+0xb0/0xbb


These messages continue until the machine is reset.

It seems to be a regression from commit 3582dd291788 ("aoe: convert 
aoeblk to blk-mq") and a similar bug has already been fixed [1].


Running on kernel 4.19.0-16-amd64, we get the expected result: the 
aoe device is removed upon timeout. dmesg output:

[]
[  301.543788] mlx4_en: enp65s0d1: Close port called
[  301.608154] mlx4_en: enp65s0d1: Link Down
[  527.124182] print_req_error: I/O error, dev etherd/e42.0, sector 4096
[  527.124248] aoe: device 42.0 is not up
[  527.124251] print_req_error: I/O error, dev etherd/e42.0, sector 0
[  527.124299] aoe: device 42.0 is not up
[  527.124300] aoe: device 42.0 is not up
[  527.124316] aoe: device 42.0 is not up


Hope someone can resolve this issue,

thanks for your help,
Valentin Kleibel

[1] https://lkml.org/lkml/2019/8/27/400



Bug#924314: test payload for CVE-2000-0482 crashes kernel 4.19.16

2019-03-11 Thread Valentin Kleibel

Package: src:linux
Version: 4.19.16-1
Severity: normal
Tags: upstream

While testing for known vulnerabilities, we realized that systems 
running debian buster on kernel 4.19.0-2-amd64 (upstream 4.19.16) crash 
when targeted by the openvas-nasl script jolt2.nasl, which is designed 
to test for CVE-2000-0482 
[https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2000-0482] also 
known as jolt2.
The script can be found in the archive 
http://dl.greenbone.net/community-nvt-feed-current.tar.bz2 
Inside the archive, the path is pre2008/jolt2.nasl.
The command line to run it (on a system with libopenvas-dev=9.0.3-1 
installed) is:


/usr/bin/openvas-nasl -t 10.0.0.1 -X -d jolt2.nasl

We also tested this with different hardware (cpu, motherboard, nic) with 
the exact same result.


The problem is resolved in linux-image-4.19.0-3-amd64 (upstream 4.19.20) 
and didn't exist in linux-image-4.19.0-1-amd64 (upstream 4.19.12)


Attached you can find the syslog from the crash.

Best regards,
Valentin Kleibel


-- System Information:
Debian Release: buster/sid
  APT prefers testing
  APT policy: (500, 'testing'), (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 4.19.0-2-amd64 (SMP w/4 CPU cores)
Kernel taint flags: TAINT_UNSIGNED_MODULE
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), 
LANGUAGE=en_US:en (charmap=UTF-8)

Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages linux-image-4.19.0-2-amd64 depends on:
ii  initramfs-tools [linux-initramfs-tool]  0.133
ii  kmod26-1
ii  linux-base  4.5

Versions of packages linux-image-4.19.0-2-amd64 recommends:
ii  apparmor 2.13.2-9
ii  firmware-linux-free  3.4
ii  irqbalance   1.5.0-3

Versions of packages linux-image-4.19.0-2-amd64 suggests:
pn  debian-kernel-handbook  
ii  grub-pc 2.02+dfsg1-12
pn  linux-doc-4.19  

Versions of packages linux-image-4.19.0-2-amd64 is related to:
ii  firmware-amd-graphics   20190114-1
pn  firmware-atheros
pn  firmware-bnx2   
pn  firmware-bnx2x  
pn  firmware-brcm80211  
pn  firmware-cavium 
pn  firmware-intel-sound
pn  firmware-intelwimax 
pn  firmware-ipw2x00
pn  firmware-ivtv   
pn  firmware-iwlwifi
pn  firmware-libertas   
ii  firmware-linux-nonfree  20190114-1
ii  firmware-misc-nonfree   20190114-1
pn  firmware-myricom
pn  firmware-netxen 
pn  firmware-qlogic 
ii  firmware-realtek20190114-1
pn  firmware-samsung
pn  firmware-siano  
pn  firmware-ti-connectivity
ii  xen-hypervisor-4.11-amd64 [xen-hypervisor]  4.11.1+26-g87f51bf366-3
Mar 11 10:39:26 helena systemd[1386]: Reached target Paths.
Mar 11 10:39:26 helena systemd[1386]: Condition check resulted in Sound System 
being skipped.
Mar 11 10:39:26 helena systemd[1386]: Listening on GnuPG network certificate 
management daemon.
Mar 11 10:39:26 helena systemd[1386]: Reached target Timers.
Mar 11 10:39:26 helena systemd[1386]: Listening on GnuPG cryptographic agent 
and passphrase cache (restricted).
Mar 11 10:39:26 helena systemd[1386]: Listening on GnuPG cryptographic agent 
and passphrase cache.
Mar 11 10:39:26 helena systemd[1386]: Starting D-Bus User Message Bus Socket.
Mar 11 10:39:26 helena systemd[1386]: Listening on GnuPG cryptographic agent 
and passphrase cache (access for web browsers).
Mar 11 10:39:26 helena systemd[1386]: Listening on GnuPG cryptographic agent 
(ssh-agent emulation).
Mar 11 10:39:26 helena systemd[1386]: Listening on D-Bus User Message Bus 
Socket.
Mar 11 10:39:26 helena systemd[1386]: Reached target Sockets.
Mar 11 10:39:26 helena systemd[1386]: Reached target Basic System.
Mar 11 10:39:26 helena systemd[1386]: Reached target Default.
Mar 11 10:39:26 helena systemd[1386]: Startup finished in 69ms.
Mar 11 10:39:26 helena systemd[1]: Started User Manager for UID 0.
Mar 11 10:39:26 helena systemd[1]: Started Session 3 of user root.
Mar 11 10:39:41 helena systemd[1]: systemd-fsckd.service: Succeeded.
Mar 11 10:39:42 helena systemd-timesyncd[625]: Synchronized to time server for 
the first time 213.235.200.199:123 (2.debian.pool.ntp.org).
Mar 11 10:39:49 helena kernel: [   67.981598] BUG: unable to handle kernel NULL 
pointer dereference at 
Mar 11 10:39:49 helena kernel: [   67.984160] PGD 0 P4D 0 
Mar 11 10:39:49 helena kernel: [   67.986673] Oops:  [#1] SMP NOPTI
Mar 11 10:39:49 helena kernel: [   67.989198] CPU: 2 PID: 0 Comm: swapper

Bug#923354: mitmproxy misses a dependency for pyhton3-pkg-resources

2019-02-26 Thread Valentin Kleibel
Package: mitmproxy
Version: 4.0.4-4
Severity: important

Dear Maintainer,

I just installed mitmproxy on a pretty minimal headless system and realized it 
doesn't run without pkg_resources. Installing python3-pkg-resources resolved 
the problem.
Adding the dependency should be an easy fix.

Thanks,
Valentin


-- System Information:
Debian Release: buster/sid
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: amd64 (x86_64)

Kernel: Linux 4.19.0-2-amd64 (SMP w/1 CPU core)
Locale: LANG=C, LC_CTYPE=C.UTF-8 (charmap=UTF-8), LANGUAGE=C (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: sysvinit (via /sbin/init)

Versions of packages mitmproxy depends on:
ii  dpkg  1.19.5
ii  fonts-font-awesome5.0.10+really4.7.0~dfsg-1
ii  python3   3.7.2-1
ii  python3-blinker   1.4+dfsg1-0.2
ii  python3-brotli1.0.7-2
ii  python3-certifi   2018.8.24-1
ii  python3-click 7.0-1
ii  python3-cryptography  2.3-1
ii  python3-h11   0.8.1-1
ii  python3-h23.0.1-4
ii  python3-hyperframe5.1.0-1
ii  python3-kaitaistruct  0.8-1
ii  python3-ldap3 2.4.1-1
ii  python3-openssl   19.0.0-1
ii  python3-passlib   1.7.1-1
ii  python3-pyasn10.4.2-3
ii  python3-pyparsing 2.2.0+dfsg1-2
ii  python3-pyperclip 1.6.4-1
ii  python3-ruamel.yaml   0.15.34-1+b1
ii  python3-sortedcontainers  2.0.4-1
ii  python3-tornado   5.1.1-4
ii  python3-urwid 2.0.1-2+b1
ii  python3-wsproto   0.11.0-2

mitmproxy recommends no packages.

mitmproxy suggests no packages.

-- no debconf information