For anyone who is running the latest Talos: I couldn't find a way to run
a watchdog script in that cut-down environment, so I've used a
privileged pod in the kube-system namespace. You can unbind the driver
from the device and then rebind it, but I've found that bouncing the
interface is sufficient. I've included the unbind/rebind steps,
commented out, in the DaemonSet YAML. If you run a hybrid cluster, use
node affinity with generic-device-plugin, or just node labels, so that
the pods only run on devices with macb drivers.

---

# Network watchdog DaemonSet: each pod pings a fixed list of peer addresses
# once per second and bounces the network interface when none of them answers.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: macb-watchdog
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: macb-watchdog
  template:
    metadata:
      labels:
        name: macb-watchdog
    spec:
      priorityClassName: system-node-critical
      # Host network namespace, so `ip link` below acts on the node's own
      # interface rather than on a pod-local one.
      hostNetwork: true
      containers:
      - name: watchdog
        env:
        # The node's own address; pingList skips it so that a successful
        # self-ping never masks a dead link.
        - name: NODE_IP
          valueFrom:
            fieldRef:
              fieldPath: status.hostIP
        image: alpine:latest
        command: ["/bin/sh", "-c"]
        args:
        - |
          # NOTE(review): packages are installed at container start, which
          # presumably needs a working network at that moment - confirm.
          apk update && apk add --no-cache iproute2 iputils-ping
          trap 'echo "Received SIGTERM, exiting..."; exit 0' 15
          INTERFACE="end0"
          # DEVICE and DRV_PATH are only used by the optional (commented-out)
          # driver unbind/bind sequence further down.
          DEVICE="1f00100000.ethernet"
          DRV_PATH="/sys/bus/platform/drivers/macb"

          # Returns 0 as soon as any peer (other than this node) answers a
          # single ping; returns 1 when every peer fails.
          pingList() {
            for ip in 192.168.33.8 192.168.33.1 192.168.33.5 192.168.33.6 192.168.33.7
            do
              [ "${NODE_IP}" = "${ip}" ] && continue
              # Adjust the -W ping timeout if needed.
              ping -q -c1 -W.1 "${ip}" > /dev/null 2>&1 && return 0
              echo "${ip} ping failed"
            done
            echo "All pings failed..."
            return 1
          }

          while true
          do
            if ! pingList
            then
              echo "Attempting driver reset..."

              # Optional heavier reset: unbind and rebind the driver.
              # if [ -e "${DRV_PATH}/${DEVICE}" ]
              # then
              #   echo "${DEVICE}" > "${DRV_PATH}/unbind"
              #   sleep 1
              #   echo "${DEVICE}" > "${DRV_PATH}/bind"
              #   sleep 1
              # fi
              # ip link set "${INTERFACE}" up

              # Bouncing the interface; wait a few seconds before probing again.
              ip link set "$INTERFACE" down && ip link set "$INTERFACE" up
              sleep 4
            fi
            sleep 1
          done
        securityContext:
          privileged: true
        volumeMounts:
        # Host /sys mounted read-write for the optional driver unbind/bind
        # path under DRV_PATH.
        - name: host-sys
          mountPath: /sys
          readOnly: false
      volumes:
      - name: host-sys
        hostPath:
          path: /sys

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/2133877

Title:
  Complete network hang on Raspberry Pi 5 with kernel 6.17 under load -
  possibly related to CPU frequency scaling

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/linux-raspi/+bug/2133877/+subscriptions


-- 
ubuntu-bugs mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to