Re: arm64 pwmbl(4): simplify ramp case

2022-11-10 Thread Miod Vallat
> This actually breaks my machine.  malloc() is saying allocation too
> large.  OF_getproplen will return -1 on that.  Is it possible that
> len is treated as uint64_t as it is an int and sizeof is effectively
> uint64_t?

Ah, yes; size_t is unsigned and wider than int on 64-bit platforms,
therefore int is converted to unsigned for the comparison. Casting
sizeof to int will do.

> Might be easier to have a check like:
> 
>   if (sc->sc_channels == NULL)
>   return level < sc->sc_nlevels ? level : sc->sc_nlevels - 1;
> 
> Then you don't need to indent the whole block.  Makes the diff smaller
> and a bit easier to understand?

Sure; what about this new version, then?

Index: pwmbl.c
===
RCS file: /OpenBSD/src/sys/dev/fdt/pwmbl.c,v
retrieving revision 1.6
diff -u -p -r1.6 pwmbl.c
--- pwmbl.c 24 Oct 2021 17:52:26 -  1.6
+++ pwmbl.c 11 Nov 2022 06:46:41 -
@@ -35,7 +35,7 @@ struct pwmbl_softc {
struct device   sc_dev;
uint32_t*sc_pwm;
int sc_pwm_len;
-   uint32_t*sc_levels;
+   uint32_t*sc_levels; /* NULL if simple ramp */
int sc_nlevels;
uint32_tsc_max_level;
uint32_tsc_def_level;
@@ -73,7 +73,7 @@ pwmbl_attach(struct device *parent, stru
struct pwmbl_softc *sc = (struct pwmbl_softc *)self;
struct fdt_attach_args *faa = aux;
uint32_t *gpios;
-   int i, len;
+   int len;
 
len = OF_getproplen(faa->fa_node, "pwms");
if (len < 0) {
@@ -95,7 +95,7 @@ pwmbl_attach(struct device *parent, stru
}
 
len = OF_getproplen(faa->fa_node, "brightness-levels");
-   if (len > 0) {
+   if (len >= (int)sizeof(uint32_t)) {
sc->sc_levels = malloc(len, M_DEVBUF, M_WAITOK);
OF_getpropintarray(faa->fa_node, "brightness-levels",
sc->sc_levels, len);
@@ -107,13 +107,9 @@ pwmbl_attach(struct device *parent, stru
sc->sc_def_level = sc->sc_nlevels - 1;
sc->sc_def_level = sc->sc_levels[sc->sc_def_level];
} else {
+   /* No levels, assume a simple 0..255 ramp. */
sc->sc_nlevels = 256;
-   sc->sc_levels = mallocarray(sc->sc_nlevels,
-   sizeof(uint32_t), M_DEVBUF, M_WAITOK);
-   for (i = 0; i < sc->sc_nlevels; i++)
-   sc->sc_levels[i] = i;
-   sc->sc_max_level = sc->sc_levels[sc->sc_nlevels - 1];
-   sc->sc_def_level = sc->sc_levels[sc->sc_nlevels - 1];
+   sc->sc_max_level = sc->sc_def_level = sc->sc_nlevels - 1;
}
 
printf("\n");
@@ -143,6 +139,9 @@ pwmbl_find_brightness(struct pwmbl_softc
 {
uint32_t mid;
int i;
+
+   if (sc->sc_levels == NULL)
+   return level < sc->sc_nlevels ? level : sc->sc_nlevels - 1;
 
for (i = 0; i < sc->sc_nlevels - 1; i++) {
mid = (sc->sc_levels[i] + sc->sc_levels[i + 1]) / 2;



Re: tmux.1: Sort options

2022-11-10 Thread Jason McIntyre
On Thu, Nov 10, 2022 at 03:10:32PM -0500, Josiah Frentsos wrote:
> Index: tmux.1
> ===
> RCS file: /cvs/src/usr.bin/tmux/tmux.1,v
> retrieving revision 1.905
> diff -u -p -r1.905 tmux.1
> --- tmux.13 Nov 2022 08:33:57 -   1.905
> +++ tmux.110 Nov 2022 20:07:56 -
> @@ -23,7 +23,7 @@
>  .Sh SYNOPSIS
>  .Nm tmux
>  .Bk -words
> -.Op Fl 2CDluvV
> +.Op Fl 2CDluVv
>  .Op Fl c Ar shell-command
>  .Op Fl f Ar file
>  .Op Fl L Ar socket-name
> @@ -100,7 +100,7 @@ Force
>  .Nm
>  to assume the terminal supports 256 colours.
>  This is equivalent to
> -.Fl T Ar 256 .
> +.Fl T Cm 256 .

hi. i committed your diff - thanks for sending it. but i left out the
change above (Ar -> Cm). the reason being that i think it unrelated to a
diff to sort options, and on top of that there's a whole section on the
features, and none of them are marked up at all.

if you want to submit a separate diff to make the features all use Cm
that's fine. but it;s a big page, so try to make sure it is consistent
(no easy task ;)

cheers,
jmc

>  .It Fl C
>  Start in control mode (see the
>  .Sx CONTROL MODE
> @@ -203,6 +203,12 @@ If
>  is specified, the default socket directory is not used and any
>  .Fl L
>  flag is ignored.
> +.It Fl T Ar features
> +Set terminal features for the client.
> +This is a comma-separated list of features.
> +See the
> +.Ic terminal-features
> +option.
>  .It Fl u
>  Write UTF-8 output to the terminal even if the first environment
>  variable of
> @@ -214,12 +220,10 @@ that is set does not contain
>  .Qq UTF-8
>  or
>  .Qq UTF8 .
> -.It Fl T Ar features
> -Set terminal features for the client.
> -This is a comma-separated list of features.
> -See the
> -.Ic terminal-features
> -option.
> +.It Fl V
> +Report the
> +.Nm
> +version.
>  .It Fl v
>  Request verbose logging.
>  Log messages will be saved into
> @@ -244,10 +248,6 @@ signal may be sent to the
>  server process to toggle logging between on (as if
>  .Fl v
>  was given) and off.
> -.It Fl V
> -Report the
> -.Nm
> -version.
>  .It Ar command Op Ar flags
>  This specifies one of a set of commands used to control
>  .Nm ,
> 



Re: nd6_ioctl: Use local variable for consistency

2022-11-10 Thread Alexander Bluhm
On Thu, Nov 10, 2022 at 02:56:00PM +, Klemens Nanni wrote:
> No point in using the variable for half of the check.
> 
> OK?

OK bluhm@

> diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
> index e65eeb0c2ac..d88969be617 100644
> --- a/sys/netinet6/nd6.c
> +++ b/sys/netinet6/nd6.c
> @@ -1025,24 +1025,24 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
>   case SIOCGNBRINFO_IN6:
>   {
>   struct llinfo_nd6 *ln;
>   struct in6_addr nb_addr = nbi->addr; /* make local for safety */
>   time_t expire;
>  
>   NET_LOCK_SHARED();
>   /*
>* XXX: KAME specific hack for scoped addresses
>*  : for other scopes than link-local?
>*/
> - if (IN6_IS_ADDR_LINKLOCAL(>addr) ||
> - IN6_IS_ADDR_MC_LINKLOCAL(>addr)) {
> + if (IN6_IS_ADDR_LINKLOCAL(_addr) ||
> + IN6_IS_ADDR_MC_LINKLOCAL(_addr)) {
>   u_int16_t *idp = (u_int16_t *)_addr.s6_addr[2];
>  
>   if (*idp == 0)
>   *idp = htons(ifp->if_index);
>   }
>  
>   rt = nd6_lookup(_addr, 0, ifp, ifp->if_rdomain);
>   if (rt == NULL ||
>   (ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
>   rtfree(rt);
>   NET_UNLOCK_SHARED();



tmux.1: Sort options

2022-11-10 Thread Josiah Frentsos
Index: tmux.1
===
RCS file: /cvs/src/usr.bin/tmux/tmux.1,v
retrieving revision 1.905
diff -u -p -r1.905 tmux.1
--- tmux.1  3 Nov 2022 08:33:57 -   1.905
+++ tmux.1  10 Nov 2022 20:07:56 -
@@ -23,7 +23,7 @@
 .Sh SYNOPSIS
 .Nm tmux
 .Bk -words
-.Op Fl 2CDluvV
+.Op Fl 2CDluVv
 .Op Fl c Ar shell-command
 .Op Fl f Ar file
 .Op Fl L Ar socket-name
@@ -100,7 +100,7 @@ Force
 .Nm
 to assume the terminal supports 256 colours.
 This is equivalent to
-.Fl T Ar 256 .
+.Fl T Cm 256 .
 .It Fl C
 Start in control mode (see the
 .Sx CONTROL MODE
@@ -203,6 +203,12 @@ If
 is specified, the default socket directory is not used and any
 .Fl L
 flag is ignored.
+.It Fl T Ar features
+Set terminal features for the client.
+This is a comma-separated list of features.
+See the
+.Ic terminal-features
+option.
 .It Fl u
 Write UTF-8 output to the terminal even if the first environment
 variable of
@@ -214,12 +220,10 @@ that is set does not contain
 .Qq UTF-8
 or
 .Qq UTF8 .
-.It Fl T Ar features
-Set terminal features for the client.
-This is a comma-separated list of features.
-See the
-.Ic terminal-features
-option.
+.It Fl V
+Report the
+.Nm
+version.
 .It Fl v
 Request verbose logging.
 Log messages will be saved into
@@ -244,10 +248,6 @@ signal may be sent to the
 server process to toggle logging between on (as if
 .Fl v
 was given) and off.
-.It Fl V
-Report the
-.Nm
-version.
 .It Ar command Op Ar flags
 This specifies one of a set of commands used to control
 .Nm ,



Re: pause.3: misc cleanup

2022-11-10 Thread Ingo Schwarze
Hi Scott,

thanks for repeatedly working on time-related library documentation.  :-)

Unless noted otherwise, i agree with Todd's comments.

Todd C. Miller wrote on Wed, Nov 09, 2022 at 10:31:15AM -0700:
> On Wed, 09 Nov 2022 16:47:22 +, Scott Cheloha wrote:

>> RETURN VALUES
>>
>> - Pull ERRORS into this section.  No need to put the one error in a
>>   .Bl/.El, we can just mention it inline.

> OK

I somewhat strongly object to this change.

If there is content for a standard section, that section ought to
be present, even if it is very short.  The point is that a manual
page should not only contain the expected information, but all that
information should also be in the expected places, such that by merely
looking at the section headers, you can already tell whether a given
standard section has content, without reading any of the text.

For example, the RETURN VALUES section ought to be absent if and only
if all functions documented in the page are void.

For example, the ERRORS section ought to be absent if and only if
none of the functions ever touch the errno.  Admittedly, our pages
may not be perfect in that respect, some libc pages may be missing
an ERRORS section even though the functions in questions sometimes
set errno, but that's a (mild) defect.  We certainly should not
delete an existing ERRORS section for a function that definitely does
set the errno.

Also, in sections with a strongly conventional syntax (like ERRORS),
it is preferable to use the standard syntax even in corner cases.
In this case, yes, i do recommend a list with a single element.
Similarly, a utility program that can take exactly one option still
gets the standard DESCRIPTION sentence "The options are as follows:"
and then a one-element list.  The advantage for readers is having the
familiar layout, and for developers the update becomes easier when
the second option is added; not that anything will ever be added to
pause(3), but consistency in style is still a good thing.  Similarly,
in code, using getopt(3) is recommended even in a program that only
takes a single option.

>> SEE ALSO
>>
>> - Nix select(2) and setitimer(2), they aren't directly relevant.

> OK.  The reason select(2) is there is probably because you can
> emulate pause() using select().

I don't feel strongly about this.

Removing select(2) is probably OK because the reason explained
by Todd is mostly historical.  In Perl, you still use select()
to sleep for less than one second, but that's irrelevant to C
programming.

I'm less convinced about removing setitimer(2).  To me, setitimer(2)
feels just as relevant as kill(2).  Both cause signals to be delivered
(asap or later), which is quite relevant because that causes pause(3)
to return.

>> HISTORY
>>
>> - We still have sigpause(3) and sigblock(3) in userspace.  Should
>>   we .Xr them?  They aren't systems calls anymore, but they were
>>   at that time.  Unsure what to do here.

Polishing the HISTORY section should in no case hold up your other
work.

> I think it is better to use .Fn for sigpause(3) and sigblock(3)
> rather than .Xr here.

In this very special case, where sigpause(3) and sigblock(3) are
strongly deprecated, i agree.  Pointing out their continued existence -
even below HISTORY - risks accidentally encouraging their use.

> Note that in 4.3-Reno pause(3) was still implemented in terms of
> sigpause(3) and sigblock(3), it is just that those functions were
> themselves wrappers instead of system calls.  Personally, I would
> drop the bit about 4.3-Reno since it is not really correct in my
> opinion.

It could be argued either way: IMHO, a wrapper around a wrapper around
foo is still a wrapper around foo, so at least my version that's
currently in the tree can be argued to be correct.  Admittedly, Scott's
version less so; pause(3) certainly wasn't reimplemented for Reno.
Then again, replacing the last phase (starting at "and around")
with the following would avoid the semantic disagreement:

  It has been using sigsuspend(2) and sigprocmask(2) since 4.3-BSD Reno.

Yours,
  Ingo



Re: Push kernel lock inside in{,6}_control()

2022-11-10 Thread Klemens Nanni
On Thu, Nov 10, 2022 at 04:52:13PM +, ssnf wrote:
> Content-Disposition: attachment; filename=fuck

First you fail to provide a proper bug report, so I couldn't help you.
Now you hijack with your mail retitled "fuck", so I won't help you.



Re: qcpmic.4, qcpmicgpio.4, etc.: Sort SEE ALSO

2022-11-10 Thread Josiah Frentsos
On Thu, Nov 10, 2022 at 05:13:40PM +, Patrick Wildt wrote:
> There are other drivers with intro after others. Are we sorting those 
> alphabetically or by relevancy?

Which ones?



Re: qcpmic.4, qcpmicgpio.4, etc.: Sort SEE ALSO

2022-11-10 Thread Jason McIntyre
On Thu, Nov 10, 2022 at 05:13:40PM +, Patrick Wildt wrote:
> There are other drivers with intro after others. Are we sorting those 
> alphabetically or by relevancy?
> 

alphabetically, and in section order. i am kinda busy at the mo, if
anyone wants to fix these...

jmc

> Von meinem iPhone gesendet
> 
> > Am 10.11.2022 um 16:46 schrieb Josiah Frentsos :
> > 
> > ???Index: qcpmic.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcpmic.4,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 qcpmic.4
> > --- qcpmic.410 Nov 2022 13:08:57 -1.2
> > +++ qcpmic.410 Nov 2022 16:43:07 -
> > @@ -34,12 +34,12 @@ Snapdragon SoCs.
> > The functionality for the hardware blocks found in each PMIC is
> > implemented in the children attaching to this driver.
> > .Sh SEE ALSO
> > -.Xr qcspmi 4 ,
> > +.Xr intro 4 ,
> > .Xr qcpmicgpio 4 ,
> > .Xr qcpon 4 ,
> > .Xr qcpwm 4 ,
> > .Xr qcrtc 4 ,
> > -.Xr intro 4
> > +.Xr qcspmi 4
> > .Sh HISTORY
> > The
> > .Nm
> > Index: qcpmicgpio.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcpmicgpio.4,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 qcpmicgpio.4
> > --- qcpmicgpio.410 Nov 2022 12:57:08 -1.1
> > +++ qcpmicgpio.410 Nov 2022 16:43:07 -
> > @@ -30,8 +30,8 @@ SoCs.
> > It does not provide direct device driver entry points but makes its
> > functions available to other drivers.
> > .Sh SEE ALSO
> > -.Xr qcpmic 4 ,
> > -.Xr intro 4
> > +.Xr intro 4 ,
> > +.Xr qcpmic 4
> > .Sh HISTORY
> > The
> > .Nm
> > Index: qcpon.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcpon.4,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 qcpon.4
> > --- qcpon.410 Nov 2022 13:08:57 -1.1
> > +++ qcpon.410 Nov 2022 16:43:07 -
> > @@ -26,10 +26,11 @@
> > The
> > .Nm
> > driver provides support for the PON controllers integrated on various
> > -Qualcomm Snapdragon SoCs.  This controller contains the power button.
> > +Qualcomm Snapdragon SoCs.
> > +This controller contains the power button.
> > .Sh SEE ALSO
> > -.Xr qcpmic 4 ,
> > -.Xr intro 4
> > +.Xr intro 4 ,
> > +.Xr qcpmic 4
> > .Sh HISTORY
> > The
> > .Nm
> > Index: qcpwm.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcpwm.4,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 qcpwm.4
> > --- qcpwm.410 Nov 2022 13:08:57 -1.1
> > +++ qcpwm.410 Nov 2022 16:43:07 -
> > @@ -28,8 +28,8 @@ The
> > driver provides support for the PWM controllers integrated on various
> > Qualcomm Snapdragon SoCs.
> > .Sh SEE ALSO
> > -.Xr qcpmic 4 ,
> > -.Xr intro 4
> > +.Xr intro 4 ,
> > +.Xr qcpmic 4
> > .Sh HISTORY
> > The
> > .Nm
> > Index: qcrtc.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcrtc.4,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 qcrtc.4
> > --- qcrtc.410 Nov 2022 13:08:57 -1.1
> > +++ qcrtc.410 Nov 2022 16:43:07 -
> > @@ -28,8 +28,8 @@ The
> > driver provides support for the RTC integrated on various Qualcomm
> > Snapdragon SoCs.
> > .Sh SEE ALSO
> > -.Xr qcpmic 4 ,
> > -.Xr intro 4
> > +.Xr intro 4 ,
> > +.Xr qcpmic 4
> > .Sh HISTORY
> > The
> > .Nm
> > Index: qcspmi.4
> > ===
> > RCS file: /cvs/src/share/man/man4/qcspmi.4,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 qcspmi.4
> > --- qcspmi.410 Nov 2022 12:57:08 -1.1
> > +++ qcspmi.410 Nov 2022 16:43:07 -
> > @@ -29,8 +29,8 @@ The
> > driver provides support for the SPMI controller found on various
> > Qualcomm Snapdragon SoCs.
> > .Sh SEE ALSO
> > -.Xr qcpmic 4 ,
> > -.Xr intro 4
> > +.Xr intro 4 ,
> > +.Xr qcpmic 4
> > .Sh HISTORY
> > The
> > .Nm
> > 
> 



Re: qcpmic.4, qcpmicgpio.4, etc.: Sort SEE ALSO

2022-11-10 Thread Patrick Wildt
There are other drivers with intro after others. Are we sorting those 
alphabetically or by relevancy?

Von meinem iPhone gesendet

> Am 10.11.2022 um 16:46 schrieb Josiah Frentsos :
> 
> Index: qcpmic.4
> ===
> RCS file: /cvs/src/share/man/man4/qcpmic.4,v
> retrieving revision 1.2
> diff -u -p -r1.2 qcpmic.4
> --- qcpmic.410 Nov 2022 13:08:57 -1.2
> +++ qcpmic.410 Nov 2022 16:43:07 -
> @@ -34,12 +34,12 @@ Snapdragon SoCs.
> The functionality for the hardware blocks found in each PMIC is
> implemented in the children attaching to this driver.
> .Sh SEE ALSO
> -.Xr qcspmi 4 ,
> +.Xr intro 4 ,
> .Xr qcpmicgpio 4 ,
> .Xr qcpon 4 ,
> .Xr qcpwm 4 ,
> .Xr qcrtc 4 ,
> -.Xr intro 4
> +.Xr qcspmi 4
> .Sh HISTORY
> The
> .Nm
> Index: qcpmicgpio.4
> ===
> RCS file: /cvs/src/share/man/man4/qcpmicgpio.4,v
> retrieving revision 1.1
> diff -u -p -r1.1 qcpmicgpio.4
> --- qcpmicgpio.410 Nov 2022 12:57:08 -1.1
> +++ qcpmicgpio.410 Nov 2022 16:43:07 -
> @@ -30,8 +30,8 @@ SoCs.
> It does not provide direct device driver entry points but makes its
> functions available to other drivers.
> .Sh SEE ALSO
> -.Xr qcpmic 4 ,
> -.Xr intro 4
> +.Xr intro 4 ,
> +.Xr qcpmic 4
> .Sh HISTORY
> The
> .Nm
> Index: qcpon.4
> ===
> RCS file: /cvs/src/share/man/man4/qcpon.4,v
> retrieving revision 1.1
> diff -u -p -r1.1 qcpon.4
> --- qcpon.410 Nov 2022 13:08:57 -1.1
> +++ qcpon.410 Nov 2022 16:43:07 -
> @@ -26,10 +26,11 @@
> The
> .Nm
> driver provides support for the PON controllers integrated on various
> -Qualcomm Snapdragon SoCs.  This controller contains the power button.
> +Qualcomm Snapdragon SoCs.
> +This controller contains the power button.
> .Sh SEE ALSO
> -.Xr qcpmic 4 ,
> -.Xr intro 4
> +.Xr intro 4 ,
> +.Xr qcpmic 4
> .Sh HISTORY
> The
> .Nm
> Index: qcpwm.4
> ===
> RCS file: /cvs/src/share/man/man4/qcpwm.4,v
> retrieving revision 1.1
> diff -u -p -r1.1 qcpwm.4
> --- qcpwm.410 Nov 2022 13:08:57 -1.1
> +++ qcpwm.410 Nov 2022 16:43:07 -
> @@ -28,8 +28,8 @@ The
> driver provides support for the PWM controllers integrated on various
> Qualcomm Snapdragon SoCs.
> .Sh SEE ALSO
> -.Xr qcpmic 4 ,
> -.Xr intro 4
> +.Xr intro 4 ,
> +.Xr qcpmic 4
> .Sh HISTORY
> The
> .Nm
> Index: qcrtc.4
> ===
> RCS file: /cvs/src/share/man/man4/qcrtc.4,v
> retrieving revision 1.1
> diff -u -p -r1.1 qcrtc.4
> --- qcrtc.410 Nov 2022 13:08:57 -1.1
> +++ qcrtc.410 Nov 2022 16:43:07 -
> @@ -28,8 +28,8 @@ The
> driver provides support for the RTC integrated on various Qualcomm
> Snapdragon SoCs.
> .Sh SEE ALSO
> -.Xr qcpmic 4 ,
> -.Xr intro 4
> +.Xr intro 4 ,
> +.Xr qcpmic 4
> .Sh HISTORY
> The
> .Nm
> Index: qcspmi.4
> ===
> RCS file: /cvs/src/share/man/man4/qcspmi.4,v
> retrieving revision 1.1
> diff -u -p -r1.1 qcspmi.4
> --- qcspmi.410 Nov 2022 12:57:08 -1.1
> +++ qcspmi.410 Nov 2022 16:43:07 -
> @@ -29,8 +29,8 @@ The
> driver provides support for the SPMI controller found on various
> Qualcomm Snapdragon SoCs.
> .Sh SEE ALSO
> -.Xr qcpmic 4 ,
> -.Xr intro 4
> +.Xr intro 4 ,
> +.Xr qcpmic 4
> .Sh HISTORY
> The
> .Nm
> 



Re: Push kernel lock inside in{,6}_control()

2022-11-10 Thread ssnf
> On Wed, Nov 09, 2022 at 02:36:57PM +, ssnf wrote:
> > My softraid keydisk did not get initialized during the install process.
> > This fixes it.
>
> On 22/11/09 07:28PM, Klemens Nanni wrote:
> You need to be more specific, installing to a softraid volume has always
> required manual steps, so I have no idea what you did (not) do and when.
 
The comment above my diff states that "In case this is a softraid 
device, make sure all underlying device nodes exist before installing
boot-blocks on disk."

Without the double quotes, make_dev would not initialize my second disk
on my softraid volume, only the first disk. And once it reached the
installboot function, it would say that "/dev/rwd1c" did not exist.

The reason is that, without the double quotes, we get a multi-line
result from the $(bioctl|sed) command and only the first line is
actually passed to the make_dev function.

When we place it inside the double quotes, the multi-line result
becomes a single line, and thus make_dev works as expected.

ROOTDISK in my case was sd0, the first disk was wd0 and the second one
was wd1.

sd0 is an encrypted softraid device where wd0a is the RAID partition
and wd1 is the keydisk.

P.S.: It was not necessarily the keydisk that was uninitialized, but
rather the second disk that resulted from the bioctl command on the
softraid volume.



qcpmic.4, qcpmicgpio.4, etc.: Sort SEE ALSO

2022-11-10 Thread Josiah Frentsos
Index: qcpmic.4
===
RCS file: /cvs/src/share/man/man4/qcpmic.4,v
retrieving revision 1.2
diff -u -p -r1.2 qcpmic.4
--- qcpmic.410 Nov 2022 13:08:57 -  1.2
+++ qcpmic.410 Nov 2022 16:43:07 -
@@ -34,12 +34,12 @@ Snapdragon SoCs.
 The functionality for the hardware blocks found in each PMIC is
 implemented in the children attaching to this driver.
 .Sh SEE ALSO
-.Xr qcspmi 4 ,
+.Xr intro 4 ,
 .Xr qcpmicgpio 4 ,
 .Xr qcpon 4 ,
 .Xr qcpwm 4 ,
 .Xr qcrtc 4 ,
-.Xr intro 4
+.Xr qcspmi 4
 .Sh HISTORY
 The
 .Nm
Index: qcpmicgpio.4
===
RCS file: /cvs/src/share/man/man4/qcpmicgpio.4,v
retrieving revision 1.1
diff -u -p -r1.1 qcpmicgpio.4
--- qcpmicgpio.410 Nov 2022 12:57:08 -  1.1
+++ qcpmicgpio.410 Nov 2022 16:43:07 -
@@ -30,8 +30,8 @@ SoCs.
 It does not provide direct device driver entry points but makes its
 functions available to other drivers.
 .Sh SEE ALSO
-.Xr qcpmic 4 ,
-.Xr intro 4
+.Xr intro 4 ,
+.Xr qcpmic 4
 .Sh HISTORY
 The
 .Nm
Index: qcpon.4
===
RCS file: /cvs/src/share/man/man4/qcpon.4,v
retrieving revision 1.1
diff -u -p -r1.1 qcpon.4
--- qcpon.4 10 Nov 2022 13:08:57 -  1.1
+++ qcpon.4 10 Nov 2022 16:43:07 -
@@ -26,10 +26,11 @@
 The
 .Nm
 driver provides support for the PON controllers integrated on various
-Qualcomm Snapdragon SoCs.  This controller contains the power button.
+Qualcomm Snapdragon SoCs.
+This controller contains the power button.
 .Sh SEE ALSO
-.Xr qcpmic 4 ,
-.Xr intro 4
+.Xr intro 4 ,
+.Xr qcpmic 4
 .Sh HISTORY
 The
 .Nm
Index: qcpwm.4
===
RCS file: /cvs/src/share/man/man4/qcpwm.4,v
retrieving revision 1.1
diff -u -p -r1.1 qcpwm.4
--- qcpwm.4 10 Nov 2022 13:08:57 -  1.1
+++ qcpwm.4 10 Nov 2022 16:43:07 -
@@ -28,8 +28,8 @@ The
 driver provides support for the PWM controllers integrated on various
 Qualcomm Snapdragon SoCs.
 .Sh SEE ALSO
-.Xr qcpmic 4 ,
-.Xr intro 4
+.Xr intro 4 ,
+.Xr qcpmic 4
 .Sh HISTORY
 The
 .Nm
Index: qcrtc.4
===
RCS file: /cvs/src/share/man/man4/qcrtc.4,v
retrieving revision 1.1
diff -u -p -r1.1 qcrtc.4
--- qcrtc.4 10 Nov 2022 13:08:57 -  1.1
+++ qcrtc.4 10 Nov 2022 16:43:07 -
@@ -28,8 +28,8 @@ The
 driver provides support for the RTC integrated on various Qualcomm
 Snapdragon SoCs.
 .Sh SEE ALSO
-.Xr qcpmic 4 ,
-.Xr intro 4
+.Xr intro 4 ,
+.Xr qcpmic 4
 .Sh HISTORY
 The
 .Nm
Index: qcspmi.4
===
RCS file: /cvs/src/share/man/man4/qcspmi.4,v
retrieving revision 1.1
diff -u -p -r1.1 qcspmi.4
--- qcspmi.410 Nov 2022 12:57:08 -  1.1
+++ qcspmi.410 Nov 2022 16:43:07 -
@@ -29,8 +29,8 @@ The
 driver provides support for the SPMI controller found on various
 Qualcomm Snapdragon SoCs.
 .Sh SEE ALSO
-.Xr qcpmic 4 ,
-.Xr intro 4
+.Xr intro 4 ,
+.Xr qcpmic 4
 .Sh HISTORY
 The
 .Nm



Re: special case mpe(4) in in6_ifattach()

2022-11-10 Thread Klemens Nanni
On Fri, Nov 04, 2022 at 03:40:04PM +0100, Claudio Jeker wrote:
> So mpe(4) is a special device. It is a point-to-multipoint interface that
> does not do multicast. So setting IFF_MULTICAST on the interface is not
> correct but IPv6 depends on it because neighbor discovery.
> 
> Now there is no neighbor discovery on mpe(4) the neighbors are handled via
> BGP. So lets adjust in6_ifattach() to succeed for mpe(4) interfaces and
> with that BGP IPv6 L3VPN should work.
> 
> It may be an idea to move the IFF_MULTCAST check down into the
> ifp->if_type switch but right now this is good enough. I wonder if we have
> other interfaces that need similar treatment (e.g. mgre).

Good enough for now, I also have a few minues for nd6.c but prefer
simpler changes first, then the cleanup come later.

OK kn

> -- 
> :wq Claudio
> 
> Index: netinet6/in6_ifattach.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6_ifattach.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 in6_ifattach.c
> --- netinet6/in6_ifattach.c   8 Sep 2022 10:22:07 -   1.119
> +++ netinet6/in6_ifattach.c   4 Nov 2022 12:52:36 -
> @@ -373,7 +373,8 @@ in6_ifattach(struct ifnet *ifp)
>   if (ifp->if_mtu < IPV6_MMTU)
>   return (EINVAL);
>  
> - if ((ifp->if_flags & IFF_MULTICAST) == 0)
> + /* ND needs multicast but mpe(4) does neither multicast nor ND */
> + if ((ifp->if_flags & IFF_MULTICAST) == 0 && ifp->if_type != IFT_MPLS)
>   return (EINVAL);
>  
>   /*
> @@ -389,10 +390,14 @@ in6_ifattach(struct ifnet *ifp)
>   return (error);
>   }
>  
> - /* Interfaces that rely on strong a priori cryptographic binding of
> -  * IP addresses are incompatible with automatically assigned llv6. */
> + /*
> +  * Some interface don't need an automatically assigned link-local
> +  * address either because it think it is special (wireguard) or

s/it think//

> +  * because there is no need and no neighbor discovery (mpe).

s/because//

> +  */
>   switch (ifp->if_type) {
>   case IFT_WIREGUARD:
> + case IFT_MPLS:
>   return (0);
>   }
>  
> 



Re: replace SRP with SMR in the if_idxmap commit

2022-11-10 Thread Visa Hankala
On Thu, Nov 10, 2022 at 11:59:02PM +1000, David Gwynne wrote:
> On Thu, Nov 10, 2022 at 09:04:22PM +1000, David Gwynne wrote:
> > On Thu, Nov 10, 2022 at 08:10:35AM +1000, David Gwynne wrote:
> > > I know what this is. The barrier at the end of if_idxmap_alloc is 
> > > sleeping waiting for cpus to run that aren't running cos we haven't 
> > > finished booting yet.
> > > 
> > > I'll back it out and fix it up when I'm actually awake.
> > 
> > i woke up, so here's a diff.
> > 
> > this uses the usedidx as an smr_entry so we can use smr_call instead of
> > smr_barrier during autoconf.
> > 
> > this works for me on a box with a lot of hardware interfaces, which
> > forces allocation of a new interface map and therefore destruction of
> > the initial one.
> > 
> > there is still an smr_barrier in if_idxmap_remove, but remove only
> > happens when an interface goes away. we could use part of struct ifnet
> > (eg, if_description) as an smr_entry if needed.
> > 
> 
> this one is even better.

Please add #include .

> + SMR_PTR_SET_LOCKED(_idxmap.map, if_map);
> +
> + smr_init(>smr);
> + dtor->map = oif_map;

I think smr_call could be moved here. The call is non-blocking.
Then the scope of the dtor variable could be reduced too.

dtor->map = oif_map;
smr_init(>smr);
smr_call(>smr, if_idxmap_free, dtor);

OK visa@



Push kernel lock in6_ioctl_change_ifaddr()

2022-11-10 Thread Klemens Nanni
Purely mechanical except for the early function-local sockaddr dance,
same story as with the previous nd6_ioctl() push.

Feedback? OK?
---
 sys/netinet6/in6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 26f1127973e..65f4b44e700 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -245,9 +245,7 @@ in6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, int 
privileged)
case SIOCDIFADDR_IN6:
if (!privileged)
return (EPERM);
-   KERNEL_LOCK();
error = in6_ioctl_change_ifaddr(cmd, data, ifp);
-   KERNEL_UNLOCK();
return (error);
case SIOCSIFADDR:
case SIOCSIFDSTADDR:
@@ -303,6 +301,7 @@ in6_ioctl_change_ifaddr(u_long cmd, caddr_t data, struct 
ifnet *ifp)
return (error);
}
 
+   KERNEL_LOCK();
NET_LOCK();
 
if (sa6 != NULL) {
@@ -409,6 +408,7 @@ in6_ioctl_change_ifaddr(u_long cmd, caddr_t data, struct 
ifnet *ifp)
 
 err:
NET_UNLOCK();
+   KERNEL_UNLOCK();
return (error);
 }
 
-- 
2.38.1



Push kernel lock into nd6_ioctl()

2022-11-10 Thread Klemens Nanni
Purely mechanical except for the early function-local sockaddr dance.

Feedback? OK?
---
 sys/netinet6/in6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 9e2691c5964..a3737914828 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -239,9 +239,7 @@ in6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, int 
privileged)
case SIOCGIFNETMASK_IN6:
case SIOCGIFAFLAG_IN6:
case SIOCGIFALIFETIME_IN6:
-   KERNEL_LOCK();
error = in6_ioctl_get(cmd, data, ifp);
-   KERNEL_UNLOCK();
return (error);
case SIOCAIFADDR_IN6:
case SIOCDIFADDR_IN6:
@@ -431,6 +429,7 @@ in6_ioctl_get(u_long cmd, caddr_t data, struct ifnet *ifp)
return (error);
}
 
+   KERNEL_LOCK();
NET_LOCK_SHARED();
 
if (sa6 != NULL) {
@@ -526,6 +525,7 @@ in6_ioctl_get(u_long cmd, caddr_t data, struct ifnet *ifp)
 
 err:
NET_UNLOCK_SHARED();
+   KERNEL_UNLOCK();
return (error);
 }
 
-- 
2.38.1




Re: arm64 pwmbl(4): simplify ramp case

2022-11-10 Thread Patrick Wildt
On Mon, Jul 04, 2022 at 06:47:33PM +, Miod Vallat wrote:
> When the fdt does not provide a list of brightness states, pwmbl(4)
> builds a 256 state ramp (i.e. state[i] = i with 0 <= i < 256).
> 
> The following diff keeps that behaviour, but gets rid of the malloc
> call for that ramp, since the values are trivially known.
> 
> Compiles but not tested due to the lack of such hardware.
> 
> Index: sys/dev/fdt/pwmbl.c
> ===
> RCS file: /OpenBSD/src/sys/dev/fdt/pwmbl.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 pwmbl.c
> --- sys/dev/fdt/pwmbl.c   24 Oct 2021 17:52:26 -  1.6
> +++ sys/dev/fdt/pwmbl.c   4 Jul 2022 18:45:16 -
> @@ -35,7 +35,7 @@ struct pwmbl_softc {
>   struct device   sc_dev;
>   uint32_t*sc_pwm;
>   int sc_pwm_len;
> - uint32_t*sc_levels;
> + uint32_t*sc_levels; /* NULL if simple ramp */
>   int sc_nlevels;
>   uint32_tsc_max_level;
>   uint32_tsc_def_level;
> @@ -73,7 +73,7 @@ pwmbl_attach(struct device *parent, stru
>   struct pwmbl_softc *sc = (struct pwmbl_softc *)self;
>   struct fdt_attach_args *faa = aux;
>   uint32_t *gpios;
> - int i, len;
> + int len;
>  
>   len = OF_getproplen(faa->fa_node, "pwms");
>   if (len < 0) {
> @@ -95,7 +95,7 @@ pwmbl_attach(struct device *parent, stru
>   }
>  
>   len = OF_getproplen(faa->fa_node, "brightness-levels");
> - if (len > 0) {
> + if (len >= sizeof(uint32_t)) {

This actually breaks my machine.  malloc() is saying allocation too
large.  OF_getproplen will return -1 on that.  Is it possible that
len is treated as uint64_t as it is an int and sizeof is effectively
uint64_t?

Moving len to ssize_t doesn't fix it, but doing

if (len >= (int)sizeof(uint32_t)) {

works.  So I wonder if

if (len > 0 && len >= sizeof(uint32_t)) {

would work as well.  Or maybe let's just keep it as it is?

>   sc->sc_levels = malloc(len, M_DEVBUF, M_WAITOK);
>   OF_getpropintarray(faa->fa_node, "brightness-levels",
>   sc->sc_levels, len);
> @@ -107,13 +107,9 @@ pwmbl_attach(struct device *parent, stru
>   sc->sc_def_level = sc->sc_nlevels - 1;
>   sc->sc_def_level = sc->sc_levels[sc->sc_def_level];
>   } else {
> + /* No levels, assume a simple 0..255 ramp. */
>   sc->sc_nlevels = 256;
> - sc->sc_levels = mallocarray(sc->sc_nlevels,
> - sizeof(uint32_t), M_DEVBUF, M_WAITOK);
> - for (i = 0; i < sc->sc_nlevels; i++)
> - sc->sc_levels[i] = i;
> - sc->sc_max_level = sc->sc_levels[sc->sc_nlevels - 1];
> - sc->sc_def_level = sc->sc_levels[sc->sc_nlevels - 1];
> + sc->sc_max_level = sc->sc_def_level = sc->sc_nlevels - 1;
>   }
>  
>   printf("\n");
> @@ -144,17 +140,22 @@ pwmbl_find_brightness(struct pwmbl_softc
>   uint32_t mid;
>   int i;

Might be easier to have a check like:

if (sc->sc_channels == NULL)
return level < sc->sc_nlevels ? level : sc->sc_nlevels - 1;

Then you don't need to indent the whole block.  Makes the diff smaller
and a bit easier to understand?

Cheers,
Patrick

>  
> - for (i = 0; i < sc->sc_nlevels - 1; i++) {
> - mid = (sc->sc_levels[i] + sc->sc_levels[i + 1]) / 2;
> - if (sc->sc_levels[i] <= level && level <= mid)
> + if (sc->sc_levels) {
> + for (i = 0; i < sc->sc_nlevels - 1; i++) {
> + mid = (sc->sc_levels[i] + sc->sc_levels[i + 1]) / 2;
> + if (sc->sc_levels[i] <= level && level <= mid)
> + return sc->sc_levels[i];
> + if (mid < level && level <= sc->sc_levels[i + 1])
> + return sc->sc_levels[i + 1];
> + }
> + if (level < sc->sc_levels[0])
> + return sc->sc_levels[0];
> + else
>   return sc->sc_levels[i];
> - if (mid < level && level <= sc->sc_levels[i + 1])
> - return sc->sc_levels[i + 1];
> +
> + } else {
> + return level < sc->sc_nlevels ? level : sc->sc_nlevels - 1;
>   }
> - if (level < sc->sc_levels[0])
> - return sc->sc_levels[0];
> - else
> - return sc->sc_levels[i];
>  }
>  
>  int
> 



nd6_ioctl: Use local variable for consistency

2022-11-10 Thread Klemens Nanni
No point in using the variable for half of the check.

OK?

diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index e65eeb0c2ac..d88969be617 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -1025,24 +1025,24 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
case SIOCGNBRINFO_IN6:
{
struct llinfo_nd6 *ln;
struct in6_addr nb_addr = nbi->addr; /* make local for safety */
time_t expire;
 
NET_LOCK_SHARED();
/*
 * XXX: KAME specific hack for scoped addresses
 *  : for other scopes than link-local?
 */
-   if (IN6_IS_ADDR_LINKLOCAL(>addr) ||
-   IN6_IS_ADDR_MC_LINKLOCAL(>addr)) {
+   if (IN6_IS_ADDR_LINKLOCAL(_addr) ||
+   IN6_IS_ADDR_MC_LINKLOCAL(_addr)) {
u_int16_t *idp = (u_int16_t *)_addr.s6_addr[2];
 
if (*idp == 0)
*idp = htons(ifp->if_index);
}
 
rt = nd6_lookup(_addr, 0, ifp, ifp->if_rdomain);
if (rt == NULL ||
(ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
rtfree(rt);
NET_UNLOCK_SHARED();



Re: replace SRP with SMR in the if_idxmap commit

2022-11-10 Thread Hrvoje Popovski
On 10.11.2022. 14:59, David Gwynne wrote:
> On Thu, Nov 10, 2022 at 09:04:22PM +1000, David Gwynne wrote:
>> On Thu, Nov 10, 2022 at 08:10:35AM +1000, David Gwynne wrote:
>>> I know what this is. The barrier at the end of if_idxmap_alloc is sleeping 
>>> waiting for cpus to run that aren't running cos we haven't finished booting 
>>> yet.
>>>
>>> I'll back it out and fix it up when I'm actually awake.
>> i woke up, so here's a diff.
>>
>> this uses the usedidx as an smr_entry so we can use smr_call instead of
>> smr_barrier during autoconf.
>>
>> this works for me on a box with a lot of hardware interfaces, which
>> forces allocation of a new interface map and therefore destruction of
>> the initial one.
>>
>> there is still an smr_barrier in if_idxmap_remove, but remove only
>> happens when an interface goes away. we could use part of struct ifnet
>> (eg, if_description) as an smr_entry if needed.
>>
> this one is even better.


Hi,

with this diff my machines boot as they should.

Thank you



Re: pflow(4): make `so' dereference safe

2022-11-10 Thread Vitaliy Makkoveev
ping...

On Fri, Nov 04, 2022 at 10:04:35PM +0300, Vitaliy Makkoveev wrote:
> Each pflow(4) interface has associated socket, referenced as sc->so. We
> set this socket in pflowioctl() which is called with both kernel and net
> locks held. In the pflow_output_process() task we do sc->so dereference,
> which is protected by kernel lock. But the sosend(), called deeper by
> pflow_output_process(), has sleep points, introduced by solock(). So
> this kernel lock protection doesn't work as expected, and concurrent
> pflowioctl() could override this sc->so.
> 
> The diff below introduces per pflow(4) instance `sc_lock' rwlock(9) to
> protect sc->so. Since the solock() of udp(4) sockets uses netlock as
> backend, the `sc_lock' should be taken first. This expands a little
> netlock relocking within pflowioctl().
> 
> Also, pflow_sendout_mbuf() called by pflow_output_process(), now called
> without kernel lock held, so the mp safe counters_pkt(9) used instead
> of manual `if_opackets' increment.
> 
> Since if_detach() does some ifnet destruction, now it can't be called
> before we finish pflow_output_process() task, otherwise we introduce use
> after free for interface counters. In other hand, we need to deny
> pflowioctl() to reschedule pflow_output_process() task. The `sc_dyind'
> flag introduced for that.
> 
> Hrvoje, could you test this diff please?
> 
> Index: sys/net/if_pflow.c
> ===
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 if_pflow.c
> --- sys/net/if_pflow.c12 Aug 2022 16:38:50 -  1.96
> +++ sys/net/if_pflow.c4 Nov 2022 18:23:27 -
> @@ -132,11 +132,11 @@ pflow_output_process(void *arg)
>   struct mbuf *m;
>  
>   mq_delist(>sc_outputqueue, );
> - KERNEL_LOCK();
> + rw_enter_read(>sc_lock);
>   while ((m = ml_dequeue()) != NULL) {
>   pflow_sendout_mbuf(sc, m);
>   }
> - KERNEL_UNLOCK();
> + rw_exit_read(>sc_lock);
>  }
>  
>  int
> @@ -146,6 +146,7 @@ pflow_clone_create(struct if_clone *ifc,
>   struct pflow_softc  *pflowif;
>  
>   pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
> + rw_init(>sc_lock, "pflowlk");
>   MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
>   pflowif->sc_version = PFLOW_PROTO_DEFAULT;
>  
> @@ -254,6 +255,7 @@ pflow_clone_create(struct if_clone *ifc,
>   mq_init(>sc_outputqueue, 8192, IPL_SOFTNET);
>   pflow_setmtu(pflowif, ETHERMTU);
>   pflow_init_timeouts(pflowif);
> + if_counters_alloc(ifp);
>   if_attach(ifp);
>   if_alloc_sadl(ifp);
>  
> @@ -275,6 +277,7 @@ pflow_clone_destroy(struct ifnet *ifp)
>   error = 0;
>  
>   NET_LOCK();
> + sc->sc_dying = 1;
>   SLIST_REMOVE(_list, sc, pflow_softc, sc_next);
>   NET_UNLOCK();
>  
> @@ -475,10 +478,17 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>   struct pflowreq  pflowr;
>   int  error;
>  
> + if (sc->sc_dying)
> + return ENXIO;
> +
>   switch (cmd) {
>   case SIOCSIFADDR:
>   case SIOCSIFDSTADDR:
>   case SIOCSIFFLAGS:
> + /* XXXSMP: enforce lock order */
> + NET_UNLOCK();
> + rw_enter_read(>sc_lock);
> + NET_LOCK();
>   if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>   ifp->if_flags |= IFF_RUNNING;
>   sc->sc_gcounter=pflowstats.pflow_flows;
> @@ -487,6 +497,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>   pflow_sendout_ipfix_tmpl(sc);
>   } else
>   ifp->if_flags &= ~IFF_RUNNING;
> + rw_exit_read(>sc_lock);
>   break;
>   case SIOCSIFMTU:
>   if (ifr->ifr_mtu < PFLOW_MINMTU)
> @@ -523,10 +534,13 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>  
>   /* XXXSMP breaks atomicity */
>   NET_UNLOCK();
> + rw_enter_write(>sc_lock);
>   error = pflow_set(sc, );
>   NET_LOCK();
> - if (error != 0)
> + if (error != 0) {
> + rw_exit_write(>sc_lock);
>   return (error);
> + }
>  
>   if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
>   ifp->if_flags |= IFF_RUNNING;
> @@ -535,6 +549,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>   pflow_sendout_ipfix_tmpl(sc);
>   } else
>   ifp->if_flags &= ~IFF_RUNNING;
> + rw_exit_write(>sc_lock);
>  
>   break;
>  
> @@ -1196,8 +1211,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
>  int
>  pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
>  {
> - sc->sc_if.if_opackets++;
> - sc->sc_if.if_obytes += m->m_pkthdr.len;
> + counters_pkt(sc->sc_if.if_counters,
> + ifc_opackets, ifc_obytes, 

Re: replace SRP with SMR in the if_idxmap commit

2022-11-10 Thread David Gwynne
On Thu, Nov 10, 2022 at 09:04:22PM +1000, David Gwynne wrote:
> On Thu, Nov 10, 2022 at 08:10:35AM +1000, David Gwynne wrote:
> > I know what this is. The barrier at the end of if_idxmap_alloc is sleeping 
> > waiting for cpus to run that aren't running cos we haven't finished booting 
> > yet.
> > 
> > I'll back it out and fix it up when I'm actually awake.
> 
> i woke up, so here's a diff.
> 
> this uses the usedidx as an smr_entry so we can use smr_call instead of
> smr_barrier during autoconf.
> 
> this works for me on a box with a lot of hardware interfaces, which
> forces allocation of a new interface map and therefore destruction of
> the initial one.
> 
> there is still an smr_barrier in if_idxmap_remove, but remove only
> happens when an interface goes away. we could use part of struct ifnet
> (eg, if_description) as an smr_entry if needed.
> 

this one is even better.

Index: if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.676
diff -u -p -r1.676 if.c
--- if.c9 Nov 2022 22:15:50 -   1.676
+++ if.c10 Nov 2022 13:55:39 -
@@ -196,32 +196,29 @@ void if_map_dtor(void *, void *);
 struct ifnet *if_ref(struct ifnet *);
 
 /*
- * struct if_map
- *
- * bounded array of ifnet srp pointers used to fetch references of live
- * interfaces with if_get().
- */
-
-struct if_map {
-   unsigned longlimit;
-   /* followed by limit ifnet srp pointers */
-};
-
-/*
  * struct if_idxmap
  *
  * infrastructure to manage updates and accesses to the current if_map.
+ *
+ * interface index 0 is special and represents "no interface", so we
+ * use the 0th slot in map to store the length of the array.
  */
 
 struct if_idxmap {
-   unsigned int serial;
-   unsigned int count;
-   struct srp   map;
-   struct rwlocklock;
-   unsigned char   *usedidx;   /* bitmap of indices in use */
+   unsigned int  serial;
+   unsigned int  count;
+   struct ifnet**map;  /* SMR protected */
+   struct rwlock lock;
+   unsigned char*usedidx;  /* bitmap of indices in use */
+};
+
+struct if_idxmap_dtor {
+   struct smr_entry  smr;
+   struct ifnet**map;
 };
 
 void   if_idxmap_init(unsigned int);
+void   if_idxmap_free(void *);
 void   if_idxmap_alloc(struct ifnet *);
 void   if_idxmap_insert(struct ifnet *);
 void   if_idxmap_remove(struct ifnet *);
@@ -265,7 +262,7 @@ ifinit(void)
 * most machines boot with 4 or 5 interfaces, so size the initial map
 * to accommodate this
 */
-   if_idxmap_init(8);
+   if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
 
for (i = 0; i < NET_TASKQ; i++) {
nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
@@ -274,48 +271,49 @@ ifinit(void)
}
 }
 
-static struct if_idxmap if_idxmap = {
-   0,
-   0,
-   SRP_INITIALIZER(),
-   RWLOCK_INITIALIZER("idxmaplk"),
-   NULL,
-};
-
-struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
-struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
+static struct if_idxmap if_idxmap;
 
 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
 
+static inline unsigned int
+if_idxmap_limit(struct ifnet **if_map)
+{
+   return ((uintptr_t)if_map[0]);
+}
+
+static inline size_t
+if_idxmap_usedidx_size(unsigned int limit)
+{
+   return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
+}
+
 void
 if_idxmap_init(unsigned int limit)
 {
-   struct if_map *if_map;
-   struct srp *map;
-   unsigned int i;
+   struct ifnet **if_map;
 
-   if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
+   rw_init(_idxmap.lock, "idxmaplk");
+   if_idxmap.serial = 1; /* skip ifidx 0 */
 
-   if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
-   M_IFADDR, M_WAITOK);
+   if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
+   M_WAITOK | M_ZERO);
 
-   if_map->limit = limit;
-   map = (struct srp *)(if_map + 1);
-   for (i = 0; i < limit; i++)
-   srp_init([i]);
+   if_map[0] = (struct ifnet *)(uintptr_t)limit;
 
-   if_idxmap.usedidx = malloc(howmany(limit, NBBY),
+   if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
M_IFADDR, M_WAITOK | M_ZERO);
+   setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */
 
/* this is called early so there's nothing to race with */
-   srp_update_locked(_map_gc, _idxmap.map, if_map);
+   SMR_PTR_SET_LOCKED(_idxmap.map, if_map);
 }
 
 void
 if_idxmap_alloc(struct ifnet *ifp)
 {
-   struct if_map *if_map;
-   struct srp *map;
+   struct if_idxmap_dtor *dtor = NULL;
+   struct ifnet **if_map;
+   unsigned int limit;
unsigned int 

Re: adding a mutex to pf_state

2022-11-10 Thread David Gwynne
Yes please. ok dlg@.

> On 10 Nov 2022, at 10:10 pm, Alexandr Nedvedicky  wrote:
> 
> Hello,
> 
> David (dlg@) asked me to shrink the scope of the change.  The new diff
> introduces a mutex to pf_state and adjust pf_detach_state() to utilize the
> newly introduced mutex.  I've also added annotation to pf_state members. The
> members with '?' needs our attention. We need to verify if they are either
> protected by global state lock, or if we cover them by newly introduced
> mutex.
> 
> new diff also adds mtx_init() to pf_state_import(). This was missing in
> my earlier diff. It was found and reported by Hrvoje@.
> 
> thanks and
> regards
> sashan
> 
> 
> 8<---8<---8<--8<
> diff --git a/sys/net/pf.c b/sys/net/pf.c
> index 2c13c99baac..c6022867dd4 100644
> --- a/sys/net/pf.c
> +++ b/sys/net/pf.c
> @@ -185,7 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *);
> void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
>sa_family_t, struct pf_rule *, u_int);
> void pf_detach_state(struct pf_state *);
> -void pf_state_key_detach(struct pf_state *, int);
> +void pf_state_key_detach(struct pf_state *,
> +struct pf_state_key *);
> u_int32_t pf_tcp_iss(struct pf_pdesc *);
> void pf_rule_to_actions(struct pf_rule *,
>struct pf_rule_actions *);
> @@ -779,7 +780,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
> pf_state *s, int idx)
> s->key[idx] = sk;
> 
> if ((si = pool_get(_state_item_pl, PR_NOWAIT)) == NULL) {
> - pf_state_key_detach(s, idx);
> + pf_state_key_detach(s, s->key[idx]);
> + s->key[idx] = NULL;
> return (-1);
> }
> si->s = s;
> @@ -799,42 +801,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
> pf_state *s, int idx)
> void
> pf_detach_state(struct pf_state *s)
> {
> - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
> - s->key[PF_SK_WIRE] = NULL;
> + struct pf_state_key *key[2];
> +
> + mtx_enter(>mtx);
> + key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
> + key[PF_SK_STACK] = s->key[PF_SK_STACK];
> + s->key[PF_SK_WIRE] = NULL;
> + s->key[PF_SK_STACK] = NULL;
> + mtx_leave(>mtx);
> +
> + if (key[PF_SK_WIRE] == key[PF_SK_STACK])
> + key[PF_SK_WIRE] = NULL;
> 
> - if (s->key[PF_SK_STACK] != NULL)
> - pf_state_key_detach(s, PF_SK_STACK);
> + if (key[PF_SK_STACK] != NULL)
> + pf_state_key_detach(s, key[PF_SK_STACK]);
> 
> - if (s->key[PF_SK_WIRE] != NULL)
> - pf_state_key_detach(s, PF_SK_WIRE);
> + if (key[PF_SK_WIRE] != NULL)
> + pf_state_key_detach(s, key[PF_SK_WIRE]);
> }
> 
> void
> -pf_state_key_detach(struct pf_state *s, int idx)
> +pf_state_key_detach(struct pf_state *s, struct pf_state_key *key)
> {
> struct pf_state_item *si;
> - struct pf_state_key *sk;
> 
> - if (s->key[idx] == NULL)
> + PF_STATE_ASSERT_LOCKED();
> +
> + if (key == NULL)
> return;
> 
> - si = TAILQ_FIRST(>key[idx]->states);
> + si = TAILQ_FIRST(>states);
> while (si && si->s != s)
>si = TAILQ_NEXT(si, entry);
> 
> if (si) {
> - TAILQ_REMOVE(>key[idx]->states, si, entry);
> + TAILQ_REMOVE(>states, si, entry);
> pool_put(_state_item_pl, si);
> }
> 
> - sk = s->key[idx];
> - s->key[idx] = NULL;
> - if (TAILQ_EMPTY(>states)) {
> - RB_REMOVE(pf_state_tree, _statetbl, sk);
> - sk->removed = 1;
> - pf_state_key_unlink_reverse(sk);
> - pf_state_key_unlink_inpcb(sk);
> - pf_state_key_unref(sk);
> + if (TAILQ_EMPTY(>states)) {
> + RB_REMOVE(pf_state_tree, _statetbl, key);
> + key->removed = 1;
> + pf_state_key_unlink_reverse(key);
> + pf_state_key_unlink_inpcb(key);
> + pf_state_key_unref(key);
> }
> }
> 
> @@ -997,7 +1007,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key 
> **skw,
> }
> *skw = s->key[PF_SK_WIRE];
> if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
> - pf_state_key_detach(s, PF_SK_WIRE);
> + pf_state_key_detach(s, s->key[PF_SK_WIRE]);
> + s->key[PF_SK_WIRE] = NULL;
> + *skw = NULL;
> PF_STATE_EXIT_WRITE();
> return (-1);
> }
> @@ -1429,6 +1441,7 @@ pf_state_import(const struct pfsync_state *sp, int 
> flags)
> st->sync_state = PFSYNC_S_NONE;
> 
> refcnt_init(>refcnt);
> + mtx_init(>mtx, IPL_NET);
> 
> /* XXX when we have anchors, use STATE_INC_COUNTERS */
> r->states_cur++;
> @@ -4348,6 +4361,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, 
> struct pf_rule *a,
> * pf_state_inserts() grabs reference for pfsync!
> */
> refcnt_init(>refcnt);
> + mtx_init(>mtx, IPL_NET);
> 
> switch (pd->proto) {
> case IPPROTO_TCP:
> diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
> index cc6c9f3dedc..9a75f9a1e6c 100644
> --- a/sys/net/pfvar.h
> +++ b/sys/net/pfvar.h
> @@ -743,37 +743,49 @@ struct pf_state_cmp {
> u_int8_t pad[3];
> };
> 
> +/*
> + * pf_state structure members require protection
> + * either by state lock (a global rw-lock) or
> + * needs to be protected by mutex which is part
> + * of state. The annotation goes as follows:
> + * - stateRW: means protected by exclusive state lock
> + * - mtx: protected by mutex (pf_state::state_mtx)
> + * - syncMTX: protected by mutex in pfsync
> + * - I: immutable since 

Re: special case mpe(4) in in6_ifattach()

2022-11-10 Thread David Gwynne



> On 10 Nov 2022, at 10:56 pm, Claudio Jeker  wrote:
> 
> On Fri, Nov 04, 2022 at 03:40:04PM +0100, Claudio Jeker wrote:
>> So mpe(4) is a special device. It is a point-to-multipoint interface that
>> does not do multicast. So setting IFF_MULTICAST on the interface is not
>> correct but IPv6 depends on it because neighbor discovery.
>> 
>> Now there is no neighbor discovery on mpe(4) the neighbors are handled via
>> BGP. So lets adjust in6_ifattach() to succeed for mpe(4) interfaces and
>> with that BGP IPv6 L3VPN should work.
>> 
>> It may be an idea to move the IFF_MULTCAST check down into the
>> ifp->if_type switch but right now this is good enough. I wonder if we have
>> other interfaces that need similar treatment (e.g. mgre).
> 
> I think this is a better diff.
> Only require the IFF_MULTCAST bit if nd6_need_cache() returns true.
> In other words full ND is running on the interface.
> Such tunnel interfaces will not get a link-local address automatically
> assigned anymore but such an address can still be set up by hand.

this makes sense to me. ok.

> 
> -- 
> :wq Claudio
> 
> Index: netinet6/in6_ifattach.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6_ifattach.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 in6_ifattach.c
> --- netinet6/in6_ifattach.c 8 Sep 2022 10:22:07 - 1.119
> +++ netinet6/in6_ifattach.c 10 Nov 2022 12:18:09 -
> @@ -373,7 +373,7 @@ in6_ifattach(struct ifnet *ifp)
> if (ifp->if_mtu < IPV6_MMTU)
> return (EINVAL);
> 
> - if ((ifp->if_flags & IFF_MULTICAST) == 0)
> + if (nd6_need_cache(ifp) && (ifp->if_flags & IFF_MULTICAST) == 0)
> return (EINVAL);
> 
> /*
> @@ -389,12 +389,12 @@ in6_ifattach(struct ifnet *ifp)
> return (error);
> }
> 
> - /* Interfaces that rely on strong a priori cryptographic binding of
> - * IP addresses are incompatible with automatically assigned llv6. */
> - switch (ifp->if_type) {
> - case IFT_WIREGUARD:
> + /*
> + * Only interfaces that need the ND cache should automatically
> + * assign a link local address.
> + */
> + if (!nd6_need_cache(ifp))
> return (0);
> - }
> 
> /* Assign a link-local address, if there's none. */
> if (in6ifa_ifpforlinklocal(ifp, 0) == NULL) {
> 



Re: special case mpe(4) in in6_ifattach()

2022-11-10 Thread Claudio Jeker
On Fri, Nov 04, 2022 at 03:40:04PM +0100, Claudio Jeker wrote:
> So mpe(4) is a special device. It is a point-to-multipoint interface that
> does not do multicast. So setting IFF_MULTICAST on the interface is not
> correct but IPv6 depends on it because neighbor discovery.
> 
> Now there is no neighbor discovery on mpe(4) the neighbors are handled via
> BGP. So lets adjust in6_ifattach() to succeed for mpe(4) interfaces and
> with that BGP IPv6 L3VPN should work.
> 
> It may be an idea to move the IFF_MULTCAST check down into the
> ifp->if_type switch but right now this is good enough. I wonder if we have
> other interfaces that need similar treatment (e.g. mgre).

I think this is a better diff.
Only require the IFF_MULTCAST bit if nd6_need_cache() returns true.
In other words full ND is running on the interface.
Such tunnel interfaces will not get a link-local address automatically
assigned anymore but such an address can still be set up by hand.

-- 
:wq Claudio

Index: netinet6/in6_ifattach.c
===
RCS file: /cvs/src/sys/netinet6/in6_ifattach.c,v
retrieving revision 1.119
diff -u -p -r1.119 in6_ifattach.c
--- netinet6/in6_ifattach.c 8 Sep 2022 10:22:07 -   1.119
+++ netinet6/in6_ifattach.c 10 Nov 2022 12:18:09 -
@@ -373,7 +373,7 @@ in6_ifattach(struct ifnet *ifp)
if (ifp->if_mtu < IPV6_MMTU)
return (EINVAL);
 
-   if ((ifp->if_flags & IFF_MULTICAST) == 0)
+   if (nd6_need_cache(ifp) && (ifp->if_flags & IFF_MULTICAST) == 0)
return (EINVAL);
 
/*
@@ -389,12 +389,12 @@ in6_ifattach(struct ifnet *ifp)
return (error);
}
 
-   /* Interfaces that rely on strong a priori cryptographic binding of
-* IP addresses are incompatible with automatically assigned llv6. */
-   switch (ifp->if_type) {
-   case IFT_WIREGUARD:
+   /*
+* Only interfaces that need the ND cache should automatically
+* assign a link local address.
+*/
+   if (!nd6_need_cache(ifp))
return (0);
-   }
 
/* Assign a link-local address, if there's none. */
if (in6ifa_ifpforlinklocal(ifp, 0) == NULL) {



Re: Push kernel lock inside in{,6}_control()

2022-11-10 Thread Klemens Nanni
On Thu, Nov 10, 2022 at 10:55:19AM +, Klemens Nanni wrote:
> so->so_state is already read without kernel lock inside soo_ioctl()
> which calls pru_control() aka in6_control() and in_control().
> 
> This leaves individual ioctl cases to unlock/push into.
> 
> Feedback? OK?

Now with the netinet6 bits included.
---
 sys/netinet/in.c   | 8 
 sys/netinet6/in6.c | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 990aaf84c8a..c44de17d502 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -202,8 +202,6 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
int privileged;
int error;
 
-   KERNEL_LOCK();
-
privileged = 0;
if ((so->so_state & SS_PRIV) != 0)
privileged++;
@@ -212,16 +210,18 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
 #ifdef MROUTING
case SIOCGETVIFCNT:
case SIOCGETSGCNT:
+   KERNEL_LOCK();
error = mrt_ioctl(so, cmd, data);
+   KERNEL_UNLOCK();
break;
 #endif /* MROUTING */
default:
+   KERNEL_LOCK();
error = in_ioctl(cmd, data, ifp, privileged);
+   KERNEL_UNLOCK();
break;
}
 
-   KERNEL_UNLOCK();
-
return error;
 }
 
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index a51ca2fa5a4..1d9c2c49162 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -199,8 +199,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
int privileged;
int error;
 
-   KERNEL_LOCK();
-
privileged = 0;
if ((so->so_state & SS_PRIV) != 0)
privileged++;
@@ -209,16 +207,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
 #ifdef MROUTING
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
+   KERNEL_LOCK();
error = mrt6_ioctl(so, cmd, data);
+   KERNEL_UNLOCK();
break;
 #endif /* MROUTING */
default:
+   KERNEL_LOCK();
error = in6_ioctl(cmd, data, ifp, privileged);
+   KERNEL_UNLOCK();
break;
}
 
-   KERNEL_UNLOCK();
-
return error;
 }
 
-- 
2.38.1



Re: adding a mutex to pf_state

2022-11-10 Thread Alexandr Nedvedicky
Hello,

David (dlg@) asked me to shrink the scope of the change.  The new diff
introduces a mutex to pf_state and adjust pf_detach_state() to utilize the
newly introduced mutex.  I've also added annotation to pf_state members. The
members with '?' needs our attention. We need to verify if they are either
protected by global state lock, or if we cover them by newly introduced
mutex.

new diff also adds mtx_init() to pf_state_import(). This was missing in
my earlier diff. It was found and reported by Hrvoje@.

thanks and
regards
sashan


8<---8<---8<--8<
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 2c13c99baac..c6022867dd4 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -185,7 +185,8 @@ int  pf_translate_icmp_af(struct pf_pdesc*, 
int, void *);
 voidpf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
sa_family_t, struct pf_rule *, u_int);
 voidpf_detach_state(struct pf_state *);
-voidpf_state_key_detach(struct pf_state *, int);
+voidpf_state_key_detach(struct pf_state *,
+   struct pf_state_key *);
 u_int32_t   pf_tcp_iss(struct pf_pdesc *);
 voidpf_rule_to_actions(struct pf_rule *,
struct pf_rule_actions *);
@@ -779,7 +780,8 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
s->key[idx] = sk;
 
if ((si = pool_get(_state_item_pl, PR_NOWAIT)) == NULL) {
-   pf_state_key_detach(s, idx);
+   pf_state_key_detach(s, s->key[idx]);
+   s->key[idx] = NULL;
return (-1);
}
si->s = s;
@@ -799,42 +801,50 @@ pf_state_key_attach(struct pf_state_key *sk, struct 
pf_state *s, int idx)
 void
 pf_detach_state(struct pf_state *s)
 {
-   if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
-   s->key[PF_SK_WIRE] = NULL;
+   struct pf_state_key *key[2];
+
+   mtx_enter(>mtx);
+   key[PF_SK_WIRE] = s->key[PF_SK_WIRE];
+   key[PF_SK_STACK] = s->key[PF_SK_STACK];
+   s->key[PF_SK_WIRE] = NULL;
+   s->key[PF_SK_STACK] = NULL;
+   mtx_leave(>mtx);
+
+   if (key[PF_SK_WIRE] == key[PF_SK_STACK])
+   key[PF_SK_WIRE] = NULL;
 
-   if (s->key[PF_SK_STACK] != NULL)
-   pf_state_key_detach(s, PF_SK_STACK);
+   if (key[PF_SK_STACK] != NULL)
+   pf_state_key_detach(s, key[PF_SK_STACK]);
 
-   if (s->key[PF_SK_WIRE] != NULL)
-   pf_state_key_detach(s, PF_SK_WIRE);
+   if (key[PF_SK_WIRE] != NULL)
+   pf_state_key_detach(s, key[PF_SK_WIRE]);
 }
 
 void
-pf_state_key_detach(struct pf_state *s, int idx)
+pf_state_key_detach(struct pf_state *s, struct pf_state_key *key)
 {
struct pf_state_item*si;
-   struct pf_state_key *sk;
 
-   if (s->key[idx] == NULL)
+   PF_STATE_ASSERT_LOCKED();
+
+   if (key == NULL)
return;
 
-   si = TAILQ_FIRST(>key[idx]->states);
+   si = TAILQ_FIRST(>states);
while (si && si->s != s)
si = TAILQ_NEXT(si, entry);
 
if (si) {
-   TAILQ_REMOVE(>key[idx]->states, si, entry);
+   TAILQ_REMOVE(>states, si, entry);
pool_put(_state_item_pl, si);
}
 
-   sk = s->key[idx];
-   s->key[idx] = NULL;
-   if (TAILQ_EMPTY(>states)) {
-   RB_REMOVE(pf_state_tree, _statetbl, sk);
-   sk->removed = 1;
-   pf_state_key_unlink_reverse(sk);
-   pf_state_key_unlink_inpcb(sk);
-   pf_state_key_unref(sk);
+   if (TAILQ_EMPTY(>states)) {
+   RB_REMOVE(pf_state_tree, _statetbl, key);
+   key->removed = 1;
+   pf_state_key_unlink_reverse(key);
+   pf_state_key_unlink_inpcb(key);
+   pf_state_key_unref(key);
}
 }
 
@@ -997,7 +1007,9 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key 
**skw,
}
*skw = s->key[PF_SK_WIRE];
if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
-   pf_state_key_detach(s, PF_SK_WIRE);
+   pf_state_key_detach(s, s->key[PF_SK_WIRE]);
+   s->key[PF_SK_WIRE] = NULL;
+   *skw = NULL;
PF_STATE_EXIT_WRITE();
return (-1);
}
@@ -1429,6 +1441,7 @@ pf_state_import(const struct pfsync_state *sp, int flags)
st->sync_state = PFSYNC_S_NONE;
 
refcnt_init(>refcnt);
+   mtx_init(>mtx, IPL_NET);
 
/* XXX when we have anchors, use STATE_INC_COUNTERS */
r->states_cur++;
@@ -4348,6 +4361,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, 
struct pf_rule *a,
 * pf_state_inserts() grabs reference for pfsync!
 */

Re: replace SRP with SMR in the if_idxmap commit

2022-11-10 Thread David Gwynne
On Thu, Nov 10, 2022 at 08:10:35AM +1000, David Gwynne wrote:
> I know what this is. The barrier at the end of if_idxmap_alloc is sleeping 
> waiting for cpus to run that aren't running cos we haven't finished booting 
> yet.
> 
> I'll back it out and fix it up when I'm actually awake.

i woke up, so here's a diff.

this uses the usedidx as an smr_entry so we can use smr_call instead of
smr_barrier during autoconf.

this works for me on a box with a lot of hardware interfaces, which
forces allocation of a new interface map and therefore destruction of
the initial one.

there is still an smr_barrier in if_idxmap_remove, but remove only
happens when an interface goes away. we could use part of struct ifnet
(eg, if_description) as an smr_entry if needed.

Index: if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.676
diff -u -p -r1.676 if.c
--- if.c9 Nov 2022 22:15:50 -   1.676
+++ if.c10 Nov 2022 10:58:30 -
@@ -196,32 +196,29 @@ void if_map_dtor(void *, void *);
 struct ifnet *if_ref(struct ifnet *);
 
 /*
- * struct if_map
- *
- * bounded array of ifnet srp pointers used to fetch references of live
- * interfaces with if_get().
- */
-
-struct if_map {
-   unsigned longlimit;
-   /* followed by limit ifnet srp pointers */
-};
-
-/*
  * struct if_idxmap
  *
  * infrastructure to manage updates and accesses to the current if_map.
+ *
+ * interface index 0 is special and represents "no interface", so we
+ * use the 0th slot in map to store the length of the array.
  */
 
 struct if_idxmap {
-   unsigned int serial;
-   unsigned int count;
-   struct srp   map;
-   struct rwlocklock;
-   unsigned char   *usedidx;   /* bitmap of indices in use */
+   unsigned int  serial;
+   unsigned int  count;
+   struct ifnet**map;  /* SMR protected */
+   struct rwlock lock;
+   unsigned char*usedidx;  /* bitmap of indices in use */
+};
+
+struct if_idxmap_dtor {
+   struct smr_entry  smr;
+   struct ifnet**map;
 };
 
 void   if_idxmap_init(unsigned int);
+void   if_idxmap_free(void *);
 void   if_idxmap_alloc(struct ifnet *);
 void   if_idxmap_insert(struct ifnet *);
 void   if_idxmap_remove(struct ifnet *);
@@ -265,7 +262,7 @@ ifinit(void)
 * most machines boot with 4 or 5 interfaces, so size the initial map
 * to accommodate this
 */
-   if_idxmap_init(8);
+   if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
 
for (i = 0; i < NET_TASKQ; i++) {
nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
@@ -274,48 +271,48 @@ ifinit(void)
}
 }
 
-static struct if_idxmap if_idxmap = {
-   0,
-   0,
-   SRP_INITIALIZER(),
-   RWLOCK_INITIALIZER("idxmaplk"),
-   NULL,
-};
-
-struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
-struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
+static struct if_idxmap if_idxmap;
 
 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
 
+static inline unsigned int
+if_idxmap_limit(struct ifnet **if_map)
+{
+   return ((uintptr_t)if_map[0]);
+}
+
+static inline size_t
+if_idxmap_usedidx_size(unsigned int limit)
+{
+   return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
+}
+
 void
 if_idxmap_init(unsigned int limit)
 {
-   struct if_map *if_map;
-   struct srp *map;
-   unsigned int i;
+   struct ifnet **if_map;
 
-   if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
+   rw_init(_idxmap.lock, "idxmaplk");
+   if_idxmap.serial = 1; /* skip ifidx 0 */
 
-   if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
-   M_IFADDR, M_WAITOK);
+   if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
+   M_WAITOK | M_ZERO);
 
-   if_map->limit = limit;
-   map = (struct srp *)(if_map + 1);
-   for (i = 0; i < limit; i++)
-   srp_init([i]);
+   if_map[0] = (struct ifnet *)(uintptr_t)limit;
 
-   if_idxmap.usedidx = malloc(howmany(limit, NBBY),
+   if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
M_IFADDR, M_WAITOK | M_ZERO);
 
/* this is called early so there's nothing to race with */
-   srp_update_locked(_map_gc, _idxmap.map, if_map);
+   SMR_PTR_SET_LOCKED(_idxmap.map, if_map);
 }
 
 void
 if_idxmap_alloc(struct ifnet *ifp)
 {
-   struct if_map *if_map;
-   struct srp *map;
+   struct ifnet **if_map, **oif_map;
+   struct if_idxmap_dtor *dtor = NULL;
+   unsigned int limit, olimit;
unsigned int index, i;
 
refcnt_init(>if_refcnt);
@@ -325,49 +322,46 @@ if_idxmap_alloc(struct ifnet *ifp)
if (++if_idxmap.count >= USHRT_MAX)

Push kernel lock inside in{,6}_control()

2022-11-10 Thread Klemens Nanni
so->so_state is already read without kernel lock inside soo_ioctl()
which calls pru_control() aka in6_control() and in_control().

This leaves individual ioctl cases to unlock/push into.

Feedback? OK?
---
 sys/netinet/in.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 990aaf84c8a..c44de17d502 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -202,8 +202,6 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
int privileged;
int error;
 
-   KERNEL_LOCK();
-
privileged = 0;
if ((so->so_state & SS_PRIV) != 0)
privileged++;
@@ -212,16 +210,18 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
 #ifdef MROUTING
case SIOCGETVIFCNT:
case SIOCGETSGCNT:
+   KERNEL_LOCK();
error = mrt_ioctl(so, cmd, data);
+   KERNEL_UNLOCK();
break;
 #endif /* MROUTING */
default:
+   KERNEL_LOCK();
error = in_ioctl(cmd, data, ifp, privileged);
+   KERNEL_UNLOCK();
break;
}
 
-   KERNEL_UNLOCK();
-
return error;
 }
 
-- 
2.38.1



Push kernel lock into pru_control()

2022-11-10 Thread Klemens Nanni
Purely mechanical, then in6_control() and in_control() can be pushed
further individually.

Feedback? OK?
---
 sys/kern/sys_socket.c | 2 --
 sys/netinet/in.c  | 4 
 sys/netinet6/in6.c| 4 
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index b07119b71cd..e42dd948006 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -134,9 +134,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct 
proc *p)
}
if (IOCGROUP(cmd) == 'r')
return (EOPNOTSUPP);
-   KERNEL_LOCK();
error = pru_control(so, cmd, data, NULL);
-   KERNEL_UNLOCK();
break;
}
 
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index cd8289d2e89..990aaf84c8a 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -202,6 +202,8 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
int privileged;
int error;
 
+   KERNEL_LOCK();
+
privileged = 0;
if ((so->so_state & SS_PRIV) != 0)
privileged++;
@@ -218,6 +220,8 @@ in_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
break;
}
 
+   KERNEL_UNLOCK();
+
return error;
 }
 
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 900456fff1a..a51ca2fa5a4 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -199,6 +199,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
int privileged;
int error;
 
+   KERNEL_LOCK();
+
privileged = 0;
if ((so->so_state & SS_PRIV) != 0)
privileged++;
@@ -215,6 +217,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, 
struct ifnet *ifp)
break;
}
 
+   KERNEL_UNLOCK();
+
return error;
 }
 
-- 
2.38.1