Re: diff: pfctl: error message for nonexisting rtable

2020-09-17 Thread YASUOKA Masahiko
the condition was reversed.

ok?
Index: parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.702
diff -u -p -r1.702 parse.y
--- parse.y 17 Sep 2020 10:09:43 -  1.702
+++ parse.y 17 Sep 2020 14:23:42 -
@@ -1216,7 +1216,7 @@ antispoof_opt : LABEL label   {
if ($2 < 0 || $2 > RT_TABLEID_MAX) {
yyerror("invalid rtable id");
YYERROR;
-   } else if (lookup_rtable($2) >= 1) {
+   } else if (lookup_rtable($2) < 1) {
yyerror("rtable %lld does not exist", $2);
YYERROR;
}
@@ -2003,7 +2003,7 @@ filter_opt: USER uids {
if ($2 < 0 || $2 > RT_TABLEID_MAX) {
yyerror("invalid rtable id");
YYERROR;
-   } else if (lookup_rtable($2) >= 1) {
+   } else if (lookup_rtable($2) < 1) {
yyerror("rtable %lld does not exist", $2);
YYERROR;
}



Re: diff: pfctl: error message for nonexisting rtable

2020-09-17 Thread YASUOKA Masahiko
Hi,

I just committed yours.

Thanks,

On Wed, 16 Sep 2020 16:07:40 +0200
Klemens Nanni  wrote:
> On Wed, Sep 16, 2020 at 07:49:19PM +0900, YASUOKA Masahiko wrote:
>> New diff is using -1 for ENOENT.
>> 
>> Also domainid == 0 is a valid domain id, but previous diff cannot make
>> a cache of it since 0 is the default value.  So new diff is doing
>> 
>> -static u_int found[RT_TABLEID_MAX+1];
>> +static struct {
>> +int  found;
>> +int  domainid;
>> +}rtables[RT_TABLEID_MAX+1];
>> 
>> to distinguish the default 0 and domainid 0.
> This looks more complicated than it needs to be, but I also don't want
> to bikeshed it;  given that the parser is happy with this and we plan to
> remove this code altogether anyway in the next release cycle:  OK kn.
> 
> Alternatively, here's a much simpler diff resembling what I had in mind.
> Feel free to commit this instead (with my OK), give me an OK for it or
> go ahead with yours.
> 
> It uses the same function and reflects the fact that every rdomain is a
> rtable but not every rtable is also a rdomain (your choice of `domainid'
> seems inconsistent with that).
> 
> Index: parse.y
> ===
> RCS file: /cvs/src/sbin/pfctl/parse.y,v
> retrieving revision 1.701
> diff -u -p -r1.701 parse.y
> --- parse.y   28 Jan 2020 15:40:35 -  1.701
> +++ parse.y   16 Sep 2020 13:58:23 -
> @@ -392,7 +392,7 @@ intinvalid_redirect(struct node_host *
>  u_int16_t parseicmpspec(char *, sa_family_t);
>  int   kw_casecmp(const void *, const void *);
>  int   map_tos(char *string, int *);
> -int   rdomain_exists(u_int);
> +int   lookup_rtable(u_int);
>  int   filteropts_to_rule(struct pf_rule *, struct filter_opts *);
>  
>  TAILQ_HEAD(loadanchorshead, loadanchors)
> @@ -1216,6 +1216,9 @@ antispoof_opt   : LABEL label   {
>   if ($2 < 0 || $2 > RT_TABLEID_MAX) {
>   yyerror("invalid rtable id");
>   YYERROR;
> + } else if (lookup_rtable($2) >= 1) {
> + yyerror("rtable %lld does not exist", $2);
> + YYERROR;
>   }
>   antispoof_opts.rtableid = $2;
>   }
> @@ -2000,6 +2003,9 @@ filter_opt  : USER uids {
>   if ($2 < 0 || $2 > RT_TABLEID_MAX) {
>   yyerror("invalid rtable id");
>   YYERROR;
> + } else if (lookup_rtable($2) >= 1) {
> + yyerror("rtable %lld does not exist", $2);
> + YYERROR;
>   }
>   filter_opts.rtableid = $2;
>   }
> @@ -2475,7 +2481,7 @@ if_item : STRING{
>   | RDOMAIN NUMBER{
>   if ($2 < 0 || $2 > RT_TABLEID_MAX)
>   yyerror("rdomain %lld outside range", $2);
> - else if (rdomain_exists($2) != 1)
> + else if (lookup_rtable($2) != 2)
>   yyerror("rdomain %lld does not exist", $2);
>  
>   $$ = calloc(1, sizeof(struct node_if));
> @@ -5868,37 +5874,38 @@ map_tos(char *s, int *val)
>  }
>  
>  int
> -rdomain_exists(u_int rdomain)
> +lookup_rtable(u_int rtableid)
>  {
>   size_t   len;
>   struct rt_tableinfo  info;
>   int  mib[6];
>   static u_int found[RT_TABLEID_MAX+1];
>  
> - if (found[rdomain] == 1)
> - return 1;
> + if (found[rtableid])
> + return found[rtableid];
>  
>   mib[0] = CTL_NET;
>   mib[1] = PF_ROUTE;
>   mib[2] = 0;
>   mib[3] = 0;
>   mib[4] = NET_RT_TABLE;
> - mib[5] = rdomain;
> + mib[5] = rtableid;
>  
>   len = sizeof(info);
>   if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
>   if (errno == ENOENT) {
>   /* table nonexistent */
> + found[rtableid] = 0;
>   return 0;
>   }
>   err(1, "%s", __func__);
>   }
> - if (info.rti_domainid == rdomain) {
> - found[rdomain] = 1;
> - return 1;
> + if (info.rti_domainid == rtableid) {
> + found[rtableid] = 2;
> + return 2;
>   }
> - /* rdomain is a table, but not an rdomain */
> - return 0;
> + found[rtableid] = 1;
> + return 1;
>  }
>  
>  int



Re: diff: pfctl: error message for nonexisting rtable

2020-09-16 Thread YASUOKA Masahiko
Hi,

On Wed, 16 Sep 2020 12:04:55 +0200
Klemens Nanni  wrote:
> Using the function verb would read a bit clearer/more intuitive,
> i.e.

Yes, "if (!rtable_exists($2))" seems better.

>> @@ -5887,17 +5897,37 @@ rdomain_exists(u_int rdomain)
>>  
>>  len = sizeof(info);
>>  if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
>> -if (errno == ENOENT) {
>> +if (errno == ENOENT)
>>  /* table nonexistent */
>> -return 0;
>> -}
>> -err(1, "%s", __func__);
>> -}
>> -if (info.rti_domainid == rdomain) {
>> -found[rdomain] = 1;
>> +domainid[rdomain] = RT_TABLEID_MAX;
> This does not look correct, RT_TABLEID_MAX (255) is the biggest *valid*
> id, so you cannot use it to denote a nonexistent routing table.

Good catch.  Thanks,

> Perhaps use `static int domainid[RT_TABLEID_MAX+1]' and `-1' to reflect
> ENOENT?

New diff is using -1 for ENOENT.

Also domainid == 0 is a valid domain id, but previous diff cannot make
a cache of it since 0 is the default value.  So new diff is doing

-   static u_int found[RT_TABLEID_MAX+1];
+   static struct {
+   int  found;
+   int  domainid;
+   }rtables[RT_TABLEID_MAX+1];

to distinguish the default 0 and domainid 0.

ok?


Make pfctl check if the rtable really exists when parsing the config.

Index: sbin/pfctl/parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.701
diff -u -p -r1.701 parse.y
--- sbin/pfctl/parse.y  28 Jan 2020 15:40:35 -  1.701
+++ sbin/pfctl/parse.y  16 Sep 2020 10:40:25 -
@@ -392,7 +392,9 @@ int  invalid_redirect(struct node_host *
 u_int16_t parseicmpspec(char *, sa_family_t);
 int kw_casecmp(const void *, const void *);
 int map_tos(char *string, int *);
+int get_domainid(u_int);
 int rdomain_exists(u_int);
+int rtable_exists(u_int);
 int filteropts_to_rule(struct pf_rule *, struct filter_opts *);
 
 TAILQ_HEAD(loadanchorshead, loadanchors)
@@ -1217,6 +1219,10 @@ antispoof_opt: LABEL label   {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (!rtable_exists($2)) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
antispoof_opts.rtableid = $2;
}
;
@@ -2001,6 +2007,10 @@ filter_opt   : USER uids {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (!rtable_exists($2)) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
filter_opts.rtableid = $2;
}
| DIVERTTO STRING PORT portplain {
@@ -2475,7 +2485,7 @@ if_item   : STRING{
| RDOMAIN NUMBER{
if ($2 < 0 || $2 > RT_TABLEID_MAX)
yyerror("rdomain %lld outside range", $2);
-   else if (rdomain_exists($2) != 1)
+   else if (!rdomain_exists($2))
yyerror("rdomain %lld does not exist", $2);
 
$$ = calloc(1, sizeof(struct node_if));
@@ -5868,36 +5878,60 @@ map_tos(char *s, int *val)
 }
 
 int
-rdomain_exists(u_int rdomain)
+get_domainid(u_int rtable)
 {
size_t   len;
struct rt_tableinfo  info;
int  mib[6];
-   static u_int found[RT_TABLEID_MAX+1];
+   static struct {
+   int  found;
+   int  domainid;
+   }rtables[RT_TABLEID_MAX+1];
 
-   if (found[rdomain] == 1)
-   return 1;
+   if (rtables[rtable].found)
+   return rtables[rtable].domainid;
 
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0;
mib[3] = 0;
mib[4] = NET_RT_TABLE;
-   mib[5] = rdomain;
+   mib[5] = rtable;
 
len = sizeof(info);
if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
-   if (errno == ENOENT) {
+   if (errno == ENOENT)
/* table nonexistent */
-   return 0;
-   }
-   err(1, "%s", __func__);
-   }
-   if (info.rti_domainid == rdomain) {
-   found[rdomain] = 1;
+   rtables[rtable].domainid = -1;
+   else
+   err(1, "%s", __func__);
+   } else
+   rtables[rtable].domainid = info.rti_domainid;
+   

Re: diff: pfctl: error message for nonexisting rtable

2020-09-16 Thread YASUOKA Masahiko
Hi,

So, it seems we need more code and testing for the pf(4) part.

Let me continue this separately.

On Mon, 14 Sep 2020 11:07:53 +0200
Klemens Nanni  wrote:
> On Mon, Sep 14, 2020 at 02:09:27PM +0900, YASUOKA Masahiko wrote:
>> Make pfctl check if the rtable really exists when parsing the config.
> I concur, but you can do this with less (duplicated) code.
> 
> Instead of copying rdomain_exists() into rtable_exists() with the
> `rti_domainid' check omitted, tweak (and rename) rdomain_exists() into
> returning the information whether the given ID is just an rtable.
> 
> rdomain_exists() merges the "invalid id" and "id is an rtable but not
> an rdomain" cases - make those separate return codes, check/adjust
> existing callers and use it for your new checks.

Yes, I could reduce the code.  Thanks,

ok?


Make pfctl check if the rtable really exists when parsing the config.

Index: sbin/pfctl/parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.701
diff -u -p -r1.701 parse.y
--- sbin/pfctl/parse.y  28 Jan 2020 15:40:35 -  1.701
+++ sbin/pfctl/parse.y  16 Sep 2020 09:11:21 -
@@ -392,7 +392,9 @@ int  invalid_redirect(struct node_host *
 u_int16_t parseicmpspec(char *, sa_family_t);
 int kw_casecmp(const void *, const void *);
 int map_tos(char *string, int *);
+int get_domainid(u_int);
 int rdomain_exists(u_int);
+int rtable_exists(u_int);
 int filteropts_to_rule(struct pf_rule *, struct filter_opts *);
 
 TAILQ_HEAD(loadanchorshead, loadanchors)
@@ -1217,6 +1219,10 @@ antispoof_opt: LABEL label   {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
antispoof_opts.rtableid = $2;
}
;
@@ -2001,6 +2007,10 @@ filter_opt   : USER uids {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
filter_opts.rtableid = $2;
}
| DIVERTTO STRING PORT portplain {
@@ -5868,15 +5878,15 @@ map_tos(char *s, int *val)
 }
 
 int
-rdomain_exists(u_int rdomain)
+get_domainid(u_int rdomain)
 {
size_t   len;
struct rt_tableinfo  info;
int  mib[6];
-   static u_int found[RT_TABLEID_MAX+1];
+   static u_int domainid[RT_TABLEID_MAX+1];
 
-   if (found[rdomain] == 1)
-   return 1;
+   if (domainid[rdomain] != 0)
+   return domainid[rdomain];
 
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@@ -5887,17 +5897,37 @@ rdomain_exists(u_int rdomain)
 
len = sizeof(info);
if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
-   if (errno == ENOENT) {
+   if (errno == ENOENT)
/* table nonexistent */
-   return 0;
-   }
-   err(1, "%s", __func__);
-   }
-   if (info.rti_domainid == rdomain) {
-   found[rdomain] = 1;
+   domainid[rdomain] = RT_TABLEID_MAX;
+   else
+   err(1, "%s", __func__);
+   } else
+   domainid[rdomain] = info.rti_domainid;
+
+   return domainid[rdomain];
+}
+
+int
+rdomain_exists(u_int rdomain)
+{
+   int domainid;
+
+   domainid = get_domainid(rdomain);
+   if (domainid == rdomain)
return 1;
-   }
/* rdomain is a table, but not an rdomain */
+   return 0;
+}
+
+int
+rtable_exists(u_int rtable)
+{
+   int domainid;
+
+   domainid = get_domainid(rtable);
+   if (domainid < RT_TABLEID_MAX)
+   return 1;
return 0;
 }
 



Re: diff: pfctl: error message for nonexisting rtable

2020-09-14 Thread YASUOKA Masahiko
Hi,

On Tue, 15 Sep 2020 02:31:24 +0200
Klemens Nanni  wrote:
> On Tue, Sep 15, 2020 at 12:30:35AM +0200, Klemens Nanni wrote:
>> Actually, that should just work regardless of whether the routing
>> domain exists at ruleset creation time;  just like it is the case with
>> interface names/groups which may come and go at runtime without
>> requiring changes to the ruleset.
>> 
>> Rules on nonexistent interfaces won't match, routing domains (and
>> ultimately routing tables) should behave the same, I think.
>> 
>> Here's a diff that does this for routing domains allowing me to always
>> use `on rdomain 5' - I've tested it with a few exemplary rulesets and
>> behaviour is as expected.
>> 
>> It will need more eye balling and I am not pushing such changes before
>> release, but if that is a general direction we agree, your proposed
>> `rtable' fix could move along and become just as flexible instead.
> More on this:
> 
>   # ifconfig lo1 rdomain 1
>   # echo pass on rdomain 1 | pfctl -f-
>   # ifconfig lo1 destroy
>   # pfctl -sr 
>  
>   pass on rdomain 1 all flags S/SA
> 
> The ruleset stays valid and continues to work as soon as routing domain
> `1' reappears, there is no reason to require existence of it at ruleset
> creation;  this is safe because routing domains are just normative
> numbers, there's no further state when it comes to filtering - either
> the id on the packet matches the number in the ruleset or it doesn't.
> 
> Routing tables however are more involved as they can be used to *alter*
> a packet's flow in pf.conf(5), so requiring them to be present at
> ruleset creation makes sense to guarantee that pf will only ever change
> routing table ids to valid ones.

It's not clear to me why a nonexistent rdomain is accepted but a
nonexistent rtable is rejected.  I suppose we can make pf(4) handle a
packet for a nonexistent routing table as if the routing table were
empty.

> Routing domains can be deleted, but that doesn't invalidate rules like
> `on rdomain 1', which simply won't match when the given id does not
> exist.
> 
> Routing tables however cannot be deleted, they get moved to the default
> routing domain whenever their corresponding routing domain disappears;
> this is in line with only ever loading valid routing table ids into pf.
> 
> So unless I missed something, that ruleset creation (`pfctl -f ...')
> is the only occasion pf actually needs to validate routing table ids:
> they are guaranteed to always exist from then on.
> 
> Given this, my diff looks fine as is and should not change `rtable'
> behaviour - YASUOKA's diff is also fine as is and actually implements
> the validity check I just mentioned, obsoleting my initial feedback.



diff: pfctl: error message for nonexisting rtable

2020-09-13 Thread YASUOKA Masahiko
Hi,

A pf rule using "on rdomain n" with a nonexistent rdomain n causes the
error

  /etc/pf.conf:XXX: rdomain n does not exist

But a rule using "rtable n" with a nonexistent rtable n causes the
error

  pfctl: DIOCADDRULE: Device busy

It is hard to find the cause from this error message.

  /etc/pf.conf:XXX: rtable n does not exist

would be better.

ok?


Make pfctl check if the rtable really exists when parsing the config.

Index: sbin/pfctl/parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.701
diff -u -p -r1.701 parse.y
--- sbin/pfctl/parse.y  28 Jan 2020 15:40:35 -  1.701
+++ sbin/pfctl/parse.y  14 Sep 2020 04:54:39 -
@@ -393,6 +393,7 @@ u_int16_t parseicmpspec(char *, sa_famil
 int kw_casecmp(const void *, const void *);
 int map_tos(char *string, int *);
 int rdomain_exists(u_int);
+int rtable_exists(u_int);
 int filteropts_to_rule(struct pf_rule *, struct filter_opts *);
 
 TAILQ_HEAD(loadanchorshead, loadanchors)
@@ -1217,6 +1218,10 @@ antispoof_opt: LABEL label   {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
antispoof_opts.rtableid = $2;
}
;
@@ -2001,6 +2006,10 @@ filter_opt   : USER uids {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
filter_opts.rtableid = $2;
}
| DIVERTTO STRING PORT portplain {
@@ -5899,6 +5908,36 @@ rdomain_exists(u_int rdomain)
}
/* rdomain is a table, but not an rdomain */
return 0;
+}
+
+int
+rtable_exists(u_int rtable)
+{
+   size_t   len;
+   struct rt_tableinfo  info;
+   int  mib[6];
+   static u_int found[RT_TABLEID_MAX+1];
+
+   if (found[rtable] == 1)
+   return 1;
+
+   mib[0] = CTL_NET;
+   mib[1] = PF_ROUTE;
+   mib[2] = 0;
+   mib[3] = 0;
+   mib[4] = NET_RT_TABLE;
+   mib[5] = rtable;
+
+   len = sizeof(info);
+   if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
+   if (errno == ENOENT) {
+   /* table nonexistent */
+   return 0;
+   }
+   err(1, "%s", __func__);
+   }
+   found[rtable] = 1;
+   return 1;
 }
 
 int



Re: httpd: use the original uri for REQUEST_URI

2020-09-11 Thread YASUOKA Masahiko
Anyone?

This is a tiny change but makes httpd(8) more correct.
The diff is not so complicated.

On Thu, 03 Sep 2020 13:09:49 +0900 (JST)
YASUOKA Masahiko  wrote:
> Let me update the diff.  The previous one had no error handling for
> the case where strdup() fails.
> 
> On Thu, 03 Sep 2020 13:02:51 +0900 (JST)
> YASUOKA Masahiko  wrote:
>> The diff makes REQUEST_URI in FastCGI become the original request
>> URI.  Currently it is a URL that has been URL-decoded and canonicalized.
>> I could not find a specification of REQUEST_URI, but I suppose it is
>> the URI in the HTTP request.  Apache httpd and nginx use the original
>> URI for it.
>> 
>> ok?
>> 
>> 
>> Use the original requested URI for REQUEST_URI.
> 
> Index: usr.sbin/httpd/http.h
> ===
> RCS file: /cvs/src/usr.sbin/httpd/http.h,v
> retrieving revision 1.15
> diff -u -p -r1.15 http.h
> --- usr.sbin/httpd/http.h 8 May 2019 21:41:06 -   1.15
> +++ usr.sbin/httpd/http.h 3 Sep 2020 04:09:26 -
> @@ -246,6 +246,7 @@ struct http_descriptor {
>   /* Rewritten path and query remain NULL if not used */
>   char*http_path_alias;
>   char*http_query_alias;
> + char*http_path_orig;
>  
>   /* A tree of headers and attached lists for repeated headers. */
>   struct kv   *http_lastheader;
> Index: usr.sbin/httpd/server_fcgi.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/server_fcgi.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 server_fcgi.c
> --- usr.sbin/httpd/server_fcgi.c  24 Aug 2020 15:49:11 -  1.83
> +++ usr.sbin/httpd/server_fcgi.c  3 Sep 2020 04:09:26 -
> @@ -299,13 +299,13 @@ server_fcgi(struct httpd *env, struct cl
>   }
>  
>   if (!desc->http_query) {
> - if (fcgi_add_param(, "REQUEST_URI", desc->http_path,
> + if (fcgi_add_param(, "REQUEST_URI", desc->http_path_orig,
>   clt) == -1) {
>   errstr = "failed to encode param";
>   goto fail;
>   }
>   } else {
> - if (asprintf(, "%s?%s", desc->http_path,
> + if (asprintf(, "%s?%s", desc->http_path_orig,
>   desc->http_query) == -1) {
>   errstr = "failed to encode param";
>   goto fail;
> Index: usr.sbin/httpd/server_http.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/server_http.c,v
> retrieving revision 1.140
> diff -u -p -r1.140 server_http.c
> --- usr.sbin/httpd/server_http.c  3 Aug 2020 10:59:53 -   1.140
> +++ usr.sbin/httpd/server_http.c  3 Sep 2020 04:09:26 -
> @@ -100,6 +100,8 @@ server_httpdesc_free(struct http_descrip
>  
>   free(desc->http_path);
>   desc->http_path = NULL;
> + free(desc->http_path_orig);
> + desc->http_path_orig = NULL;
>   free(desc->http_path_alias);
>   desc->http_path_alias = NULL;
>   free(desc->http_query);
> @@ -1204,9 +1206,13 @@ server_response(struct httpd *httpd, str
>   char*hostval, *query;
>   const char  *errstr = NULL;
>  
> - /* Decode the URL */
> + /* Preserve original path */
>   if (desc->http_path == NULL ||
> - url_decode(desc->http_path) == NULL)
> + (desc->http_path_orig = strdup(desc->http_path)) == NULL)
> + goto fail;
> +
> + /* Decode the URL */
> + if (url_decode(desc->http_path) == NULL)
>   goto fail;
>  
>   /* Canonicalize the request path */



Re: httpd: use the original uri for REQUEST_URI

2020-09-02 Thread YASUOKA Masahiko
Let me update the diff.  The previous one had no error handling for
the case where strdup() fails.

On Thu, 03 Sep 2020 13:02:51 +0900 (JST)
YASUOKA Masahiko  wrote:
> The diff makes REQUEST_URI in FastCGI become the original request
> URI.  Currently it is a URL that has been URL-decoded and canonicalized.
> I could not find a specification of REQUEST_URI, but I suppose it is
> the URI in the HTTP request.  Apache httpd and nginx use the original
> URI for it.
> 
> ok?
> 
> 
> Use the original requested URI for REQUEST_URI.

Index: usr.sbin/httpd/http.h
===
RCS file: /cvs/src/usr.sbin/httpd/http.h,v
retrieving revision 1.15
diff -u -p -r1.15 http.h
--- usr.sbin/httpd/http.h   8 May 2019 21:41:06 -   1.15
+++ usr.sbin/httpd/http.h   3 Sep 2020 04:09:26 -
@@ -246,6 +246,7 @@ struct http_descriptor {
/* Rewritten path and query remain NULL if not used */
char*http_path_alias;
char*http_query_alias;
+   char*http_path_orig;
 
/* A tree of headers and attached lists for repeated headers. */
struct kv   *http_lastheader;
Index: usr.sbin/httpd/server_fcgi.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_fcgi.c,v
retrieving revision 1.83
diff -u -p -r1.83 server_fcgi.c
--- usr.sbin/httpd/server_fcgi.c24 Aug 2020 15:49:11 -  1.83
+++ usr.sbin/httpd/server_fcgi.c3 Sep 2020 04:09:26 -
@@ -299,13 +299,13 @@ server_fcgi(struct httpd *env, struct cl
}
 
if (!desc->http_query) {
-   if (fcgi_add_param(, "REQUEST_URI", desc->http_path,
+   if (fcgi_add_param(, "REQUEST_URI", desc->http_path_orig,
clt) == -1) {
errstr = "failed to encode param";
goto fail;
}
} else {
-   if (asprintf(, "%s?%s", desc->http_path,
+   if (asprintf(, "%s?%s", desc->http_path_orig,
desc->http_query) == -1) {
errstr = "failed to encode param";
goto fail;
Index: usr.sbin/httpd/server_http.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_http.c,v
retrieving revision 1.140
diff -u -p -r1.140 server_http.c
--- usr.sbin/httpd/server_http.c3 Aug 2020 10:59:53 -   1.140
+++ usr.sbin/httpd/server_http.c3 Sep 2020 04:09:26 -
@@ -100,6 +100,8 @@ server_httpdesc_free(struct http_descrip
 
free(desc->http_path);
desc->http_path = NULL;
+   free(desc->http_path_orig);
+   desc->http_path_orig = NULL;
free(desc->http_path_alias);
desc->http_path_alias = NULL;
free(desc->http_query);
@@ -1204,9 +1206,13 @@ server_response(struct httpd *httpd, str
char*hostval, *query;
const char  *errstr = NULL;
 
-   /* Decode the URL */
+   /* Preserve original path */
if (desc->http_path == NULL ||
-   url_decode(desc->http_path) == NULL)
+   (desc->http_path_orig = strdup(desc->http_path)) == NULL)
+   goto fail;
+
+   /* Decode the URL */
+   if (url_decode(desc->http_path) == NULL)
goto fail;
 
/* Canonicalize the request path */



httpd: use the original uri for REQUEST_URI

2020-09-02 Thread YASUOKA Masahiko
The diff makes REQUEST_URI in FastCGI become the original request
URI.  Currently it is a URL that has been URL-decoded and canonicalized.
I could not find a specification of REQUEST_URI, but I suppose it is
the URI in the HTTP request.  Apache httpd and nginx use the original
URI for it.

ok?


Use the original requested URI for REQUEST_URI.

Index: usr.sbin/httpd/http.h
===
RCS file: /cvs/src/usr.sbin/httpd/http.h,v
retrieving revision 1.15
diff -u -p -r1.15 http.h
--- usr.sbin/httpd/http.h   8 May 2019 21:41:06 -   1.15
+++ usr.sbin/httpd/http.h   3 Sep 2020 04:00:49 -
@@ -246,6 +246,7 @@ struct http_descriptor {
/* Rewritten path and query remain NULL if not used */
char*http_path_alias;
char*http_query_alias;
+   char*http_path_orig;
 
/* A tree of headers and attached lists for repeated headers. */
struct kv   *http_lastheader;
Index: usr.sbin/httpd/server_fcgi.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_fcgi.c,v
retrieving revision 1.83
diff -u -p -r1.83 server_fcgi.c
--- usr.sbin/httpd/server_fcgi.c24 Aug 2020 15:49:11 -  1.83
+++ usr.sbin/httpd/server_fcgi.c3 Sep 2020 04:00:49 -
@@ -299,13 +299,13 @@ server_fcgi(struct httpd *env, struct cl
}
 
if (!desc->http_query) {
-   if (fcgi_add_param(, "REQUEST_URI", desc->http_path,
+   if (fcgi_add_param(, "REQUEST_URI", desc->http_path_orig,
clt) == -1) {
errstr = "failed to encode param";
goto fail;
}
} else {
-   if (asprintf(, "%s?%s", desc->http_path,
+   if (asprintf(, "%s?%s", desc->http_path_orig,
desc->http_query) == -1) {
errstr = "failed to encode param";
goto fail;
Index: usr.sbin/httpd/server_http.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_http.c,v
retrieving revision 1.140
diff -u -p -r1.140 server_http.c
--- usr.sbin/httpd/server_http.c3 Aug 2020 10:59:53 -   1.140
+++ usr.sbin/httpd/server_http.c3 Sep 2020 04:00:49 -
@@ -100,6 +100,8 @@ server_httpdesc_free(struct http_descrip
 
free(desc->http_path);
desc->http_path = NULL;
+   free(desc->http_path_orig);
+   desc->http_path_orig = NULL;
free(desc->http_path_alias);
desc->http_path_alias = NULL;
free(desc->http_query);
@@ -1203,6 +1205,10 @@ server_response(struct httpd *httpd, str
int  portval = -1, ret;
char*hostval, *query;
const char  *errstr = NULL;
+
+   /* preserve original path */
+   if (desc->http_path != NULL)
+   desc->http_path_orig = strdup(desc->http_path);
 
/* Decode the URL */
if (desc->http_path == NULL ||



Re: Make pipex more common for pppac and pppx

2020-08-26 Thread YASUOKA Masahiko
On Mon, 24 Aug 2020 20:07:48 +0300
Vitaliy Makkoveev  wrote:
> I pointed some comments inline.

Thanks,

>> +case PIPEXASESSION:
>> +{
>> +struct pipex_session_req *req =
>> +(struct pipex_session_req *)data;
>> +if ((error = pipex_init_session(, req)) != 0)
>> +break;
>> +error = pipex_link_session(session, >sc_if, sc);
>> +break;
>> +}
> 
> If pipex_link_session() fails `session' will be leaked.

Yes, it's a good catch.

>> +case PIPEXDSESSION:
>> +{
>> +struct pipex_session_close_req *req =
>> +(struct pipex_session_close_req *)data;
>> +session = pipex_lookup_by_session_id(req->pcr_protocol,
>> +req->pcr_session_id);
>> +if (session == NULL || session->ifindex != sc->sc_if.if_index) {
> 
> Can you compare with `session->ownersc' instead of `ifindex' like other
> code does? For consistency with other code.

Yes, it's better.

> What about introducing pppac_{add,del}_session() and moving the related
> code into them?

Also I agreed.

> Also I see no such reason to kill pipex_{add,destroy}_session() because
> they play with `pipex_rd_head{4,6}' and you don't need newly introduced
> `session->is_pppx' which you use only once for that reason. 

pipex_{add,destroy}_session() should be killed since they are only for pppac.
I think such functions should have a "pppac_" prefix and be placed in if_pppx.c.
Also, I'd like to move the pipex_rd_head{4,6} things to pppac_{add,del}_session
as a next step.  Yes, we might be able to kill is_pppx.  But I'd like to discuss
that as a next step as well.


I'd like to commit this for the moment, and then continue further discussion.

ok?

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.101
diff -u -p -r1.101 if_pppx.c
--- sys/net/if_pppx.c   14 Aug 2020 11:05:38 -  1.101
+++ sys/net/if_pppx.c   26 Aug 2020 06:25:34 -
@@ -163,7 +163,6 @@ struct pppx_if {
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;   /* [I] */
struct pipex_session*pxi_session;   /* [I] */
-   struct pipex_iface_context  pxi_ifcontext;  /* [N] */
 };
 
 static inline int
@@ -181,12 +180,6 @@ intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
struct pipex_session_close_req *);
-intpppx_config_session(struct pppx_dev *,
-   struct pipex_session_config_req *);
-intpppx_get_stat(struct pppx_dev *,
-   struct pipex_session_stat_req *);
-intpppx_get_closed(struct pppx_dev *,
-   struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
struct pipex_session_descr_req *);
 
@@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
 
NET_LOCK();
switch (cmd) {
-   case PIPEXSMODE:
-   /*
-* npppd always enables on open, and only disables before
-* closing. we cheat and let open and close do that, so lie
-* to npppd.
-*/
-   break;
-   case PIPEXGMODE:
-   *(int *)addr = 1;
-   break;
-
case PIPEXASESSION:
error = pppx_add_session(pxd,
(struct pipex_session_req *)addr);
@@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
(struct pipex_session_close_req *)addr);
break;
 
-   case PIPEXCSESSION:
-   error = pppx_config_session(pxd,
-   (struct pipex_session_config_req *)addr);
-   break;
-
-   case PIPEXGSTAT:
-   error = pppx_get_stat(pxd,
-   (struct pipex_session_stat_req *)addr);
-   break;
-
-   case PIPEXGCLOSED:
-   error = pppx_get_closed(pxd,
-   (struct pipex_session_list_req *)addr);
-   break;
-
case PIPEXSIFDESCR:
error = pppx_set_session_descr(pxd,
(struct pipex_session_descr_req *)addr);
@@ -472,7 +439,7 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
break;
 
default:
-   error = ENOTTY;
+   error = pipex_ioctl(pxd, cmd, addr);
break;
}
NET_UNLOCK();
@@ -741,11 +708,7 @@ pppx_add_session(struct pppx_dev *pxd, s
if_addrhooks_run(ifp);
}
 
-   /* fake a pipex interface context */
-   pxi->pxi_ifcontext.ifindex = ifp->if_index;
-   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
-
-   error = pipex_link_session(session, >pxi_ifcontext);
+   error = pipex_link_session(session, 

Re: Make pipex more common for pppac and pppx

2020-08-19 Thread YASUOKA Masahiko
Hi,

Thank you for your comments.

On Mon, 17 Aug 2020 00:15:08 +0300
Vitaliy Makkoveev  wrote:
> I like your idea to kill `pipex_iface_context'. I had been trying to keep
> it myself and that was the wrong way. Could you rework your diff to be
> against the recent sources?

I'm sorry the diff was for the old version.

>> @@ -1122,8 +1051,11 @@ pppacopen(dev_t dev, int flags, int mode, struct proc 
>> *p)
>>  #if NBPFILTER > 0
>>  bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
>>  #endif
>> -
>> -pipex_iface_init(>sc_pipex_iface, ifp->if_index);
>> +/* virtual pipex_session entry for multicast */
>> +session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
>> +session->is_multicast = 1;
>> +session->ifindex = ifp->if_index;
>> +sc->sc_multicast_session = session;
>>  
> The interface index is not required for the multicast session, because
> it's never used. Also, I'd like `sc_multicast_session' to be allocated
> before if_attach().

The diff used `ifindex' to select all sessions associated with the
same pppac(4).  But the latest diff uses `ownersc' instead for the
same purpose.  Also, the allocation was moved to an earlier part of
the function.

>> @@ -1382,7 +1340,10 @@ pppacclose(dev_t dev, int flags, int mode, struct 
>> proc *p)
>>  klist_invalidate(>sc_wsel.si_note);
>>  splx(s);
>>  
>> -pipex_iface_fini(>sc_pipex_iface);
>> +pool_put(_session_pool, sc->sc_multicast_session);
>> +NET_LOCK();
>> +pipex_destroy_all_sessions(sc);
>> +NET_UNLOCK();
>>  
>>  if_detach(ifp);
> 
> The recent sources have pppac(4) with an unlocked start routine. I'd like
> you to detach `ifp' before destroying `sc_multicast_session'.

The lines were moved after if_detach().

I'll test this more on this weekend, then I'll ask ok for this.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.101
diff -u -p -r1.101 if_pppx.c
--- sys/net/if_pppx.c   14 Aug 2020 11:05:38 -  1.101
+++ sys/net/if_pppx.c   20 Aug 2020 05:19:55 -
@@ -163,7 +163,6 @@ struct pppx_if {
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;   /* [I] */
struct pipex_session*pxi_session;   /* [I] */
-   struct pipex_iface_context  pxi_ifcontext;  /* [N] */
 };
 
 static inline int
@@ -181,12 +180,6 @@ intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
struct pipex_session_close_req *);
-intpppx_config_session(struct pppx_dev *,
-   struct pipex_session_config_req *);
-intpppx_get_stat(struct pppx_dev *,
-   struct pipex_session_stat_req *);
-intpppx_get_closed(struct pppx_dev *,
-   struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
struct pipex_session_descr_req *);
 
@@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
 
NET_LOCK();
switch (cmd) {
-   case PIPEXSMODE:
-   /*
-* npppd always enables on open, and only disables before
-* closing. we cheat and let open and close do that, so lie
-* to npppd.
-*/
-   break;
-   case PIPEXGMODE:
-   *(int *)addr = 1;
-   break;
-
case PIPEXASESSION:
error = pppx_add_session(pxd,
(struct pipex_session_req *)addr);
@@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
(struct pipex_session_close_req *)addr);
break;
 
-   case PIPEXCSESSION:
-   error = pppx_config_session(pxd,
-   (struct pipex_session_config_req *)addr);
-   break;
-
-   case PIPEXGSTAT:
-   error = pppx_get_stat(pxd,
-   (struct pipex_session_stat_req *)addr);
-   break;
-
-   case PIPEXGCLOSED:
-   error = pppx_get_closed(pxd,
-   (struct pipex_session_list_req *)addr);
-   break;
-
case PIPEXSIFDESCR:
error = pppx_set_session_descr(pxd,
(struct pipex_session_descr_req *)addr);
@@ -472,7 +439,7 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
break;
 
default:
-   error = ENOTTY;
+   error = pipex_ioctl(pxd, cmd, addr);
break;
}
NET_UNLOCK();
@@ -741,11 +708,7 @@ pppx_add_session(struct pppx_dev *pxd, s
if_addrhooks_run(ifp);
}
 
-   /* fake a pipex interface context */
-   pxi->pxi_ifcontext.ifindex = ifp->if_index;
-   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
-
-   error = pipex_link_session(session, >pxi_ifcontext);
+   error = 

Re: Make pipex more common for pppac and pppx

2020-08-15 Thread YASUOKA Masahiko
Let me update the diff.  A bug was found by the test.

diff --git a/sys/net/if_pppx.c b/sys/net/if_pppx.c
index 62b85bc34af..6d3de6973bd 100644
--- a/sys/net/if_pppx.c
+++ b/sys/net/if_pppx.c
@@ -163,7 +163,6 @@ struct pppx_if {
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;   /* [I] */
struct pipex_session*pxi_session;   /* [I] */
-   struct pipex_iface_context  pxi_ifcontext;  /* [N] */
 };
 
 static inline int
@@ -181,12 +180,6 @@ intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
struct pipex_session_close_req *);
-intpppx_config_session(struct pppx_dev *,
-   struct pipex_session_config_req *);
-intpppx_get_stat(struct pppx_dev *,
-   struct pipex_session_stat_req *);
-intpppx_get_closed(struct pppx_dev *,
-   struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
struct pipex_session_descr_req *);
 
@@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
NET_LOCK();
switch (cmd) {
-   case PIPEXSMODE:
-   /*
-* npppd always enables on open, and only disables before
-* closing. we cheat and let open and close do that, so lie
-* to npppd.
-*/
-   break;
-   case PIPEXGMODE:
-   *(int *)addr = 1;
-   break;
-
case PIPEXASESSION:
error = pppx_add_session(pxd,
(struct pipex_session_req *)addr);
@@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
(struct pipex_session_close_req *)addr);
break;
 
-   case PIPEXCSESSION:
-   error = pppx_config_session(pxd,
-   (struct pipex_session_config_req *)addr);
-   break;
-
-   case PIPEXGSTAT:
-   error = pppx_get_stat(pxd,
-   (struct pipex_session_stat_req *)addr);
-   break;
-
-   case PIPEXGCLOSED:
-   error = pppx_get_closed(pxd,
-   (struct pipex_session_list_req *)addr);
-   break;
-
case PIPEXSIFDESCR:
error = pppx_set_session_descr(pxd,
(struct pipex_session_descr_req *)addr);
@@ -472,7 +439,7 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
break;
 
default:
-   error = ENOTTY;
+   error = pipex_ioctl(pxd, cmd, addr);
break;
}
NET_UNLOCK();
@@ -742,11 +709,7 @@ pppx_add_session(struct pppx_dev *pxd, struct 
pipex_session_req *req)
if_addrhooks_run(ifp);
}
 
-   /* fake a pipex interface context */
-   pxi->pxi_ifcontext.ifindex = ifp->if_index;
-   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
-
-   error = pipex_link_session(session, >pxi_ifcontext);
+   error = pipex_link_session(session, ifp, pxd);
if (error)
goto detach;
 
@@ -786,40 +749,6 @@ pppx_del_session(struct pppx_dev *pxd, struct 
pipex_session_close_req *req)
return (0);
 }
 
-int
-pppx_config_session(struct pppx_dev *pxd,
-struct pipex_session_config_req *req)
-{
-   struct pppx_if *pxi;
-
-   pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
-   if (pxi == NULL)
-   return (EINVAL);
-
-   return pipex_config_session(req, >pxi_ifcontext);
-}
-
-int
-pppx_get_stat(struct pppx_dev *pxd, struct pipex_session_stat_req *req)
-{
-   struct pppx_if *pxi;
-
-   pxi = pppx_if_find(pxd, req->psr_session_id, req->psr_protocol);
-   if (pxi == NULL)
-   return (EINVAL);
-
-   return pipex_get_stat(req, >pxi_ifcontext);
-}
-
-int
-pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
-{
-   /* XXX: Only opened sessions exist for pppx(4) */
-   memset(req, 0, sizeof(*req));
-
-   return 0;
-}
-
 int
 pppx_set_session_descr(struct pppx_dev *pxd,
 struct pipex_session_descr_req *req)
@@ -1022,9 +951,8 @@ struct pppac_softc {
struct selinfo  sc_rsel;
struct mutexsc_wsel_mtx;
struct selinfo  sc_wsel;
-
-   struct pipex_iface_context
-   sc_pipex_iface;
+   struct pipex_session
+   *sc_multicast_session;
 
struct mbuf_queue
sc_mq;
@@ -1084,6 +1012,7 @@ pppacopen(dev_t dev, int flags, int mode, struct proc *p)
 {
struct pppac_softc *sc;
struct ifnet *ifp;
+   struct pipex_session *session;
 
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
if (pppac_lookup(dev) != NULL) {
@@ 

Make pipex more common for pppac and pppx

2020-08-15 Thread YASUOKA Masahiko
This diff makes pipex become more common for pppac and pppx.

- Delete "pipex_iface_context".

   It was created when pppx didn't exist yet.  This creates some
   confusion.  For example, session->pipex_iface is the device context
   for pppac(4), but it is not for pppx(4).

623 Static int
624 pipex_get_closed(struct pipex_session_list_req *req,
625 struct pipex_iface_context *iface)
626 {
627 struct pipex_session *session, *session_tmp;
628 
629 NET_ASSERT_LOCKED();
630 bzero(req, sizeof(*req));
631 LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
632 session_tmp) {
633 if (session->pipex_iface != iface)
634 continue;

  At #633, it is used to verify the ownership.  But PIPEXGCLOSED is to
  get all closed sessions associated with the *device* (not the
  interface).  So we need another way to verify the owner.

  - The diff adds "void *ownersc" to session for it.
  - PIPEXGCLOSED for pppx is actually broken.  The diff fixes this.

- pipex_iface_context has a dummy session for multicast and it's not
  used by pppx(4).  The diff moves all multicast things to pppac local.

- Also session creation and deletion for pppac cannot be used by
  pppx.  Move them to pppac local.

- Make PIPEX{S,G}MODE dummy.  I'd like to delete them afterward.

The diff is still under review and test.

comment?

diff --git a/sys/net/if_pppx.c b/sys/net/if_pppx.c
index 62b85bc34af..6d3de6973bd 100644
--- a/sys/net/if_pppx.c
+++ b/sys/net/if_pppx.c
@@ -163,7 +163,6 @@ struct pppx_if {
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;   /* [I] */
struct pipex_session*pxi_session;   /* [I] */
-   struct pipex_iface_context  pxi_ifcontext;  /* [N] */
 };
 
 static inline int
@@ -181,12 +180,6 @@ intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
struct pipex_session_close_req *);
-intpppx_config_session(struct pppx_dev *,
-   struct pipex_session_config_req *);
-intpppx_get_stat(struct pppx_dev *,
-   struct pipex_session_stat_req *);
-intpppx_get_closed(struct pppx_dev *,
-   struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
struct pipex_session_descr_req *);
 
@@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
NET_LOCK();
switch (cmd) {
-   case PIPEXSMODE:
-   /*
-* npppd always enables on open, and only disables before
-* closing. we cheat and let open and close do that, so lie
-* to npppd.
-*/
-   break;
-   case PIPEXGMODE:
-   *(int *)addr = 1;
-   break;
-
case PIPEXASESSION:
error = pppx_add_session(pxd,
(struct pipex_session_req *)addr);
@@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
(struct pipex_session_close_req *)addr);
break;
 
-   case PIPEXCSESSION:
-   error = pppx_config_session(pxd,
-   (struct pipex_session_config_req *)addr);
-   break;
-
-   case PIPEXGSTAT:
-   error = pppx_get_stat(pxd,
-   (struct pipex_session_stat_req *)addr);
-   break;
-
-   case PIPEXGCLOSED:
-   error = pppx_get_closed(pxd,
-   (struct pipex_session_list_req *)addr);
-   break;
-
case PIPEXSIFDESCR:
error = pppx_set_session_descr(pxd,
(struct pipex_session_descr_req *)addr);
@@ -472,7 +439,7 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
break;
 
default:
-   error = ENOTTY;
+   error = pipex_ioctl(pxd, cmd, addr);
break;
}
NET_UNLOCK();
@@ -742,11 +709,7 @@ pppx_add_session(struct pppx_dev *pxd, struct 
pipex_session_req *req)
if_addrhooks_run(ifp);
}
 
-   /* fake a pipex interface context */
-   pxi->pxi_ifcontext.ifindex = ifp->if_index;
-   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
-
-   error = pipex_link_session(session, >pxi_ifcontext);
+   error = pipex_link_session(session, ifp, pxd);
if (error)
goto detach;
 
@@ -786,40 +749,6 @@ pppx_del_session(struct pppx_dev *pxd, struct 
pipex_session_close_req *req)
return (0);
 }
 
-int
-pppx_config_session(struct pppx_dev *pxd,
-struct pipex_session_config_req *req)
-{
-   struct pppx_if *pxi;
-
-   pxi = pppx_if_find(pxd, 

Re: pppac(4): destroy sessions the same way as pppx(4) does

2020-08-14 Thread YASUOKA Masahiko
On Wed, 12 Aug 2020 12:26:22 +0300
Vitaliy Makkoveev  wrote:
> We destroy pppx(4) related sessions while performing the PIPEXDSESSION
> command. But with pppac(4) we set the session's state to
> PIPEX_STATE_CLOSE_WAIT2 and wait for the garbage collector to do the destruction.

pppac's PIPEXDSESSION sets the state to PIPEX_STATE_CLOSED.  This is to
wait until pipex{in,out}q become empty.

> We removed `pipex{in,out}q'. So we can safely destroy a session at any time.
> I propose to make the pppac(4) session destruction path the same as pppx(4)'s.
> Now we destroy them while performing the PIPEXDSESSION command too.

Yes.  I agree on this point.

> Also there is no in-kernel garbage collector for pppac(4) sessions.
> yasuoka@ pointed me that npppd(8) should kill expired sessions.
> 
> This not only makes pppac(4) closer to pppx(4) but simplify code and
> allow us to make safe pppx(4) session processing by pipex_timer().
> So this is preparation step to restore in-kernel timeout for pppx(4)
> too.

Below, I am asking to keep the timeout behavior.  There is a bug for
pppx(4), but it has been working for pppac(4) for a long time.  If you
really want to change the behavior, please provide a reason.  I don't
have a strong opinion, but I don't want to change the behavior without
a reason.

> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.124
> diff -u -p -r1.124 pipex.c
> --- sys/net/pipex.c   12 Aug 2020 08:41:39 -  1.124
> +++ sys/net/pipex.c   12 Aug 2020 09:07:12 -
> @@ -536,29 +536,6 @@ out:
>   return error;
>  }
>  
> -int
> -pipex_notify_close_session(struct pipex_session *session)
> -{
> - NET_ASSERT_LOCKED();
> - session->state = PIPEX_STATE_CLOSE_WAIT;
> - session->stat.idle_time = 0;
> - LIST_INSERT_HEAD(_close_wait_list, session, state_list);
> -
> - return (0);
> -}
> -

Unrelated but ok.

> -int
> -pipex_notify_close_session_all(void)
> -{
> - struct pipex_session *session;
> -
> - NET_ASSERT_LOCKED();
> - LIST_FOREACH(session, _session_list, session_list)
> - if (session->state == PIPEX_STATE_OPENED)
> - pipex_notify_close_session(session);
> - return (0);
> -}
> -

Unrelated but ok.  Since it's not used.

>  Static int
>  pipex_close_session(struct pipex_session_close_req *req,
>  struct pipex_iface_context *iface)
> @@ -573,13 +550,9 @@ pipex_close_session(struct pipex_session
>   if (session->pipex_iface != iface)
>   return (EINVAL);
>  
> - /* remove from close_wait list */
> - if (session->state == PIPEX_STATE_CLOSE_WAIT)
> - LIST_REMOVE(session, state_list);
> -

This must be kept.  Userland may issue PIPEXDSESSION before PIPEXGCLOSED for
this session.

>   /* get statistics before destroy the session */
>   req->pcr_stat = session->stat;
> - session->state = PIPEX_STATE_CLOSED;
> + pipex_destroy_session(session);
>  
>   return (0);
>  }

ok

> @@ -739,47 +712,25 @@ pipex_timer_stop(void)
>  Static void
>  pipex_timer(void *ignored_arg)
>  {
> - struct pipex_session *session, *session_tmp;
> + struct pipex_session *session;
>  
>   timeout_add_sec(_timer_ch, pipex_prune);
>  
>   NET_LOCK();
>   /* walk through */
> - LIST_FOREACH_SAFE(session, _session_list, session_list,
> - session_tmp) {
> - switch (session->state) {
> - case PIPEX_STATE_OPENED:
> - if (session->timeout_sec == 0)
> - continue;
> -
> - session->stat.idle_time++;
> - if (session->stat.idle_time < session->timeout_sec)
> - continue;
> -
> - pipex_notify_close_session(session);
> - break;
> -
> - case PIPEX_STATE_CLOSE_WAIT:
> - case PIPEX_STATE_CLOSE_WAIT2:
> - /* Wait PIPEXDSESSION from userland */
> - session->stat.idle_time++;
> - if (session->stat.idle_time < PIPEX_CLOSE_TIMEOUT)
> - continue;
> -
> - if (session->state == PIPEX_STATE_CLOSE_WAIT)
> - LIST_REMOVE(session, state_list);
> - session->state = PIPEX_STATE_CLOSED;
> - /* FALLTHROUGH */
> + LIST_FOREACH(session, _session_list, session_list) {
> + if (session->state != PIPEX_STATE_OPENED)
> + continue;
> + if (session->timeout_sec == 0)
> + continue;
>  
> - case PIPEX_STATE_CLOSED:
> - pipex_destroy_session(session);
> - break;
> + session->stat.idle_time++;
> + if (session->stat.idle_time < session->timeout_sec)
> + continue;
>  
> - default:
> - break;
> -  

Re: pipex "idle-timeout" work with pppx(4).

2020-08-12 Thread YASUOKA Masahiko
Hi,

On Wed, 12 Aug 2020 12:38:39 +0300
Vitaliy Makkoveev  wrote:
> We don't need to mark pppx(4) sessions because there is no special cases
> for them. We just need to kill pppx(4) related "pr_timeout_sec != 0"
> checks and call pipex_get_closed() by pppx_get_closed().

How do you implement that by calling pipex_get_closed() from
pppx_get_closed()?


PIPEXGCLOSED is to pick up expired sessions which are associated with
the character device (/dev/{pppx,pppac}0).  In the pppac(4) case, the
character device is the same object as the pppac interface.  But for
pppx(4) it is not the same.  pipex_session has no direct reference to the
device.  This is why my diff was modifying pipex_get_closed().



Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread YASUOKA Masahiko
On Tue, 11 Aug 2020 23:06:45 +0300
Vitaliy Makkoveev  wrote:
> We removed `pipex{in,out}q'. So now we can destroy pppac(4) session just
> like we do in pppx(4) case. Also there is no reason to allow
> pipex_timer() to destroy sessions - userland will do this by
> PIPEXDSESSION. This permit us to use existing pipex_get_closed() for
> both pppac(4) and pppx(4) without any modifications.
> 
> So, I propose pipex_close_session() and pipex_timer() be like below.

It doesn't seem to fix "idle-timeout".

> We simplify pppac(4) session destruction. We unify behavior with pppx(4)
> - we killing session just now. There is no reason to modify
> pipex_get_closed() and pipex_link_session(). pppx(4) related sessions
> can be processed by pipex_timer(). There is no performance impact.

We need to modify pppx_get_closed() to implement idle-timeout.

> Do you like this? We can do two diffs. The first to unify destruction
> and the second to re-enable in-kernel timeout for pppx(4) and revert man
> pages modifications.

I have no objection to your "unify destruction".

I'll rebase my diff after that work.



Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread YASUOKA Masahiko


My diff is to make pppx(4) have the same "idle-timeout"
functionality.  I strongly think pppx(4) must have the same
functionality as pppac(4) because I don't see any reason to have
any difference between pppx(4) and pppac(4).

Your pseudo code is suggesting another thing.  Would you like to
change the existing behavior of pppac(4)?  If so, what is the problem
you are concerned about?  I'd like you to explain how it relates to my
diff, or the background of the code.

On Tue, 11 Aug 2020 01:20:45 +0300
Vitaliy Makkoveev  wrote:
> 
> 
>> On 10 Aug 2020, at 19:53, Vitaliy Makkoveev  wrote:
>> 
>> We are doing all wrong :)
>> 
>> We can just unlink pppx(4) related session from `pipex_session_list' if
>> it's time expired. But since this unlinked session is still exists in
>> pppx(4) layer we can access through pppx_get_closed() without any
>> search. We should only add flag to session which identifies it as
>> pppx(4) related.
>> 
>> I hope you like this idea.
>> 
>>  cut begin 
>> Static void
>> pipex_timer(void *ignored_arg)
>> {
>>struct pipex_session *session, *session_tmp;
>> 
>>timeout_add_sec(_timer_ch, pipex_prune);
>> 
>>NET_LOCK();
>>/* walk through */
>>LIST_FOREACH_SAFE(session, _session_list, session_list,
>>session_tmp) {
>>switch (session->state) {
>>case PIPEX_STATE_OPENED:
>>if (session->timeout_sec == 0)
>>continue;
>> 
>>session->stat.idle_time++;
>>if (session->stat.idle_time < session->timeout_sec)
>>continue;
>> 
>>  if (session->pppx_session)
>>  pipex_unlink_session(session);
>>  else
>>  pipex_notify_close_session(session);
>>break;
>>  /* ... */
>> }
>> 
>> pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
>> {
>>  struct pppx_if *pxi;
>> 
>>  pxi = pppx_if_find(pxd, req->pdr_session_id, req->pdr_protocol);
>>  if (pxi == NULL)
>>  return (EINVAL);
>> 
>>  memset(req, 0, sizeof(*req));
>>  if (session->state == PIPEX_STATE_CLOSED) {
>>  req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
>>  pppx_if_destroy(pxi);   
>>  }
>> 
>>  return 0;
>> }
> 
> Sorry for noise. I should avoid to write pseudo code.



Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread YASUOKA Masahiko
Hi,

On Mon, 10 Aug 2020 16:30:27 +0300
Vitaliy Makkoveev  wrote:
> On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
>> On Sun, 9 Aug 2020 20:03:50 +0300
>> Vitaliy Makkoveev  wrote:
>> > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
>> >> You propose to unlink pppx(4) related session which reached timeout. I'm
>> >> ok with this direction. But I see no reason to rework _get_closed()
>> >> routines.
>> >> 
>> >> in pppac(4) case it's assumed what if session is not yet destroyed by
>> >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
>> >> command. We can make pppx(4) behavior the same and I propose to
>> >> pppx_get_closed() be like below. 
>> >> 
>> >> Also, nothing requires to modify pipex_get_closed(). 
>> >> 
>> >>  cut begin 
>> > 
>> > Sorry, I mean
>> > 
>> > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
>> > {
>> >struct pppx_if  *pxi;
>> > 
>> >memset(req, 0, sizeof(*req));
>> > 
>> >while ((pxi = LIST_FIRST(>pxd_pxis))) {
>> >if (pxi->pxi_session->state == session->state =
>> >PIPEX_STATE_CLOSED) {
>> >req->plr_ppp_id[req->plr_ppp_id_count++] =
>> >pxi->pxi_session->ppp_id;
>> >pppx_if_destroy(pxi);
>> >}
>> >}
>> > 
>> >return 0;
>> > }
>> 
>> Yes, the diff doesn't seem to be complete, but this way will also work.
>> 
>> Usually there are few CLOSED sessions even if there are a lot of sessions.
>> Also, there are no CLOSED sessions if idle-timeout is not configured.  I
>> avoided that way because I think checking all sessions' state to find
>> such few sessions is too expensive.
>> 
>> A way I am suggesting:
>> 
>> @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
>>  
>>  Static int
>>  pipex_get_closed(struct pipex_session_list_req *req,
>> -struct pipex_iface_context *iface)
>> +int (*isowner)(void *, struct pipex_session *), void *ctx)
>>  {
>>  struct pipex_session *session, *session_tmp;
>>  
>> @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
>>  bzero(req, sizeof(*req));
>>  LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
>>  session_tmp) {
>> -if (session->pipex_iface != iface)
>> +if (!isowner(ctx, session))
>>  continue;
>>  req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
>>  LIST_REMOVE(session, state_list);
>> 
>> uses pipex_close_wait_list, which contains only sessions which have timed
>> out.
> 
> You are right. pipex_get_closed() walks through `pipex_close_wait_list'
> which contains only CLOSE_WAIT sessions.
> 
> According to npppd(8) code we do PIPEXGCLOSED related walkthrough once
> per NPPPD_TIMER_TICK_IVAL seconds, which is defined as 4. Is this such
> performance impact?

It might not be so expensive for you.  But why do you intend to use
that extra CPU when you have a cheaper way?

> Also who should destroy these sessions? It's assumed npppd(8) will
> destroy them by l2tp_ctrl_timeout() and pptp_ctrl_timeout()? Excuse me
> if I'm wrong, but who will destroy sessions in pppoe case?

In usr.sbin/npppd/npppd/npppd.c:

1306 static void
1307 pipex_periodic(npppd *_this)
1308 {
(snip)
1326 do {
1327 error = ioctl(devf, PIPEXGCLOSED, );
1328 if (error) {
1329 if (errno != ENXIO)
1330 log_printf(LOG_WARNING,
1331 "PIPEXGCLOSED failed: %m");
1332 break;
1333 }
1334 for (i = 0; i < req.plr_ppp_id_count; i++) {
1335 ppp_id = req.plr_ppp_id[i];
1336 slist_add(, (void 
*)(uintptr_t)ppp_id);
1337 }
1338 } while (req.plr_flags & PIPEX_LISTREQ_MORE);

The ppp sessions which are closed by pipex(4) are inserted into "dlist".

1350 /* Disconnect request */
1351 slist_itr_first();
1352 while (slist_itr_has_next()) {
(snip)
1372 ppp_log(ppp, LOG_INFO, "Stop requested by the kernel");
1373 /* TODO: PIPEX doesn't return the disconect reason */
1374 #ifdef USE_NPPPD_RADIUS
1375 ppp_set_radius_terminate_cause(ppp,
1376 RADIUS_TERMNATE_CAUSE_IDLE_TIMEOUT);
1377 #endif
1378 ppp_stop(ppp, NULL);

All ppp sessions are stopped at #1378.  PPP finishes layer by
layer, and ppp_stop0() will be called.  That function will call PIPEXDSESSION.

I'd like to emphasize that npppd(8) takes responsibility for pipex
sessions' creation/deletion even when an idle timeout happens.



Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread YASUOKA Masahiko
Hi,

Thank you for your review.

On Sun, 9 Aug 2020 20:03:50 +0300
Vitaliy Makkoveev  wrote:
> On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
>> You propose to unlink pppx(4) related session which reached timeout. I'm
>> ok with this direction. But I see no reason to rework _get_closed()
>> routines.
>> 
>> in pppac(4) case it's assumed what if session is not yet destroyed by
>> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
>> command. We can make pppx(4) behavior the same and I propose to
>> pppx_get_closed() be like below. 
>> 
>> Also, nothing requires to modify pipex_get_closed(). 
>> 
>>  cut begin 
> 
> Sorry, I mean
> 
> pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> {
>   struct pppx_if  *pxi;
> 
>   memset(req, 0, sizeof(*req));
> 
>   while ((pxi = LIST_FIRST(>pxd_pxis))) {
>   if (pxi->pxi_session->state == session->state =
>   PIPEX_STATE_CLOSED) {
>   req->plr_ppp_id[req->plr_ppp_id_count++] =
>   pxi->pxi_session->ppp_id;
>   pppx_if_destroy(pxi);
>   }
>   }
> 
>   return 0;
> }

Yes, the diff doesn't seem to be complete, but this way will also work.

Usually there are few CLOSED sessions even if there are a lot of sessions.
Also, there are no CLOSED sessions if idle-timeout is not configured.  I
avoided that way because I think checking all sessions' state to find
such few sessions is too expensive.

A way I am suggesting:

@@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
 
 Static int
 pipex_get_closed(struct pipex_session_list_req *req,
-struct pipex_iface_context *iface)
+int (*isowner)(void *, struct pipex_session *), void *ctx)
 {
struct pipex_session *session, *session_tmp;
 
@@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
bzero(req, sizeof(*req));
LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
session_tmp) {
-   if (session->pipex_iface != iface)
+   if (!isowner(ctx, session))
continue;
req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
LIST_REMOVE(session, state_list);

uses pipex_close_wait_list, which contains only sessions which have timed
out.

>> Also I have one inlined comment within your diff. 

>> > @@ -430,6 +425,7 @@ pipex_link_session(struct pipex_session 
>> >struct pipex_iface_context *iface)
>> >  {
>> >struct pipex_hash_head *chain;
>> > +  struct ifnet *ifp;
>> >  
>> >NET_ASSERT_LOCKED();
>> >  
>> > @@ -442,6 +438,11 @@ pipex_link_session(struct pipex_session 
>> >session->pipex_iface = iface;
>> >session->ifindex = iface->ifindex;
>> >  
>> > +  ifp = if_get(iface->ifindex);
>> > +  if (ifp != NULL && ifp->if_flags & IFF_POINTOPOINT)
>> > +  session->is_p2p = 1;
>> > +  if_put(ifp);
>> > +
>> 
>> I guess NULL `ifp' here exposes us a bug. I like to have assertion here.

ok, I agree here.


The diff is updated.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   10 Aug 2020 06:09:52 -
@@ -185,6 +185,7 @@ int pppx_config_session(struct pppx_dev
struct pipex_session_config_req *);
 intpppx_get_stat(struct pppx_dev *,
struct pipex_session_stat_req *);
+intpppx_is_owner(void *, struct pipex_session *);
 intpppx_get_closed(struct pppx_dev *,
struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
@@ -645,14 +646,6 @@ pppx_add_session(struct pppx_dev *pxd, s
struct in_ifaddr *ia;
struct sockaddr_in ifaddr;
 
-   /*
-* XXX: As long as `session' is allocated as part of a `pxi'
-*  it isn't possible to free it separately.  So disallow
-*  the timeout feature until this is fixed.
-*/
-   if (req->pr_timeout_sec != 0)
-   return (EINVAL);
-
error = pipex_init_session(, req);
if (error)
return (error);
@@ -812,12 +805,22 @@ pppx_get_stat(struct pppx_dev *pxd, stru
 }
 
 int
-pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
+pppx_is_owner(void *ctx, struct pipex_session *session)
 {
-   /* XXX: Only opened sessions exist for pppx(4) */
-   memset(req, 0, sizeof(*req));
+   struct pppx_dev *pxd = ctx;
+   struct pppx_if *pxi;
 
-   return 0;
+   pxi = pppx_if_find(pxd, session->session_id, session->protocol);
+   if (pxi != NULL)
+   return (1);
+
+   return (0);
+}
+
+int
+pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
+{
+   

pipex "idle-timeout" work with pppx(4).

2020-08-09 Thread YASUOKA Masahiko
This diff makes pipex "idle-timeout" work with pppx(4).

ok?

Index: sys/net/if_pppx.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   9 Aug 2020 08:05:16 -
@@ -185,6 +185,7 @@ int pppx_config_session(struct pppx_dev
struct pipex_session_config_req *);
 intpppx_get_stat(struct pppx_dev *,
struct pipex_session_stat_req *);
+intpppx_is_owner(void *, struct pipex_session *);
 intpppx_get_closed(struct pppx_dev *,
struct pipex_session_list_req *);
 intpppx_set_session_descr(struct pppx_dev *,
@@ -645,14 +646,6 @@ pppx_add_session(struct pppx_dev *pxd, s
struct in_ifaddr *ia;
struct sockaddr_in ifaddr;
 
-   /*
-* XXX: As long as `session' is allocated as part of a `pxi'
-*  it isn't possible to free it separately.  So disallow
-*  the timeout feature until this is fixed.
-*/
-   if (req->pr_timeout_sec != 0)
-   return (EINVAL);
-
error = pipex_init_session(, req);
if (error)
return (error);
@@ -812,12 +805,22 @@ pppx_get_stat(struct pppx_dev *pxd, stru
 }
 
 int
-pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
+pppx_is_owner(void *ctx, struct pipex_session *session)
 {
-   /* XXX: Only opened sessions exist for pppx(4) */
-   memset(req, 0, sizeof(*req));
+   struct pppx_dev *pxd = ctx;
+   struct pppx_if *pxi;
 
-   return 0;
+   pxi = pppx_if_find(pxd, session->session_id, session->protocol);
+   if (pxi != NULL)
+   return (1);
+
+   return (0);
+}
+
+int
+pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
+{
+   return (pipex_get_closed(req, pppx_is_owner, pxd));
 }
 
 int
@@ -1059,6 +1062,7 @@ static intpppac_ioctl(struct ifnet *, u
 static int pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
 static voidpppac_start(struct ifnet *);
+static int pppac_is_owner(void *, struct pipex_session *);
 
 static inline struct pppac_softc *
 pppac_lookup(dev_t dev)
@@ -1251,6 +1255,16 @@ pppacwrite(dev_t dev, struct uio *uio, i
 }
 
 int
+pppac_is_owner(void *ctx, struct pipex_session *session)
+{
+   struct pppac_softc *sc = ctx;
+
+   if (session->ifindex == sc->sc_if.if_index)
+   return (1);
+   return (0);
+}
+
+int
 pppacioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p)
 {
struct pppac_softc *sc = pppac_lookup(dev);
@@ -1264,6 +1278,13 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
break;
case FIONREAD:
*(int *)data = mq_hdatalen(>sc_mq);
+   break;
+
+   case PIPEXGCLOSED:
+   NET_LOCK();
+   error = pipex_get_closed((struct pipex_session_list_req *)data,
+   pppac_is_owner, sc);
+   NET_UNLOCK();
break;
 
default:
Index: sys/net/pipex.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
retrieving revision 1.123
diff -u -p -r1.123 pipex.c
--- sys/net/pipex.c 4 Aug 2020 09:32:05 -   1.123
+++ sys/net/pipex.c 9 Aug 2020 08:05:16 -
@@ -240,11 +240,6 @@ pipex_ioctl(struct pipex_iface_context *
pipex_iface);
break;
 
-   case PIPEXGCLOSED:
-   ret = pipex_get_closed((struct pipex_session_list_req *)data,
-   pipex_iface);
-   break;
-
default:
ret = ENOTTY;
break;
@@ -430,6 +425,7 @@ pipex_link_session(struct pipex_session 
struct pipex_iface_context *iface)
 {
struct pipex_hash_head *chain;
+   struct ifnet *ifp;
 
NET_ASSERT_LOCKED();
 
@@ -442,6 +438,11 @@ pipex_link_session(struct pipex_session 
session->pipex_iface = iface;
session->ifindex = iface->ifindex;
 
+   ifp = if_get(iface->ifindex);
+   if (ifp != NULL && ifp->if_flags & IFF_POINTOPOINT)
+   session->is_p2p = 1;
+   if_put(ifp);
+
LIST_INSERT_HEAD(_session_list, session, session_list);
chain = PIPEX_ID_HASHTABLE(session->session_id);
LIST_INSERT_HEAD(chain, session, id_chain);
@@ -469,6 +470,8 @@ pipex_unlink_session(struct pipex_sessio
session->ifindex = 0;
 
NET_ASSERT_LOCKED();
+   if (session->state == PIPEX_STATE_CLOSED)
+   return;
LIST_REMOVE(session, id_chain);
 #if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {
@@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
 
 Static int
 

Re: describe 'idle-timeout' exception in npppd.conf man page

2020-08-08 Thread YASUOKA Masahiko
On Sat, 8 Aug 2020 16:01:59 +0300
Vitaliy Makkoveev  wrote:
> On Sat, Aug 08, 2020 at 08:49:24PM +0900, YASUOKA Masahiko wrote:
>> On Fri, 7 Aug 2020 22:19:05 +0300
>> Vitaliy Makkoveev  wrote:
>> > Some times ago we disabled in-kernel timeout for pppx(4) related
>> > pipex(4) sessions. We did this for prevent use after free issue caused
>> > by pipex_timer [1]. By default "idle-timeout" is not set in
>> > npppd.conf(5) and I guess this is reason for we forgot to describe this
>> > exception in npppd.conf(5).
>> > 
>> > But looks like one user caught this [2]. So I propose to describe this
>> > in BUGS section of npppd.conf(5).
>> > 
>> > Also current "idle-timeout" description looks incorrect. If this option
>> > is missing, there is not in-kernel timeout for this session, but
>> > npppd(8) uses it's own timeout for. And we can't configure this value.
>> > 
>> > YASUOKA, what do you think? May be we can kill in-kernel timeout feature
>> > for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
>> > option?
>> 
>> I think we should mention this to the man page until we fix it.
>> So I'd like you to update the man page first.
>> 
>> I'll try to review the problem.
>> 
> 
> Thanks. I updated my diff with changes proposed by jmc@. Are you agree
> with them?

Yes.  ok yasuoka

>> > 1. 
>> > https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78&content-type=text/x-cvsweb-markup
>> > 2. https://marc.info/?l=openbsd-misc&m=159655468504864&w=2 
>> > 
>> > 
>> > Index: usr.sbin/npppd/npppd/npppd.conf.5
>> > ===
>> > RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
>> > retrieving revision 1.27
>> > diff -u -p -r1.27 npppd.conf.5
>> > --- usr.sbin/npppd/npppd/npppd.conf.5  23 Apr 2020 21:10:54 -  
>> > 1.27
>> > +++ usr.sbin/npppd/npppd/npppd.conf.5  7 Aug 2020 19:17:00 -
>> > @@ -699,3 +699,9 @@ The current version of
>> >  .Xr npppd 8
>> >  does not support adding or removing tunnel settings or changing listener
>> >  settings (listen address, port and l2tp-ipsec-require).
>> > +.Pp
>> > +This time
>> > +.Xr pppx 4
>> > +does not allow to create sessions with non null
>> > +.Ic idle-timeout
>> > +option. 
>> 
> 



Re: describe 'idle-timeout' exception in npppd.conf man page

2020-08-08 Thread YASUOKA Masahiko
On Fri, 7 Aug 2020 22:19:05 +0300
Vitaliy Makkoveev  wrote:
> Some times ago we disabled in-kernel timeout for pppx(4) related
> pipex(4) sessions. We did this for prevent use after free issue caused
> by pipex_timer [1]. By default "idle-timeout" is not set in
> npppd.conf(5) and I guess this is reason for we forgot to describe this
> exception in npppd.conf(5).
> 
> But looks like one user caught this [2]. So I propose to describe this
> in BUGS section of npppd.conf(5).
> 
> Also current "idle-timeout" description looks incorrect. If this option
> is missing, there is not in-kernel timeout for this session, but
> npppd(8) uses it's own timeout for. And we can't configure this value.
> 
> YASUOKA, what do you think? May be we can kill in-kernel timeout feature
> for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
> option?

I think we should mention this to the man page until we fix it.
So I'd like you to update the man page first.

I'll try to review the problem.

> 1. 
> https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78&content-type=text/x-cvsweb-markup
> 2. https://marc.info/?l=openbsd-misc&m=159655468504864&w=2 
> 
> 
> Index: usr.sbin/npppd/npppd/npppd.conf.5
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
> retrieving revision 1.27
> diff -u -p -r1.27 npppd.conf.5
> --- usr.sbin/npppd/npppd/npppd.conf.5 23 Apr 2020 21:10:54 -  1.27
> +++ usr.sbin/npppd/npppd/npppd.conf.5 7 Aug 2020 19:17:00 -
> @@ -699,3 +699,9 @@ The current version of
>  .Xr npppd 8
>  does not support adding or removing tunnel settings or changing listener
>  settings (listen address, port and l2tp-ipsec-require).
> +.Pp
> +This time
> +.Xr pppx 4
> +does not allow to create sessions with non null
> +.Ic idle-timeout
> +option. 



Re: [PATCH] pipex(4): rework PPP input

2020-08-04 Thread YASUOKA Masahiko
Sorry for delayed reply.

On Wed, 27 May 2020 01:29:36 +0300
Sergey Ryazanov  wrote:
> On Tue, May 26, 2020 at 12:07 PM Vitaliy Makkoveev
>  wrote:
>>> On 25 May 2020, at 22:04, Sergey Ryazanov  wrote:
>>> On Sat, May 23, 2020 at 3:07 PM Vitaliy Makkoveev
>>>  wrote:
>>>> For example, each pipex session should have unique pair of `protocol’ and
>>>> `session_id’. These values are passed from userland. While the only
>>>> instance of npppd(8) uses pipex(4) this is not the problem. But you
>>>> introduce the case while pipex(4) will be used by multiple independent
>>>> userland programs. At least, I have interest how you handle this.
>>>
>>> This should not be a problem here. npppd(8) support server mode only.
>>> While my work is to implement acceleration for client side of L2TP
>>> connection.
>>
>> I guess they can coexist. Also you can have multiple connections to
>> ppp servers simultaneously.
> 
> With 16 bits long session id field, according to birthday problem to
> reach 0.9 collision probability I need 549 simultaneous sessions.
> Should I still be worried or I have a time to complete integration
> work and then update UDP  filter for love of the game?

usr.sbin/npppd/l2tp/l2tp_local.h

 79 #define L2TP_SESSION_ID_MASK	0x7fff

npppd uses 0-32767


Re: pipex(4): kill pipexintr()

2020-08-03 Thread YASUOKA Masahiko
On Mon, 3 Aug 2020 23:36:09 +0300
Vitaliy Makkoveev  wrote:
> On Tue, Aug 04, 2020 at 01:26:14AM +0900, YASUOKA Masahiko wrote:
>> Comments?
> 
> You introduce `cookie' as 
> 
>   cookie = session->protocol << 16 | session->session_id;
> 
> also multicast sessions initialized as 
> 
>   session->protocol = PIPEX_PROTO_NONE;
>   session->session_id = ifindex;
> 
> `protocol' and `session_id' come from userland, so I like to have checks
> like below. It's allow us to avoid `cookie' be broken while
> `pr_session_id' exceeds 16 bit integer. Also userland should not pass
> PIPEX_PROTO_NONE as `pr_protocol' because we shouldn't have multicast
> and not multicast sessions with the same `cookie'.
> 
>  cut begin 
> 
> pipex_init_session(struct pipex_session **rsession,
> struct pipex_session_req *req)
> {
>   if (req->pr_protocol == PIPEX_PROTO_NONE)
>   return (EINVAL);

pipex_init_session() has the same check already.

 287 int
 288 pipex_init_session(struct pipex_session **rsession,
 289 struct pipex_session_req *req)
 290 {
 (snip)
 297 switch (req->pr_protocol) {
 298 #ifdef PIPEX_PPPOE
 299 case PIPEX_PROTO_PPPOE:
 (snip)
 333 default:
 334 return (EPROTONOSUPPORT);
 335 }

> 
>   if (req->pr_session_id > 0xffff)
>   return (EINVAL);
> 
>  cut end 

req->pr_session_id can't be > 0xffff since it's uint16_t.

> Also cookies introduce invalidation problem. Yes, it has low
> probability, but we can have operation order like below:
> 
> 1. enqueue session with `protocol' = 0xaa and `session_id' = 0xbb, and
>   `cookie' = 0xaabb
> 2. kill this session
> 3. create new session `protocol' = 0xaa and `session_id' = 0xbb
> 4. this newly created session will be used by pipexintr()
> 
> As I have seen while played with refcounters, session can be enqueued
> more than 10 times...

The diff makes the problem worse, but it could happen already if the
session-id is reused.

> Also It's not obvious that interface index will never exceed 16 bit
> counter. It's unsigned int and may be underlay counter's resolution
> will be expanded in future. So I like to have at least corresponding
> assertion in pipex_iface_init().

Right.  This is fixable with another unique number.

> So, may be my first solution is the best here. And, as mpi@ pointed,
> ipsec(4) should be reworked to allow parallelism.

Does first mean killing the pipexintr?

What I explained was wrong.  I'm sorry about this.

On Fri, 31 Jul 2020 09:36:32 +0900 (JST)
YASUOKA Masahiko  wrote:
> A packet of L2TP/IPsec (encapsulated IP/PPP/L2TP/UDP/ESP/UDP/IP) is
> processed like:
> 
>ipv4_input
>  ...
>udp_input
>  ipsec_common_input
>  esp_input
>crypto_dispatch
>  => crypto_taskq_mp_safe
> 
>kthread "crynlk"
>  crypto_invoke
>... (*1)
>  crypto_done
>  esp_input_cb
>ipsec_common_input_cb
>  ip_deliver
>udp_input
>  pipex_l2tp_input
>pipex_common_input
>  (*2)
>  pipex_ppp_input
>pipex_mppe_input (*3)
>  pipex_ppp_input
>pipex_ip_input
>  ipv4_input
>...

This should be

   kthread "crynlk"
 crypto_invoke
   ... (*1)
 crypto_done
   kthread "crypto" < another thread
 ipsec_input_cb < this is missed
   esp_input_cb
 ipsec_common_input_cb
   ip_deliver
 udp_input
   pipex_l2tp_input
 pipex_common_input
   (*2)
   pipex_ppp_input
 pipex_mppe_input (*3)
   pipex_ppp_input
 pipex_ip_input
   ipv4_input
 ...

> At *2 there was a queue.  "crynlk" is a busy thread, since it is doing
> decryption at *1.  I think it's better pipex input is be done by
> another thread than crypto since it also has decryption at *3.

This is false.  *3 is done by another thread.
It is the same if crypto driver is not CRYPTOCAP_F_MPSAFE.
(crypto_invoke() is done by the caller's thread and the callback
 (ipsec_input_cb) is called by the "crypto" thread.)

So I have no actual reason to keep the queues.

ok yasuoka for the diff which kills pipexintr.



Re: pipex(4): kill pipexintr()

2020-08-03 Thread YASUOKA Masahiko
On Sat, 1 Aug 2020 18:52:27 +0300
Vitaliy Makkoveev  wrote:
> On Sat, Aug 01, 2020 at 07:44:17PM +0900, YASUOKA Masahiko wrote:
>> I'm not sure when it is broken, in old versions, it was assumed the
>> pipex queues are empty when pipex_iface_stop() is called.  The problem
>> mvs@ found is the assumption is not true any more.
>> 
>> pipex has a mechanism that delete a session when the queues are empty.
>> 
>> 819 Static void
>> 820 pipex_timer(void *ignored_arg)
>> 821 {
>> (snip)
>> 854 case PIPEX_STATE_CLOSED:
>> 855 /*
>> 856  * mbuf queued in pipexinq or pipexoutq may 
>> have a
>> 857  * refererce to this session.
>> 858  */
>> 859 if (!mq_empty() || 
>> !mq_empty())
>> 860 continue;
>> 861 
>> 862 pipex_destroy_session(session);
>> 863 break;
>> 
>> I think using this is better.
>> 
>> How about this?
> 
> Unfortunately your diff is incorrect. It introduces memory leaks and
> breaks pppx(4). Also it is incomplete.

Thank you for your feedback.

> We have multiple ways to kill pipex(sessions):
> 
> 1. pppx(4)
> 
> We have `struct pppx_if' which has pointer to corresponding session and
> this session is accessed directly within pppx(4) layer. Since we can't
> destroy `ppp_if' in pipex(4) layer we can't destroy these sessions by
> pipex_timer(). The only way to destroy them is pppx_if_destroy() which:
> 
> 1. unlink session by pipex_unlink_session()
> 2. detach corresponding `ifnet' by if_detach()
> 3. release session by pipex_rele_session() 
> 
> It's unsafe because mbuf queues can have references to this session.

Yes.

> 2. pppac(4)
> 
> We have no direct access to corresponding sessions within pppac(4)
> layer. Also there are multiple ways to do this:
> 
> 1. pipex_ioctl() with `PIPEXSMODE' command. Underlay pipex_iface_stop()
> walks through `pipex_session_list' and destroy sessions by
> pipex_destroy_session() call. It's unsafe because we don't check queues.
> 
> 2. pipex_ioctl() with `PIPEXDSESSION'. pipex_close_session() will change
> session's  state and pipex_timer() will kill this sessions later. This
> is the only safe way.
> 
> 3. pipex_iface_fini(). The same as `PIPEXSMODE', pipex_iface_stop()
> kills sessions, Which is also unsafe. Also we have another use after
> free issue:
> 
>  cut begin 
> 
> pipex_iface_fini(struct pipex_iface_context *pipex_iface)
> {
> pool_put(&pipex_session_pool, pipex_iface->multicast_session);
> NET_LOCK();
> pipex_iface_stop(pipex_iface);
> NET_UNLOCK();
> }
> 
>  cut end 
> 
> `multicast_session' should be protected too. It also can be pushed to
> `pipexoutq'.

Yes, I missed this point.

> Also since this time pipexintr() and pipex_iface_fini() are
> both serialized by KERNEL_LOCK() too we can't destroy `multicast_session'
> which is in use by pipexintr(). But when we will drop KERNEL_LOCK()
> around pipexintr() we can catch use after free issue here. I already did
> diff for move this pool_put() under NET_LOCK(), but it was rejectedi by
> mpi@ because:
> 
>  cut begin 
> pipex_iface_fini() should be called on the last reference of the  
>   
> descriptor.  So this shouldn't be necessary.  If there's an issue 
>   
> with the current order of the operations, we should certainly fix 
>   
> it differently.   
>  cut end 

Yes, I understand what mpi@ is saying.  But this is a separate story.

> So I repeat it again: npppd(8) can be killed in every moment by SIGKILL
> or by SIGSEGV and pppacclose() will be called and it will call
> pipex_iface_fini(). `multicast_session' can be used in this moment by
> pipexintr().
> 
> And no locks protect `multicast_session' itself.
> 
> The two diffs I proposed in this thread solve problems caused by
> pipexintr().

There are a lot of ways to solve the problems.

The diff I sent few days ago is to destruct the pipex sessions in the
pipex timer.  As you pointed out it has some problems.  Those problems
can be fixed, but I'd suggest another way.  I attached at last.

The problem exposed is "use-after-free".  Since I think this is not a
problem of parallel processing, having reference counter seems too
much for me.


The diff is not to refer the session by a pointer, but by the id.
The idea is come from IPsec tdb.

Comments?


diff --git a/sys/net/pipex.c b/sys/net/p

Re: pipex(4): kill pipexintr()

2020-08-01 Thread YASUOKA Masahiko
Hi,

I'm not sure when it was broken; in old versions, it was assumed that
the pipex queues are empty when pipex_iface_stop() is called.  The
problem mvs@ found is that this assumption is no longer true.

pipex has a mechanism that deletes a session when the queues are empty.

819 Static void
820 pipex_timer(void *ignored_arg)
821 {
(snip)
854 case PIPEX_STATE_CLOSED:
855 /*
856  * mbuf queued in pipexinq or pipexoutq may 
have a
857  * refererce to this session.
858  */
859 if (!mq_empty(&pipexinq) || 
!mq_empty(&pipexoutq))
860 continue;
861 
862 pipex_destroy_session(session);
863 break;

I think using this is better.

How about this?

diff --git a/sys/net/pipex.c b/sys/net/pipex.c
index 2ad7757fee9..6fe14c400bf 100644
--- a/sys/net/pipex.c
+++ b/sys/net/pipex.c
@@ -190,7 +190,7 @@ pipex_iface_stop(struct pipex_iface_context *pipex_iface)
LIST_FOREACH_SAFE(session, &pipex_session_list, session_list,
session_tmp) {
if (session->pipex_iface == pipex_iface)
-   pipex_destroy_session(session);
+   pipex_unlink_session(session);
}
 }
 
@@ -470,9 +470,16 @@ pipex_link_session(struct pipex_session *session,
 void
 pipex_unlink_session(struct pipex_session *session)
 {
+   struct radix_node *rn;
+
session->ifindex = 0;
 
NET_ASSERT_LOCKED();
+   if (!in_nullhost(session->ip_address.sin_addr)) {
+   rn = rn_delete(&session->ip_address, &session->ip_netmask,
+   pipex_rd_head4, (struct radix_node *)session);
+   KASSERT(rn != NULL);
+   }
LIST_REMOVE(session, id_chain);
 #if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {
@@ -486,10 +493,6 @@ pipex_unlink_session(struct pipex_session *session)
LIST_REMOVE(session, state_list);
LIST_REMOVE(session, session_list);
session->state = PIPEX_STATE_CLOSED;
-
-   /* if final session is destroyed, stop timer */
-   if (LIST_EMPTY(&pipex_session_list))
-   pipex_timer_stop();
 }
 
 Static int
@@ -652,20 +655,16 @@ pipex_get_closed(struct pipex_session_list_req *req,
 Static int
 pipex_destroy_session(struct pipex_session *session)
 {
-   struct radix_node *rn;
-
/* remove from radix tree and hash chain */
NET_ASSERT_LOCKED();
 
-   if (!in_nullhost(session->ip_address.sin_addr)) {
-   rn = rn_delete(>ip_address, >ip_netmask,
-   pipex_rd_head4, (struct radix_node *)session);
-   KASSERT(rn != NULL);
-   }
-
pipex_unlink_session(session);
pipex_rele_session(session);
 
+   /* if final session is destroyed, stop timer */
+   if (LIST_EMPTY(&pipex_session_list))
+   pipex_timer_stop();
+
return (0);
 }
 
@@ -739,7 +738,8 @@ pipexintr(void)
mq_delist(&pipexoutq, &ml);
while ((m = ml_dequeue(&ml)) != NULL) {
pkt_session = m->m_pkthdr.ph_cookie;
-   if (pkt_session == NULL) {
+   if (pkt_session == NULL ||
+   pkt_session->state == PIPEX_STATE_CLOSED) {
m_freem(m);
continue;
}
@@ -776,7 +776,8 @@ pipexintr(void)
mq_delist(&pipexinq, &ml);
while ((m = ml_dequeue(&ml)) != NULL) {
pkt_session = m->m_pkthdr.ph_cookie;
-   if (pkt_session == NULL) {
+   if (pkt_session == NULL ||
+   pkt_session->state == PIPEX_STATE_CLOSED) {
m_freem(m);
continue;
}



Re: pipex(4): kill pipexintr()

2020-07-30 Thread YASUOKA Masahiko
On Thu, 30 Jul 2020 22:43:10 +0300
Vitaliy Makkoveev  wrote:
> On Thu, Jul 30, 2020 at 10:05:13PM +0900, YASUOKA Masahiko wrote:
>> On Thu, 30 Jul 2020 15:34:09 +0300
>> Vitaliy Makkoveev  wrote:
>> > On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
>> >> If the diff removes the queue, then the pipex input routine is
>> >> executed by the NIC's interrupt handler.
>> >> 
>> >> The queues had been made to avoid that kind of situations.
>> > 
>> > It's not enqueued in pppoe case. According pipex_pppoe_input() code we
>> > call pipex_common_input() with `useq' argument set to '0', so we don't
>> > enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it to
>> > ipv{4,6}_input().
>> 
>> You are right.  Sorry, I forgot about this which I did that by myself.
> 
> I'm interesting the reason why you did that.

I remember now: it was the first step of the MP work for pipex.

At that time, I discussed it with mpi, and he suggested the following.

 1. stop enqueueing packets for PPPoE
 2. try not take a kernel lock before calling gre_input(), then we can
also stop enqueueing packets for PPTP(GRE)
 3. for L2TP, keep the queue and change the netisr to an unlocked task



Re: pipex(4): kill pipexintr()

2020-07-30 Thread YASUOKA Masahiko
On Thu, 30 Jul 2020 22:43:10 +0300
Vitaliy Makkoveev  wrote:
> On Thu, Jul 30, 2020 at 10:05:13PM +0900, YASUOKA Masahiko wrote:
>> On Thu, 30 Jul 2020 15:34:09 +0300
>> Vitaliy Makkoveev  wrote:
>> > On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
>> >> Hi,
>> >> 
>> >> sys/net/if_ethersubr.c:
>> >> 372 void
>> >> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
>> >> (snip)
>> >> 519 #if NPPPOE > 0 || defined(PIPEX)
>> >> 520 case ETHERTYPE_PPPOEDISC:
>> >> 521 case ETHERTYPE_PPPOE:
>> >> 522 if (m->m_flags & (M_MCAST | M_BCAST))
>> >> 523 goto dropanyway;
>> >> 524 #ifdef PIPEX
>> >> 525 if (pipex_enable) {
>> >> 526 struct pipex_session *session;
>> >> 527 
>> >> 528 if ((session = pipex_pppoe_lookup_session(m)) 
>> >> != NULL) {
>> >> 529 pipex_pppoe_input(m, session);
>> >> 530 return;
>> >> 531 }
>> >> 532 }
>> >> 533 #endif
>> >> 
>> >> previously a packet which branchces to #529 is enqueued.
>> >> 
>> >> If the diff removes the queue, then the pipex input routine is
>> >> executed by the NIC's interrupt handler.
>> >> 
>> >> The queues had been made to avoid that kind of situations.
>> > 
>> > It's not enqueued in pppoe case. According pipex_pppoe_input() code we
>> > call pipex_common_input() with `useq' argument set to '0', so we don't
>> > enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it to
>> > ipv{4,6}_input().
>> 
>> You are right.  Sorry, I forgot about this which I did that by myself.
>> 
> 
> I'm interesting the reason why you did that.
> 
>> >> Also I don't see a relation of the use-after-free problem and killing
>> >> queues.  Can't we fix the problem unless we kill the queues?
>> > 
>> > Yes we can. Reference counters allow us to keep orphan sessions in these
>> > queues without use after free issue.
>> > 
>> > I will wait your commentaries current enqueuing before to do something.
>> 
>> I have another concern.
>> 
>> You might know, when L2TP/IPsec is used heavily, the crypto thread
>> uses 100% of 1 CPU core.  In that case, that thread becomes like
>> below:
>> 
>>   crypto thread -> udp_userreq -> pipex_l2tp_input
>> 
>> some clients are using MPPE(RC4 encryption) on CCP.  It's not so
>> light.
>> 
>> How do we offload this for CPUs?  I am thinking that "pipex" can have
>> a dedicated thread.  Do we have another scenario?
>>
> 
> I suppose you mean udp_input(). What is you call "crypto thread"? I did
> a little backtrace but I didn't find this thread.
> 
> ether_resolve
>   if_input_local
> ipv4_input
>   ip_input_if
> ip_ours
>   ip_deliver
> udp_input (through pr_input)
>   pipex_l2tp_input
> 
> ipi{,6}_mloopback
>   if_input_local
> ipv4_input
>   ...
> udp_input (through pr_input)
>   pipex_l2tp_input
> 
> loinput
>   if_input_local
> ipv4_input
>   ...
> udp_input (through pr_input)
>   pipex_l2tp_input
> 
> Also various pseudo drivers call ipv{4,6}_input() and underlay
> udp_unput() too.
> 
> Except nfs, we call udp_usrreq() through socket layer only. Do you mean
> userland as "crypto thread"?

Sorry, udp_usrreq() should be udp_input() and crypto thread meant a
kthread for crypto_taskq_mp_safe, whose name is "crynlk" (see
crypto_init()).

A packet of L2TP/IPsec (encapsulated IP/PPP/L2TP/UDP/ESP/UDP/IP) is
processed like:

   ipv4_input
 ...
   udp_input
 ipsec_common_input
   esp_input
 crypto_dispatch
   => crypto_taskq_mp_safe

   kthread "crynlk"
 crypto_invoke
   ... (*1)
 crypto_done
   esp_input_cb
 ipsec_common_input_cb
   ip_deliver
 udp_input
   pipex_l2tp_input
 pipex_common_input
   (*2)
   pipex_ppp_input
 pipex_mppe_input (*3)
   pipex_ppp_input
 pipex_ip_input
 

Re: pipex(4): kill pipexintr()

2020-07-30 Thread YASUOKA Masahiko
On Thu, 30 Jul 2020 15:34:09 +0300
Vitaliy Makkoveev  wrote:
> On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> sys/net/if_ethersubr.c:
>> 372 void
>> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
>> (snip)
>> 519 #if NPPPOE > 0 || defined(PIPEX)
>> 520 case ETHERTYPE_PPPOEDISC:
>> 521 case ETHERTYPE_PPPOE:
>> 522 if (m->m_flags & (M_MCAST | M_BCAST))
>> 523 goto dropanyway;
>> 524 #ifdef PIPEX
>> 525 if (pipex_enable) {
>> 526 struct pipex_session *session;
>> 527 
>> 528 if ((session = pipex_pppoe_lookup_session(m)) != 
>> NULL) {
>> 529 pipex_pppoe_input(m, session);
>> 530 return;
>> 531 }
>> 532 }
>> 533 #endif
>> 
>> previously a packet which branchces to #529 is enqueued.
>> 
>> If the diff removes the queue, then the pipex input routine is
>> executed by the NIC's interrupt handler.
>> 
>> The queues had been made to avoid that kind of situations.
> 
> It's not enqueued in pppoe case. According pipex_pppoe_input() code we
> call pipex_common_input() with `useq' argument set to '0', so we don't
> enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it to
> ipv{4,6}_input().

You are right.  Sorry, I forgot about this which I did that by myself.

>> Also I don't see a relation of the use-after-free problem and killing
>> queues.  Can't we fix the problem unless we kill the queues?
> 
> Yes we can. Reference counters allow us to keep orphan sessions in these
> queues without use after free issue.
> 
> I will wait your commentaries current enqueuing before to do something.

I have another concern.

You might know, when L2TP/IPsec is used heavily, the crypto thread
uses 100% of 1 CPU core.  In that case, that thread becomes like
below:

  crypto thread -> udp_userreq -> pipex_l2tp_input

some clients are using MPPE(RC4 encryption) on CCP.  It's not so
light.

How do we offload this for CPUs?  I am thinking that "pipex" can have
a dedicated thread.  Do we have another scenario?

--yasuoka



Re: pipex(4): kill pipexintr()

2020-07-30 Thread YASUOKA Masahiko
Hi,

sys/net/if_ethersubr.c:
372 void
373 ether_input(struct ifnet *ifp, struct mbuf *m)
(snip)
519 #if NPPPOE > 0 || defined(PIPEX)
520 case ETHERTYPE_PPPOEDISC:
521 case ETHERTYPE_PPPOE:
522 if (m->m_flags & (M_MCAST | M_BCAST))
523 goto dropanyway;
524 #ifdef PIPEX
525 if (pipex_enable) {
526 struct pipex_session *session;
527 
528 if ((session = pipex_pppoe_lookup_session(m)) != 
NULL) {
529 pipex_pppoe_input(m, session);
530 return;
531 }
532 }
533 #endif

previously a packet which branches to #529 is enqueued.

If the diff removes the queue, then the pipex input routine is
executed by the NIC's interrupt handler.

The queues had been made to avoid that kind of situations.

Also I don't see a relation of the use-after-free problem and killing
queues.  Can't we fix the problem unless we kill the queues?

On Wed, 29 Jul 2020 23:04:36 +0300
Vitaliy Makkoveev  wrote:
> Now pipex(4) is fully covered by NET_LOCK() and this is documented. But
> we still have an issue with pipex(4) session itself and I guess it's
> time to fix it.
> 
> We have `pipexinq' and `pipexoutq' mbuf(9) queues to store mbufs. Each
> mbuf(9) passed to these queues stores the pointer to corresponding
> session referenced as `m_pkthdr.ph_cookie'. We enqueue incoming mbufs for
> pppx(4) and incoming and outgoing mbufs for pppac(4). But we don't
> enqueue pppoe related mbufs. After packet was enqueued to corresponding
> queue we call schednetisr() which just schedules netisr() to run:
> 
>  cut begin 
> 
> 780 pipex_ppp_enqueue(struct mbuf *m0, struct pipex_session *session,
> 781 struct mbuf_queue *mq)
> 782 {
> 783 m0->m_pkthdr.ph_cookie = session;
> 784 /* XXX need to support other protocols */
> 785 m0->m_pkthdr.ph_ppp_proto = PPP_IP;
> 786 
> 787 if (mq_enqueue(mq, m0) != 0)
> 788 return (1);
> 789 
> 790 schednetisr(NETISR_PIPEX);
> 791 
> 792 return (0);
> 793 }
> 
>  cut end 
> 
> Also we have pipex_timer() which should destroy session in safe way, but
> it does this only for pppac(4) and only for sessions closed by
> `PIPEXDSESSION' command:
> 
>  cut begin 
> 
> 812 pipex_timer(void *ignored_arg)
> 813 {
>   /* skip */
> 846 case PIPEX_STATE_CLOSED:
> 847 /*
> 848  * mbuf queued in pipexinq or pipexoutq may have a
> 849* refererce to this session.
> 850  */
> 851 if (!mq_empty() || !mq_empty())
> 852 continue;
> 853 
> 854 pipex_destroy_session(session);
> 855 break;
> 
>  cut end 
> 
> While we destroy sessions through pipex_rele_session() or through
> pipex_iface_fini() or through `PIPEXSMODE' command we don't check
> `pipexinq' and `pipexoutq' state. This means we can break them.
> 
> It's not guaranteed that netisr() will start just after schednetisr()
> call. This means we can destroy session, but corresponding mbuf(9) is
> stored within `pipexinq' or `pipexoutq'. It's `m_pkthdr.ph_cookie' still
> stores pointer to destroyed session and we have use after free issue. I
> wonder why we didn't caught panic yet.
> 
> I propose to kill `pipexinq', `pipexoutq' and pipexintr(). There is
> absolutely no reason them to exist. This should not only fix issue
> described above but simplifies code too.
> 
> Other ways are to implement reference counters for session or walk
> through mbuf(9) queues and kill corresponding mbufs. It doesn't make
> sense to go these ways.
> 
> Index: lib/libc/sys/sysctl.2
> ===
> RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
> retrieving revision 1.40
> diff -u -p -r1.40 sysctl.2
> --- lib/libc/sys/sysctl.2 17 May 2020 05:48:39 -  1.40
> +++ lib/libc/sys/sysctl.2 29 Jul 2020 13:47:40 -
> @@ -2033,35 +2033,11 @@ The currently defined variable names are
>  .Bl -column "Third level name" "integer" "Changeable" -offset indent
>  .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
>  .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
> -.It Dv PIPEXCTL_INQ Ta node Ta not applicable
> -.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
>  .El
>  .Bl -tag -width "123456"
>  .It Dv PIPEXCTL_ENABLE
>  If set to 1, enable PIPEX processing.
>  The default is 0.
> -.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
> -Fourth level comprises an array of
> -.Vt struct ifqueue
> -structures containing information about the PIPEX packet input queue.
> -The forth level names for the elements of
> -.Vt struct ifqueue
> -are the same as described in
> -.Li ip.arpq
> -in the
> -.Dv PF_INET
> -section.
> -.It Dv PIPEXCTL_OUTQ Pq Va 

Re: pf: route-to least-states

2020-07-28 Thread YASUOKA Masahiko
Hi,

On Tue, 28 Jul 2020 18:54:48 +0200
Alexandr Nedvedicky  wrote:
> On Wed, Jul 29, 2020 at 01:22:48AM +0900, YASUOKA Masahiko wrote:
>> Previous commit has a wrong part..
>> 
>> ok?
>> 
>> Fix previous commit which referred wrong address.
> 
> would it make sense to move the block, you've introduced earler
> under the !PF_AZERO() branch just couple lines below. something
> like this:
> 
> 8<---8<---8<--8<
> diff --git a/sys/net/pf_lb.c b/sys/net/pf_lb.c
> index 510795a4d0b..f77d96a99ec 100644
> --- a/sys/net/pf_lb.c
> +++ b/sys/net/pf_lb.c
> @@ -322,13 +322,13 @@ pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, 
> struct pf_addr *saddr,
> return (-1);
> }
>  
> -   if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
> -   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
> +   if (!PF_AZERO(cached, af)) {
> +   pf_addrcpy(naddr, cached, af);
> +   if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) 
> &&
> +   ((pf_map_addr_states_increase(af, rpool, cached) == -1))
> return (-1);
> }
>  
> -   if (!PF_AZERO(cached, af))
> -   pf_addrcpy(naddr, cached, af);
> if (pf_status.debug >= LOG_DEBUG) {
> log(LOG_DEBUG, "pf: pf_map_addr: "
> "src tracking (%u) maps ", type);
> 
> 8<---8<---8<--8<
> 
> It seems to me it would be better to bump number of states if and only if we
> actually find some address in pool.

Yes, I agree.

ok?

Fix previous commit which referred wrong address and returned wrong
value.


Index: sys/net/pf_lb.c
===
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.66
diff -u -p -r1.66 pf_lb.c
--- sys/net/pf_lb.c 28 Jul 2020 16:47:41 -0000  1.66
+++ sys/net/pf_lb.c 28 Jul 2020 17:01:34 -0000
@@ -322,13 +322,13 @@ pf_map_addr_sticky(sa_family_t af, struc
return (-1);
}
 
-   if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
-   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
-   return (-1);
-   }
 
-   if (!PF_AZERO(cached, af))
+   if (!PF_AZERO(cached, af)) {
pf_addrcpy(naddr, cached, af);
+   if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
+   pf_map_addr_states_increase(af, rpool, cached) == -1)
+   return (-1);
+   }
if (pf_status.debug >= LOG_DEBUG) {
log(LOG_DEBUG, "pf: pf_map_addr: "
"src tracking (%u) maps ", type);
@@ -651,7 +651,7 @@ pf_map_addr_states_increase(sa_family_t 
pf_print_host(naddr, 0, af);
addlog(". Failed to increase count!\n");
}
-   return (1);
+   return (-1);
}
} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
@@ -663,7 +663,7 @@ pf_map_addr_states_increase(sa_family_t 
pf_print_host(naddr, 0, af);
addlog(". Failed to increase count!\n");
}
-   return (1);
+   return (-1);
}
}
return (0);



Re: pf: route-to least-states

2020-07-28 Thread YASUOKA Masahiko
Hi,

Let me add another fix of previous.

ok?

Fix previous commit which referred wrong address and returned wrong
value.

Index: sys/net/pf_lb.c
===
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.66
diff -u -p -r1.66 pf_lb.c
--- sys/net/pf_lb.c 28 Jul 2020 16:47:41 -  1.66
+++ sys/net/pf_lb.c 28 Jul 2020 16:52:24 -
@@ -323,7 +323,7 @@ pf_map_addr_sticky(sa_family_t af, struc
}
 
if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
-   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
+   if (pf_map_addr_states_increase(af, rpool, cached) == -1)
return (-1);
}
 
@@ -651,7 +651,7 @@ pf_map_addr_states_increase(sa_family_t 
pf_print_host(naddr, 0, af);
addlog(". Failed to increase count!\n");
}
-   return (1);
+   return (-1);
}
} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
@@ -663,7 +663,7 @@ pf_map_addr_states_increase(sa_family_t 
pf_print_host(naddr, 0, af);
addlog(". Failed to increase count!\n");
}
-   return (1);
+   return (-1);
}
}
return (0);



Re: pf: route-to least-states

2020-07-28 Thread YASUOKA Masahiko
Hi,

The previous commit has a wrong part.

ok?

Fix the previous commit, which referred to the wrong address.

Index: sys/net/pf_lb.c
===
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.65
diff -u -p -r1.65 pf_lb.c
--- sys/net/pf_lb.c 24 Jul 2020 14:06:33 -  1.65
+++ sys/net/pf_lb.c 28 Jul 2020 16:15:50 -
@@ -323,7 +323,7 @@ pf_map_addr_sticky(sa_family_t af, struc
}
 
if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
-   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
+   if (pf_map_addr_states_increase(af, rpool, cached) == -1)
return (-1);
}
 



relayd: set group and divert-reply

2020-07-26 Thread YASUOKA Masahiko
Hi,

I'd like to run relayd as _relayd group always so that we can use
"group _relayd" in a pf rule.  This makes it possible to write a pf
rule easily which is to match only connections from relayd(8).

Also as for relayd.conf(5), I'd like to mention that "divert-reply" is
required for "transparent forward" and add an example pf rule which
uses "group _relayd".

ok?

Run relayd(8) as _relayd group user.

Index: usr.sbin/relayd/relayd.c
===
RCS file: /cvs/src/usr.sbin/relayd/relayd.c,v
retrieving revision 1.182
diff -u -p -r1.182 relayd.c
--- usr.sbin/relayd/relayd.c15 Sep 2019 19:23:29 -  1.182
+++ usr.sbin/relayd/relayd.c26 Jul 2020 08:39:27 -
@@ -201,6 +201,11 @@ main(int argc, char *argv[])
if ((ps->ps_pw =  getpwnam(RELAYD_USER)) == NULL)
errx(1, "unknown user %s", RELAYD_USER);
 
+   if (setgroups(1, >ps_pw->pw_gid) == -1 ||
+   setresgid(ps->ps_pw->pw_gid, ps->ps_pw->pw_gid, ps->ps_pw->pw_gid)
+   == -1)
+   err(1, "unable to set group ids");
+
log_init(debug, LOG_DAEMON);
log_setverbose(verbose);
 

Add a mention that "divert-reply" rule is required for "transparent
forward" and add an example which uses "group _relayd" to match the
outgoing connections.

Index: usr.sbin/relayd/relayd.conf.5
===
RCS file: /cvs/src/usr.sbin/relayd/relayd.conf.5,v
retrieving revision 1.198
diff -u -p -r1.198 relayd.conf.5
--- usr.sbin/relayd/relayd.conf.5   1 Jul 2020 06:47:18 -   1.198
+++ usr.sbin/relayd/relayd.conf.5   26 Jul 2020 08:39:27 -
@@ -622,6 +622,10 @@ Use the
 .Ic transparent
 keyword to enable fully-transparent mode; the source address of the
 client will be retained in this case.
+For this case,
+additional
+.Xr pf 4
+rule with divert-reply option is required for the outgoing connection.
 .Pp
 The
 .Ic with tls
@@ -1627,6 +1631,31 @@ relay tlsinspect {
protocol httpfilter
forward with tls to destination
 }
+.Ed
+.Pp
+If you want to use fully-transparent mode,
+you can add the
+.Ic transparent
+keyword to
+.Ic forward
+option:
+.Bd -literal -offset indent
+relay tlsinspect {
+   listen on 127.0.0.1 port 8443 tls
+   protocol httpfilter
+   transparent forward with tls to destination
+}
+.Ed
+.Pp
+And add a matching divert-reply rule in
+.Xr pf.conf 5 .
+You can use
+.Dq group _relayd
+to match only connections from
+.Xr relayd 8
+precisely:
+.Bd -literal -offset indent
+pass out proto tcp to port 443 group _relayd divert-reply
 .Ed
 .Pp
 The next simple router configuration example can be used to run



Re: pf_remove_divert_state

2020-07-26 Thread YASUOKA Masahiko
Thanks,

On Sat, 25 Jul 2020 15:00:07 +0200
Alexander Bluhm  wrote:
> On Sat, Jul 25, 2020 at 09:37:37PM +0900, YASUOKA Masahiko wrote:
>> Is this part a reason why we have "divert-reply"?
> 
> Yes.
> 
> Divert rules pass packets to the local network stack.  With divert-to
> you specify the socket address.  This works for incoming connections.
> The divert-to address can be 127.0.0.1 or anything else with
> SO_BINDANY.
> 
> When you use SO_BINDANY for outgoing connections and you don't know
> the addresses when writing pf.conf, use divert-reply.
> 
> As dangling states interfere with new connections, I added the
> divert state cleanup.  This is especially necessary for DGRAM or
> RAW sockets.

Yes.  My first message shows it is necessary for TCP.

Also, my diff was totally wrong: it deletes the states regardless of
whether they are for divert or not.

>> > Is that not possible for you?
>> 
>> It's possible.
> 
> Fine, then use divert-reply instead of changing the semantics.

I had thought it was hard to create a divert-reply rule for relayd's
"transparent forward to destination" case.  But I noticed tftp-proxy
is using "group _tftp_proxy" to precisely match only connections from
the program.

I'll send diffs to do the same thing for relayd in a separated mail.



Re: pf_remove_divert_state

2020-07-25 Thread YASUOKA Masahiko
On Sat, 25 Jul 2020 13:29:57 +0200
Alexander Bluhm  wrote:
> On Sat, Jul 25, 2020 at 08:20:21PM +0900, YASUOKA Masahiko wrote:
>> Currently SO_BINDANY is usable without any divert or divert-reply
>> rule.
> 
> This is why we have the divert-reply feature.  Just mark the states
> with that keyword when you want to use them with SO_BINDANY.

Thanks,

Let me clarify whether I understand correctly.

| @@ -1410,9 +1410,7 @@ pf_remove_divert_state(struct pf_state_k
|   struct pf_state_item*si;
|  
|   TAILQ_FOREACH(si, >states, entry) {
| - if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
| - (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
| - si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
| + if (sk == si->s->key[PF_SK_STACK]) {
|   pf_remove_state(si->s);
|   break;
|   }
| 

Is this part a reason why we have "divert-reply"?

> See man setsockopt

Yes, I have checked the API already.

> Is that not possible for you?

It's possible.

--yasuoka



pf_remove_divert_state

2020-07-25 Thread YASUOKA Masahiko
Hi,

# let me correct the previous mail, it has some typos.

Currently SO_BINDANY is usable without any divert or divert-reply
rule.

pf reserves its associated PCB to its state when the packet is going
out.  In this case, the pf rule is not required to have a "divert" or
"divert-reply" option.  When receiving reverse direction packets,
those packets are going to "ours" since they have the associated PCB.

But when dropping the connection, the PCB is deleted but the state
will not be removed.  Currently pf removes the state only if it is
created by a rule with "divert-reply" or "divert" option.  Otherwise
the state is kept.

As the result, following incoming packets for the connection will be
forwarded by the state.  They should not be forwarded since they were
going to "ours".

I think the state should be deleted even if it's created by a rule
without "divert" or "divert-reply" option. The following diff will
change this behavior.  Also I attached a test procedure after the
diff.


ok? comments?

Don't keep a state when its associated PCB is deleted, even if it was
created without a "divert-to" or "divert-reply" rule.  It might have
been created by SO_BINDANY.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1094
diff -u -p -r1.1094 pf.c
--- sys/net/pf.c24 Jul 2020 18:17:15 -  1.1094
+++ sys/net/pf.c25 Jul 2020 07:39:19 -
@@ -1410,9 +1410,7 @@ pf_remove_divert_state(struct pf_state_k
struct pf_state_item*si;
 
TAILQ_FOREACH(si, >states, entry) {
-   if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
-   (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
-   si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
+   if (sk == si->s->key[PF_SK_STACK]) {
pf_remove_state(si->s);
break;
}



network configuration:

  192.168.0.101 -- 192.168.0.1 [OBJ] 10.0.0.1 --> 10.0.0.10

setup:

  ifconfig pair100 rdomain 10
  ifconfig pair100 inet 192.168.0.1
  ifconfig pair101 rdomain 11 patch pair100
  ifconfig pair101 inet 192.168.0.101
  ifconfig pair102 rdomain 10
  ifconfig pair102 inet 10.0.0.1/24
  ifconfig pair103 rdomain 12 patch pair102
  ifconfig pair103 inet 10.0.0.101/24
  route -T11 add default 192.168.0.1

/etc/pf.conf:

  pass on {pair100 pair101 pair102 pair103}
  match out on pair102 nat-to (pair102:0)
  block in on pair103 proto tcp to port 443

procedure:

1. run a server by scapy on 443/tcp on rdomain 12

   $ doas route -T12 exec python test.py

2. connect to the server from OBJ (rdomain 10)

   $ doas route -T10 exec nc -vs 192.168.0.101 10.0.0.101 443
   Connection to 10.0.0.101 443 port [tcp/https] succeeded!
   Ctrl-D
   $

   close the connection by Ctrl-D immediately

3. see the packet capture on pair103

   - You can see packets like below
 19:28:51.822879 10.0.0.101.443 > 10.0.0.1.60956: . ack 1 win 8192
 19:28:51.823559 192.168.0.101.22083 > 10.0.0.101.443: R 0:0(0)
   ack 1 win 0 (DF) [tos 0x10]
 
   - Since the pf state is kept, the packet "10.0.0.101.443 >
 10.0.0.1.60956" is converted into "10.0.0.101.443 >
 192.168.0.101.22083" by the state's NAT
   - but since the PCB doesn't exist, the packet is forwarded.
   - but the packet is blocked by default "block return" rule
   - "192.168.0.101.22083 > 10.0.0.101.443" is the result of "block
 return"
   
   -> 192.168.0.101 is NATed address.  It should not appear on
  10.0.0.0/24 network.

teardown:

  ifconfig pair100 destroy
  ifconfig pair101 destroy
  ifconfig pair102 destroy
  ifconfig pair103 destroy

test.py
***
import time
from scapy.all import *

a=sniff(iface="pair102", count=1, filter="tcp and port 443")

ip_src = a[0][IP].src
ip_dst = a[0][IP].dst
sport =  a[0][TCP].sport
dport =  a[0][TCP].dport
seq_nr = 5
ack_nr = a[0][TCP].seq + 1

a=sr1(IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="SA", seq=seq_nr, ack=ack_nr,
  options=[('MSS', 1460)]))
#ack_nr = a[0][TCP].seq + 1

# Send FIN and receive FIN+ACK
seq_nr = seq_nr + 1
a=sr1(IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="FA", seq=seq_nr, ack=ack_nr))
ack_nr = a[0][TCP].seq + 1

time.sleep(2)

# Send ACK of FIN
lastack = (IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="A", seq=seq_nr, ack=ack_nr))
send(lastack)

# Resend in 100 times
for _ in range(100):
time.sleep(2)
send(lastack)
***



pf_remove_divert_state

2020-07-25 Thread YASUOKA Masahiko
Hi,

Currently SO_BINDANY is usable without any divert or divert-reply
rule.

pf reserves its associated PCB to its state when the packet is going
out.  This time, the pf rule is not required to have "divert" or
"divert-reply" option.  When receiving reverse direction packets,
those packets are going to "ours" since they have the associated PCB.

But when dropping the connection, the PCB is deleted but the state
will not be removed.  Currently pf removes the state only if it is
created by a rule with "divert-reply" or "divert" option.  Otherwise
the state is kept.

As the result, following incoming packets for the connection will be
forwarded by the state.  They should not be forwarded since they were
going to "ours".

I think the state should be deleted even if it's created by a rule
without "divert" or "divert-reply" option. The following diff
will change this behavior.  Also I attached a test procedure after the
diff.


ok? comments?

Don't keep a state when its associated PCB is deleted, even if it was
created without a "divert-to" or "divert-reply" rule.  It might have
been created by SO_BINDANY.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1094
diff -u -p -r1.1094 pf.c
--- sys/net/pf.c24 Jul 2020 18:17:15 -  1.1094
+++ sys/net/pf.c25 Jul 2020 07:39:19 -
@@ -1410,9 +1410,7 @@ pf_remove_divert_state(struct pf_state_k
struct pf_state_item*si;
 
TAILQ_FOREACH(si, >states, entry) {
-   if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
-   (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
-   si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
+   if (sk == si->s->key[PF_SK_STACK]) {
pf_remove_state(si->s);
break;
}



network configuration:

  192.168.0.101 -- 192.168.0.1 [OBJ] 10.0.0.1 --> 10.0.0.10

setup:

  ifconfig pair100 rdomain 10
  ifconfig pair100 inet 192.168.0.1
  ifconfig pair101 rdomain 11 patch pair100
  ifconfig pair101 inet 192.168.0.101
  ifconfig pair102 rdomain 10
  ifconfig pair102 inet 10.0.0.1/24
  ifconfig pair103 rdomain 12 patch pair102
  ifconfig pair103 inet 10.0.0.101/24
  route -T11 add default 192.168.0.1

/etc/pf.conf:

  pass on {pair100 pair101 pair102 pair103}
  match out on pair102 nat-to (pair102:0)
  block in on pair103 proto tcp to port 443

procedure:

1. run a server by scapy on 443/tcp on rdomain 12

   $ doas route -T12 exec python test.py

2. connect to the server from OBJ (rdomain 10)

   $ doas route -T10 exec nc -vs 192.168.0.101 10.0.0.101 443
   Connection to 10.0.0.101 443 port [tcp/https] succeeded!
   Ctrl-D
   $

   close the connection by Ctrl-D immediately

3. see the packet capture on pair103

   - You can see packets like below
 19:28:51.822879 10.0.0.101.443 > 10.0.0.1.60956: . ack 1 win 8192
 19:28:51.823559 192.168.0.101.22083 > 10.0.0.101.443: R 0:0(0)
   ack 1 win 0 (DF) [tos 0x10]
 
   - Since the pf state is kept, the packet "10.0.0.101.443 >
 10.0.0.1.60956" is converted into "10.0.0.101.443 >
 192.168.0.101.22083" by the state's NAT
   - but since the PCB doesn't exist, the packet is forwarded.
   - but the packet is blocked by default "block return" rule
   - "192.168.0.101.22083 > 10.0.0.101.443" is the result of "block
 return"
   
   -> 192.168.0.101 is NATed address.  It should not appear on
  10.0.0.0/24 network.

teardown:

  ifconfig pair100 destroy
  ifconfig pair101 destroy
  ifconfig pair102 destroy
  ifconfig pair103 destroy

test.py
***
import time
from scapy.all import *

a=sniff(iface="pair102", count=1, filter="tcp and port 443")

ip_src = a[0][IP].src
ip_dst = a[0][IP].dst
sport =  a[0][TCP].sport
dport =  a[0][TCP].dport
seq_nr = 5
ack_nr = a[0][TCP].seq + 1

a=sr1(IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="SA", seq=seq_nr, ack=ack_nr,
  options=[('MSS', 1460)]))
#ack_nr = a[0][TCP].seq + 1

# Send FIN and receive FIN+ACK
seq_nr = seq_nr + 1
a=sr1(IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="FA", seq=seq_nr, ack=ack_nr))
ack_nr = a[0][TCP].seq + 1

time.sleep(2)

# Send ACK of FIN
lastack = (IP(src=ip_dst, dst=ip_src)/
  TCP(sport=dport, dport=sport, flags="A", seq=seq_nr, ack=ack_nr))
send(lastack)

# Resend in 100 times
for _ in range(100):
time.sleep(2)
send(lastack)
***



carp: unicast carppeer and peer down

2020-07-25 Thread YASUOKA Masahiko
Hi,

When a unicast address is specified for carppeer, if the peer is
down, sending out advertisement packets will fail.  This failure is
treated as an error of the sending host, so the error counter is
incremented and carpdemote is incremented.  I think this is not
correct because the failure is not a fault of the sending host.

ok?

Don't treat it as an error if carppeer is a unicast address and the peer is down.

Index: sys/netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.347
diff -u -p -r1.347 ip_carp.c
--- sys/netinet/ip_carp.c   24 Jul 2020 18:17:15 -  1.347
+++ sys/netinet/ip_carp.c   25 Jul 2020 07:16:42 -
@@ -1140,7 +1140,9 @@ carp_send_ad(struct carp_vhost_entry *vh
 
error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, >sc_imo,
NULL, 0);
-   if (error) {
+   if (error &&
+   /* when unicast, the peer's down is not our fault */
+   !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
if (error == ENOBUFS)
carpstat_inc(carps_onomem);
else



pfsync: comparing duration when "bulk-end"

2020-07-24 Thread YASUOKA Masahiko
Hi,

pfsync does a "bulk update" just after boot, and I noticed it sometimes
fails.  When finishing the "bulk update", the duration in the "bulk-end"
packet and our duration based on uptime are compared, but that
comparison should be fixed.  It must take into account that the values
are rounded to whole seconds.

ok?

Take rounding to whole seconds into account when comparing the duration
in the "bulk-end" packet and the duration based on our uptime.  This fixes
the problem that the carp demote count sometimes becomes 33 after reboot.

Index: sys/net/if_pfsync.c
===
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.274
diff -u -p -r1.274 if_pfsync.c
--- sys/net/if_pfsync.c 10 Jul 2020 13:26:42 -  1.274
+++ sys/net/if_pfsync.c 25 Jul 2020 05:09:47 -
@@ -1169,8 +1169,7 @@ pfsync_in_bus(caddr_t buf, int len, int 
break;
 
case PFSYNC_BUS_END:
-   if (getuptime() - ntohl(bus->endtime) >=
-   sc->sc_ureq_sent) {
+   if (ntohl(bus->endtime) <= getuptime() + 1 - sc->sc_ureq_sent) {
/* that's it, we're happy */
sc->sc_ureq_sent = 0;
sc->sc_bulk_tries = 0;



Re: pf: route-to {random,srchash} in an anchor

2020-07-24 Thread YASUOKA Masahiko
Hi,

On Thu, 23 Jul 2020 18:44:43 +0200
Alexandr Nedvedicky  wrote:
> On Thu, Jul 23, 2020 at 08:01:18PM +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> Last month, I fixed the problem "route-to least-state" in an anchor
>> didn't work.
>> 
>> https://marc.info/?t=15911745782=1=2
>> 
>> I noticed the same problem happens on "random" and "srchash" as well.
>> 
>> ok?
> 
> change looks good. I have just one nit-pick comment. I leave decision
> whether it's worth to adjust your diff or commit as-is up to you.
> 
> see in-line further below.

I can't remember why I used "null == false" logic, since I usually
avoid using that.

I'll commit the adjusted diff below.

Index: sys/net/pf_lb.c
===
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.65
diff -u -p -r1.65 pf_lb.c
--- sys/net/pf_lb.c 24 Jul 2020 14:06:33 -  1.65
+++ sys/net/pf_lb.c 24 Jul 2020 14:13:42 -
@@ -353,6 +353,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
struct pf_addr   faddr;
struct pf_addr  *raddr = >addr.v.a.addr;
struct pf_addr  *rmask = >addr.v.a.mask;
+   struct pfr_ktable   *kt;
struct pfi_kif  *kif;
u_int64_tstates;
u_int16_tweight;
@@ -405,18 +406,17 @@ pf_map_addr(sa_family_t af, struct pf_ru
pf_poolmask(naddr, raddr, rmask, saddr, af);
break;
case PF_POOL_RANDOM:
-   if (rpool->addr.type == PF_ADDR_TABLE) {
-   cnt = rpool->addr.p.tbl->pfrkt_cnt;
-   if (cnt == 0)
-   rpool->tblidx = 0;
+   if (rpool->addr.type == PF_ADDR_TABLE ||
+   rpool->addr.type == PF_ADDR_DYNIFTL) {
+   if (rpool->addr.type == PF_ADDR_TABLE)
+   kt = rpool->addr.p.tbl;
else
-   rpool->tblidx = (int)arc4random_uniform(cnt);
-   memset(>counter, 0, sizeof(rpool->counter));
-   if (pfr_pool_get(rpool, , , af))
+   kt = rpool->addr.p.dyn->pfid_kt;
+   kt = pfr_ktable_select_active(kt);
+   if (kt == NULL)
return (1);
-   pf_addrcpy(naddr, >counter, af);
-   } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
-   cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
+
+   cnt = kt->pfrkt_cnt;
if (cnt == 0)
rpool->tblidx = 0;
else
@@ -462,18 +462,18 @@ pf_map_addr(sa_family_t af, struct pf_ru
case PF_POOL_SRCHASH:
hashidx =
pf_hash(saddr, (struct pf_addr *), >key, af);
-   if (rpool->addr.type == PF_ADDR_TABLE) {
-   cnt = rpool->addr.p.tbl->pfrkt_cnt;
-   if (cnt == 0)
-   rpool->tblidx = 0;
+
+   if (rpool->addr.type == PF_ADDR_TABLE ||
+   rpool->addr.type == PF_ADDR_DYNIFTL) {
+   if (rpool->addr.type == PF_ADDR_TABLE)
+   kt = rpool->addr.p.tbl;
else
-   rpool->tblidx = (int)(hashidx % cnt);
-   memset(>counter, 0, sizeof(rpool->counter));
-   if (pfr_pool_get(rpool, , , af))
+   kt = rpool->addr.p.dyn->pfid_kt;
+   kt = pfr_ktable_select_active(kt);
+   if (kt == NULL)
return (1);
-   pf_addrcpy(naddr, >counter, af);
-   } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
-   cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
+
+   cnt = kt->pfrkt_cnt;
if (cnt == 0)
rpool->tblidx = 0;
else
Index: sys/net/pf_table.c
===
RCS file: /cvs/src/sys/net/pf_table.c,v
retrieving revision 1.133
diff -u -p -r1.133 pf_table.c
--- sys/net/pf_table.c  24 Jun 2020 22:03:43 -  1.133
+++ sys/net/pf_table.c  24 Jul 2020 14:13:42 -
@@ -2108,9 +2108,8 @@ pfr_kentry_byaddr(struct pfr_ktable *kt,
struct sockaddr_in6  tmp6;
 #endif /* INET6 */
 
-   if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
-   kt = kt->pfrkt_root;
-   if 

Re: pf: route-to least-states

2020-07-24 Thread YASUOKA Masahiko
Hi,

Thank you for your review.

On Fri, 24 Jul 2020 01:25:42 +0200
Alexandr Nedvedicky  wrote:
>> - interface is not selected properly if selected table entry specifies
>>   an interface.
> 
> to be honest I don't quite understand what's going on here.
> can you share some details of configuration/scenario, which
> triggers the bug your diff is fixing?

You seem to have understood the scenario correctly.

> the part of your change, which I'm not able to figure out is
> this single line:
> 
>> +if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
>> +return (1);
>> +/* revert the kif which was set by pfr_pool_get() */
>> +rpool->kif = kif;
>>  break;
>>  }
> 
> your fix changes behavior, which is there since least-state
> option has been introduced. I believe it does not matter
> for case when route-to specifies single interface such as:
> 
>   route-to 192.168.1.10@em0 least-states
> 
> I'm not sure what will happen in situation, when there are more interfaces
> specified in combination with sticky-address:
>   
>   route-to {192.168.1.10@em0, 192.168.1.20@em1} least-states sticky-address

Yes.  This is a scenario.

> the resulting code does not look quite right with your diff applied:
> 
> 602 } while (pf_match_addr(1, , rmask, >counter, 
> af) &&
> 603 (states > 0));
> 604 
> 605 if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
> 606 return (1);
> 607 /* revert the kif which was set by pfr_pool_get() */
> 608 rpool->kif = kif;
> 609 break;
> 610 }
> 611 
> 612 if (rpool->opts & PF_POOL_STICKYADDR) {
> 613 if (sns[type] != NULL) {
> 614 pf_remove_src_node(sns[type]);
> 615 sns[type] = NULL;
> 616 }
> 617 if (pf_insert_src_node([type], r, type, af, saddr, 
> naddr,
> 618 rpool->kif))
> 619 return (1);
> 620 }
> 
> 
> at line 608 new code reverts kif set by pfr_pool_get(). At line 617
> (executed when sticky-address is set) the original code passes kif chosen 
> be
> pfr_pool_get(). You diff changes that behavior. So my question is simple:
>   is that intentional change?

Yes.

Let me simplify the block for "least-states".

535   case PF_POOL_LEASTSTATES:
539   pfr_pool_get(rpool);  // first entry
 :
561   faddr = rpool->counter;   //record as final
 :
557   load = rpool->states / rpool->weight;
563   naddr = rpool->counter;
 :
571  do {
572  rpool->counter++;
575  pfr_pool_get(rpool);   /* next entry */
 :
585  cload = rpool->states / rpool->weight;
 :
 :   /* find lc minimum */
591  if (cload < load) {
595 load = cload;
597 naddr = rpool->counter;
601  }
603   } while (raddr->counter != faddr); // loop until final

the loop #571:606 is to find the minimum (least-states) entry.  If the
last entry is not the minimum, after the loop,

   rpool <= the last entry
   naddr <= the minimum entry

Also see the pfr_pool_get():

2272 int
2273 pfr_pool_get(struct pf_pool *rpool, struct pf_addr **raddr,
2274 struct pf_addr **rmask, sa_family_t af)
2275 {
(snip)
2417 rpool->states = 0;
2418 if (ke->pfrke_counters != NULL)
2419 rpool->states = ke->pfrke_counters->states;
2420 switch (ke->pfrke_type) {
2421 case PFRKE_COST:
2422 rpool->weight =
2423 ((struct pfr_kentry_cost *)ke)->weight;
2424 /* FALLTHROUGH */
2425 case PFRKE_ROUTE:
2426 rpool->kif = ((struct pfr_kentry_route 
*)ke)->kif;
2427 break;
2428 default:
2429 rpool->weight = 1;
2430 break;
2431 }

some fields of rpool (states, weight, kif) are set by the values of
the selected table entry.

So,

|  rpool <= the last entry
|  naddr <= the minimum entry

rpool->kif is the interface of the last entry.  It might be different
from the interface of the minimum entry.

The diff is to keep kif of the minimum entry,

+   kif = rpool->kif;

revert rpool->kif by it after the loop.

+   /* revert the kif which was set by pfr_pool_get() */
+   rpool->kif = kif;




pf: route-to least-states

2020-07-23 Thread YASUOKA Masahiko
Hi,

The diff fixes 2 problems of "least-states":

- states whose address is selected by sticky-address are not counted
  in the number of states.
- interface is not selected properly if selected table entry specifies
  an interface.

ok?

Increase state counter for least-states when the address is selected
by sticky-address.  Also fix the problem that the interface which is
specified by the selected table entry is not used properly.

Index: sys/net/pf_lb.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pf_lb.c,v
retrieving revision 1.64
diff -u -p -r1.64 pf_lb.c
--- sys/net/pf_lb.c 2 Jul 2019 09:04:53 -   1.64
+++ sys/net/pf_lb.c 23 Jul 2020 11:06:05 -
@@ -97,6 +97,8 @@ u_int64_t  pf_hash(struct pf_addr *, st
 int pf_get_sport(struct pf_pdesc *, struct pf_rule *,
struct pf_addr *, u_int16_t *, u_int16_t,
u_int16_t, struct pf_src_node **);
+int pf_map_addr_states_increase(sa_family_t,
+   struct pf_pool *, struct pf_addr *);
 int pf_get_transaddr_af(struct pf_rule *,
struct pf_pdesc *, struct pf_src_node **);
 int pf_map_addr_sticky(sa_family_t, struct pf_rule *,
@@ -319,6 +321,12 @@ pf_map_addr_sticky(sa_family_t af, struc
sns[type] = NULL;
return (-1);
}
+
+   if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES) {
+   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
+   return (-1);
+   }
+
if (!PF_AZERO(cached, af))
pf_addrcpy(naddr, cached, af);
if (pf_status.debug >= LOG_DEBUG) {
@@ -345,6 +353,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
struct pf_addr   faddr;
struct pf_addr  *raddr = >addr.v.a.addr;
struct pf_addr  *rmask = >addr.v.a.mask;
+   struct pfi_kif  *kif;
u_int64_tstates;
u_int16_tweight;
u_int64_tload;
@@ -539,6 +548,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
 
states = rpool->states;
weight = rpool->weight;
+   kif = rpool->kif;
 
if ((rpool->addr.type == PF_ADDR_TABLE &&
rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
@@ -581,6 +591,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
if (cload < load) {
states = rpool->states;
weight = rpool->weight;
+   kif = rpool->kif;
load = cload;
 
pf_addrcpy(naddr, >counter, af);
@@ -591,29 +602,10 @@ pf_map_addr(sa_family_t af, struct pf_ru
} while (pf_match_addr(1, , rmask, >counter, af) &&
(states > 0));
 
-   if (rpool->addr.type == PF_ADDR_TABLE) {
-   if (pfr_states_increase(rpool->addr.p.tbl,
-   naddr, af) == -1) {
-   if (pf_status.debug >= LOG_DEBUG) {
-   log(LOG_DEBUG,"pf: pf_map_addr: "
-   "selected address ");
-   pf_print_host(naddr, 0, af);
-   addlog(". Failed to increase count!\n");
-   }
-   return (1);
-   }
-   } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
-   if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
-   naddr, af) == -1) {
-   if (pf_status.debug >= LOG_DEBUG) {
-   log(LOG_DEBUG, "pf: pf_map_addr: "
-   "selected address ");
-   pf_print_host(naddr, 0, af);
-   addlog(". Failed to increase count!\n");
-   }
-   return (1);
-   }
-   }
+   if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
+   return (1);
+   /* revert the kif which was set by pfr_pool_get() */
+   rpool->kif = kif;
break;
}
 
@@ -642,6 +634,38 @@ pf_map_addr(sa_family_t af, struct pf_ru
addlog("\n");
}
 
+   return (0);
+}
+
+int
+pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
+struct pf_addr *naddr)
+{
+   if (rpool->addr.type == PF_ADDR_TABLE) {
+   if (pfr_states_increase(rpool->addr.p.tbl,
+   naddr, af) == -1) {
+ 

pf: route-to {random,srchash} in an anchor

2020-07-23 Thread YASUOKA Masahiko
Hi,

Last month, I fixed the problem that "route-to least-state" in an anchor
didn't work.

https://marc.info/?t=15911745782=1=2

I noticed the same problem happens on "random" and "srchash" as well.

ok?

Use the table on root always if current table is not active.

Index: sys/net/pf_lb.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pf_lb.c,v
retrieving revision 1.64
diff -u -p -r1.64 pf_lb.c
--- sys/net/pf_lb.c 2 Jul 2019 09:04:53 -   1.64
+++ sys/net/pf_lb.c 23 Jul 2020 10:45:48 -
@@ -345,6 +345,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
struct pf_addr   faddr;
struct pf_addr  *raddr = >addr.v.a.addr;
struct pf_addr  *rmask = >addr.v.a.mask;
+   struct pfr_ktable   *kt;
u_int64_tstates;
u_int16_tweight;
u_int64_tload;
@@ -396,18 +397,17 @@ pf_map_addr(sa_family_t af, struct pf_ru
pf_poolmask(naddr, raddr, rmask, saddr, af);
break;
case PF_POOL_RANDOM:
-   if (rpool->addr.type == PF_ADDR_TABLE) {
-   cnt = rpool->addr.p.tbl->pfrkt_cnt;
-   if (cnt == 0)
-   rpool->tblidx = 0;
+   if (rpool->addr.type == PF_ADDR_TABLE ||
+   rpool->addr.type == PF_ADDR_DYNIFTL) {
+   if (rpool->addr.type == PF_ADDR_TABLE)
+   kt = rpool->addr.p.tbl;
else
-   rpool->tblidx = (int)arc4random_uniform(cnt);
-   memset(>counter, 0, sizeof(rpool->counter));
-   if (pfr_pool_get(rpool, , , af))
+   kt = rpool->addr.p.dyn->pfid_kt;
+   kt = pfr_ktable_select_active(kt);
+   if (!kt)
return (1);
-   pf_addrcpy(naddr, >counter, af);
-   } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
-   cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
+
+   cnt = kt->pfrkt_cnt;
if (cnt == 0)
rpool->tblidx = 0;
else
@@ -453,18 +453,18 @@ pf_map_addr(sa_family_t af, struct pf_ru
case PF_POOL_SRCHASH:
hashidx =
pf_hash(saddr, (struct pf_addr *), >key, af);
-   if (rpool->addr.type == PF_ADDR_TABLE) {
-   cnt = rpool->addr.p.tbl->pfrkt_cnt;
-   if (cnt == 0)
-   rpool->tblidx = 0;
+
+   if (rpool->addr.type == PF_ADDR_TABLE ||
+   rpool->addr.type == PF_ADDR_DYNIFTL) {
+   if (rpool->addr.type == PF_ADDR_TABLE)
+   kt = rpool->addr.p.tbl;
else
-   rpool->tblidx = (int)(hashidx % cnt);
-   memset(>counter, 0, sizeof(rpool->counter));
-   if (pfr_pool_get(rpool, , , af))
+   kt = rpool->addr.p.dyn->pfid_kt;
+   kt = pfr_ktable_select_active(kt);
+   if (!kt)
return (1);
-   pf_addrcpy(naddr, >counter, af);
-   } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
-   cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
+
+   cnt = kt->pfrkt_cnt;
if (cnt == 0)
rpool->tblidx = 0;
else
Index: sys/net/pf_table.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pf_table.c,v
retrieving revision 1.133
diff -u -p -r1.133 pf_table.c
--- sys/net/pf_table.c  24 Jun 2020 22:03:43 -  1.133
+++ sys/net/pf_table.c  23 Jul 2020 10:45:48 -
@@ -2108,9 +2108,8 @@ pfr_kentry_byaddr(struct pfr_ktable *kt,
struct sockaddr_in6  tmp6;
 #endif /* INET6 */
 
-   if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
-   kt = kt->pfrkt_root;
-   if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+   kt = pfr_ktable_select_active(kt);
+   if (!kt)
return (0);
 
switch (af) {
@@ -2153,9 +2152,8 @@ pfr_update_stats(struct pfr_ktable *kt, 
int  dir_idx = (pd->dir == PF_OUT);
int  op_idx;
 
-   if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
-   kt = kt->pfrkt_root;
-   if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
+   kt = pfr_ktable_select_active(kt);
+   if (!kt)
return;
 
switch (af) {
@@ -2308,9 +2306,8 @@ pfr_pool_get(struct pf_pool *rpool, stru
 

Re: receive interfacez for carp when real mac is used

2020-07-22 Thread YASUOKA Masahiko
The problem I was trying to fix has been fixed by dlg@'s commit today.

  https://marc.info/?l=openbsd-cvs=159538265604770=2

So the diff is not needed any more.  Pointed out by dlg@.

Thanks,

On Wed, 22 Jul 2020 19:24:32 +0900 (JST)
YASUOKA Masahiko  wrote:
> Hi,
> 
> Currently when using the real mac address for carp(4) interface, all
> packets are treated as if their receive interface is carp.  This causes
> some problems.
> 
> For example, IPv6 ndp doesn't work on an interface which is used for
> carpdev.  Because it is assumed that reply packets are received with
> the same interface which is used to send out the request.
> 
> ok?
> 
> When realmac is used for carp(4), don't pass the packets through the
> interface since they are for the real interface.
> 
> Index: sys/netinet/ip_carp.c
> ===
> RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.345
> diff -u -p -r1.345 ip_carp.c
> --- sys/netinet/ip_carp.c 21 May 2020 05:24:59 -  1.345
> +++ sys/netinet/ip_carp.c 22 Jul 2020 09:52:20 -
> @@ -1418,6 +1418,14 @@ carp_input(struct ifnet *ifp0, struct mb
>   }
>   m_tag_prepend(m, mtag);
>   }
> +
> + /*
> +  * When carp is using realmac, since the matched MAC
> +  * address is for the real interface, the packets are
> +  * not for the carp interface.
> +  */
> + if (sc->sc_realmac)
> + sc = NULL;
>   break;
>   }
>   }
> 



receive interfacez for carp when real mac is used

2020-07-22 Thread YASUOKA Masahiko
Hi,

Currently when using the real mac address for carp(4) interface, all
packets are treated as if their receive interface is carp.  This causes
some problems.

For example, IPv6 ndp doesn't work on an interface which is used for
carpdev.  Because it is assumed that reply packets are received with
the same interface which is used to send out the request.

ok?

When realmac is used for carp(4), don't pass the packets through the
interface since they are for the real interface.

Index: sys/netinet/ip_carp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_carp.c,v
retrieving revision 1.345
diff -u -p -r1.345 ip_carp.c
--- sys/netinet/ip_carp.c   21 May 2020 05:24:59 -  1.345
+++ sys/netinet/ip_carp.c   22 Jul 2020 09:52:20 -
@@ -1418,6 +1418,14 @@ carp_input(struct ifnet *ifp0, struct mb
}
m_tag_prepend(m, mtag);
}
+
+   /*
+* When carp is using realmac, since the matched MAC
+* address is for the real interface, the packets are
+* not for the carp interface.
+*/
+   if (sc->sc_realmac)
+   sc = NULL;
break;
}
}



Re: route add ::/0 ...

2020-07-06 Thread YASUOKA Masahiko
Let me update the diff.

On Mon, 06 Jul 2020 17:54:30 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Tue, 30 Jun 2020 02:42:02 +0200
> Klemens Nanni  wrote:
>> On Tue, Jun 30, 2020 at 09:00:30AM +0900, YASUOKA Masahiko wrote:
>>> inet_makenetandmask() had required another treatment.
>>> 
>>> Also -prefixlen 0 for -inet has a bug
>>> 
>>>  % doas ./obj/route -T100 add -inet 0.0.0.0 -prefixlen 0 127.0.0.1
>>>  add net 0.0.0.0: gateway 127.0.0.1
>>>  % netstat -nrf inet -T 100
>>>  Routing tables
>>> 
>>>  Internet:
>>>  DestinationGatewayFlags   Refs  Use   Mtu  Prio 
>>> Iface
>>>  0.0.0.0/32 127.0.0.1  UGS00 32768 8 
>>> lo100
>>> 
>>> /0 becomes /32.  The diff following also fixes the problem.
>> Yes, this looks correct to me;  regress is also happy (again).
>> 
>> OK kn
> 
> Thanks,
> 
> I'm  going to commit the diff.  ok or comments, are still welcome.
> 
> 
> Stop using make_addr() which trims trailing zeros of the netmask, set
> family and length field.  This fixes route(8) to handle "::/0"
> properly.  Also fix "route add -inet 0.0.0.0 -prefixlen 0 (gateway)"
> to work properly.
> 
> Index: sbin/route/route.c
> ===
> RCS file: /cvs/src/sbin/route/route.c,v
> retrieving revision 1.247
> diff -u -p -r1.247 route.c
> --- sbin/route/route.c15 Jan 2020 10:26:25 -  1.247
> +++ sbin/route/route.c6 Jul 2020 08:45:06 -
(snip)
> @@ -781,12 +780,9 @@ inet_makenetandmask(u_int32_t net, struc
>   sin->sin_addr.s_addr = htonl(net);
>   sin = _mask.sin;
>   sin->sin_addr.s_addr = htonl(mask);
> - sin->sin_len = 0;
> - sin->sin_family = 0;
> + sin->sin_family = AF_INET;
>   cp = (char *)(>sin_addr + 1);
> - while (*--cp == '\0' && cp > (char *)sin)
> - continue;
> - sin->sin_len = 1 + cp - (char *)sin;
> + sin->sin_len = sizeof(struct sockaddr_in);
>  }
>  
>  /*

"cp" becomes unused.  The updated diff removes "cp" as well.

Index: sbin/route/route.c
===
RCS file: /cvs/src/sbin/route/route.c,v
retrieving revision 1.247
diff -u -p -r1.247 route.c
--- sbin/route/route.c  15 Jan 2020 10:26:25 -  1.247
+++ sbin/route/route.c  6 Jul 2020 08:57:25 -
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, in
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -767,7 +766,6 @@ void
 inet_makenetandmask(u_int32_t net, struct sockaddr_in *sin, int bits)
 {
u_int32_t mask;
-   char *cp;
 
rtm_addrs |= RTA_NETMASK;
if (bits == 0 && net == 0)
@@ -781,12 +779,8 @@ inet_makenetandmask(u_int32_t net, struc
sin->sin_addr.s_addr = htonl(net);
sin = _mask.sin;
sin->sin_addr.s_addr = htonl(mask);
-   sin->sin_len = 0;
-   sin->sin_family = 0;
-   cp = (char *)(>sin_addr + 1);
-   while (*--cp == '\0' && cp > (char *)sin)
-   continue;
-   sin->sin_len = 1 + cp - (char *)sin;
+   sin->sin_family = AF_INET;
+   sin->sin_len = sizeof(struct sockaddr_in);
 }
 
 /*
@@ -1001,7 +995,8 @@ prefixlen(int af, char *s)
memset(_mask, 0, sizeof(so_mask));
so_mask.sin.sin_family = AF_INET;
so_mask.sin.sin_len = sizeof(struct sockaddr_in);
-   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - len));
+   if (len != 0)
+   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - 
len));
break;
case AF_INET6:
so_mask.sin6.sin6_family = AF_INET6;
@@ -1088,8 +1083,6 @@ rtmsg(int cmd, int flags, int fmask, uin
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1118,34 +,6 @@ rtmsg(int cmd, int flags, int fmask, uin
}
 #undef rtm
return (0);
-}
-
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask-&g

Re: route add ::/0 ...

2020-07-06 Thread YASUOKA Masahiko


On Tue, 30 Jun 2020 02:42:02 +0200
Klemens Nanni  wrote:
> On Tue, Jun 30, 2020 at 09:00:30AM +0900, YASUOKA Masahiko wrote:
>> inet_makenetandmask() had required another treatment.
>> 
>> Also -prefixlen 0 for -inet has a bug
>> 
>>  % doas ./obj/route -T100 add -inet 0.0.0.0 -prefixlen 0 127.0.0.1
>>  add net 0.0.0.0: gateway 127.0.0.1
>>  % netstat -nrf inet -T 100
>>  Routing tables
>> 
>>  Internet:
>>  DestinationGatewayFlags   Refs  Use   Mtu  Prio 
>> Iface
>>  0.0.0.0/32 127.0.0.1  UGS00 32768 8 
>> lo100
>> 
>> /0 becomes /32.  The diff following also fixes the problem.
> Yes, this looks correct to me;  regress is also happy (again).
> 
> OK kn

Thanks,

I'm going to commit the diff.  OKs or comments are still welcome.


Stop using mask_addr(), which trims trailing zeros of the netmask, and
set the family and length fields.  This fixes route(8) to handle "::/0"
properly.  Also fix "route add -inet 0.0.0.0 -prefixlen 0 (gateway)"
to work properly.

Index: sbin/route/route.c
===
RCS file: /cvs/src/sbin/route/route.c,v
retrieving revision 1.247
diff -u -p -r1.247 route.c
--- sbin/route/route.c  15 Jan 2020 10:26:25 -  1.247
+++ sbin/route/route.c  6 Jul 2020 08:45:06 -
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, in
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -781,12 +780,9 @@ inet_makenetandmask(u_int32_t net, struc
sin->sin_addr.s_addr = htonl(net);
sin = _mask.sin;
sin->sin_addr.s_addr = htonl(mask);
-   sin->sin_len = 0;
-   sin->sin_family = 0;
+   sin->sin_family = AF_INET;
cp = (char *)(>sin_addr + 1);
-   while (*--cp == '\0' && cp > (char *)sin)
-   continue;
-   sin->sin_len = 1 + cp - (char *)sin;
+   sin->sin_len = sizeof(struct sockaddr_in);
 }
 
 /*
@@ -1001,7 +997,8 @@ prefixlen(int af, char *s)
memset(_mask, 0, sizeof(so_mask));
so_mask.sin.sin_family = AF_INET;
so_mask.sin.sin_len = sizeof(struct sockaddr_in);
-   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - len));
+   if (len != 0)
+   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - 
len));
break;
case AF_INET6:
so_mask.sin6.sin6_family = AF_INET6;
@@ -1088,8 +1085,6 @@ rtmsg(int cmd, int flags, int fmask, uin
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1118,34 +1113,6 @@ rtmsg(int cmd, int flags, int fmask, uin
}
 #undef rtm
return (0);
-}
-
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
-   if (*--cp1 != '\0') {
-   mask->sa.sa_len = 1 + cp1 - (char *)mask;
-   break;
-   }
-   if ((rtm_addrs & which) == 0)
-   return;
-   switch (addr->sa.sa_family) {
-   case AF_INET:
-   case AF_INET6:
-   case AF_UNSPEC:
-   return;
-   }
-   cp1 = mask->sa.sa_len + 1 + (char *)addr;
-   cp2 = addr->sa.sa_len + 1 + (char *)addr;
-   while (cp2 > cp1)
-   *--cp2 = '\0';
-   cp2 = mask->sa.sa_len + 1 + (char *)mask;
-   while (cp1 > addr->sa.sa_data)
-   *--cp1 &= *--cp2;
 }
 
 char *msgtypes[] = {



Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
On Mon, 29 Jun 2020 19:18:17 +0200
Klemens Nanni  wrote:
> On Mon, Jun 29, 2020 at 11:55:10PM +0900, YASUOKA Masahiko wrote:
>> The function mask_addr() doesn't mask address for IPv4 and IPv6.  Does
>> any address family other than IPv4 or IPv6 require #1142:1148?  The
>> function seems to just trim the trailing zero.  Is it neccesaary?  And
>> it causes the confusion on the kernel.  How about deleting
>> mask_addr()?
>> 
>> The diff following also fixes the problem.
> Removing it breaks IPv4 default routes:
> 
>   # ifconfig lo1 rdomain 1 127.1.1.1
>   # ./obj/route -nT1 add 0.0.0.0/0 127.1.1.1
>   add net 0.0.0.0/0: gateway 127.1.1.1: Invalid argument
>   # route -nT1 add 0.0.0.0/0 127.1.1.1  
>   add net 0.0.0.0/0: gateway 127.1.1.1

Thanks,

inet_makenetandmask() had required another treatment.

Also -prefixlen 0 for -inet has a bug

 % doas ./obj/route -T100 add -inet 0.0.0.0 -prefixlen 0 127.0.0.1
 add net 0.0.0.0: gateway 127.0.0.1
 % netstat -nrf inet -T 100
 Routing tables

 Internet:
 DestinationGatewayFlags   Refs  Use   Mtu  Prio Iface
 0.0.0.0/32 127.0.0.1  UGS00 32768 8 lo100

/0 becomes /32.  The diff following also fixes the problem.


diff --git a/sbin/route/route.c b/sbin/route/route.c
index 9e43d8e89b6..532a918148d 100644
--- a/sbin/route/route.c
+++ b/sbin/route/route.c
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, int);
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -781,12 +780,9 @@ inet_makenetandmask(u_int32_t net, struct sockaddr_in 
*sin, int bits)
sin->sin_addr.s_addr = htonl(net);
sin = _mask.sin;
sin->sin_addr.s_addr = htonl(mask);
-   sin->sin_len = 0;
-   sin->sin_family = 0;
+   sin->sin_family = AF_INET;
cp = (char *)(>sin_addr + 1);
-   while (*--cp == '\0' && cp > (char *)sin)
-   continue;
-   sin->sin_len = 1 + cp - (char *)sin;
+   sin->sin_len = sizeof(struct sockaddr_in);
 }
 
 /*
@@ -1001,7 +997,8 @@ prefixlen(int af, char *s)
memset(_mask, 0, sizeof(so_mask));
so_mask.sin.sin_family = AF_INET;
so_mask.sin.sin_len = sizeof(struct sockaddr_in);
-   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - len));
+   if (len != 0)
+   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - 
len));
break;
case AF_INET6:
so_mask.sin6.sin6_family = AF_INET6;
@@ -1088,8 +1085,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1120,34 +1115,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
return (0);
 }
 
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
-   if (*--cp1 != '\0') {
-   mask->sa.sa_len = 1 + cp1 - (char *)mask;
-   break;
-   }
-   if ((rtm_addrs & which) == 0)
-   return;
-   switch (addr->sa.sa_family) {
-   case AF_INET:
-   case AF_INET6:
-   case AF_UNSPEC:
-   return;
-   }
-   cp1 = mask->sa.sa_len + 1 + (char *)addr;
-   cp2 = addr->sa.sa_len + 1 + (char *)addr;
-   while (cp2 > cp1)
-   *--cp2 = '\0';
-   cp2 = mask->sa.sa_len + 1 + (char *)mask;
-   while (cp1 > addr->sa.sa_data)
-   *--cp1 &= *--cp2;
-}
-
 char *msgtypes[] = {
"",
"RTM_ADD: Add Route",



Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
On Mon, 29 Jun 2020 18:45:07 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Mon, 29 Jun 2020 10:12:23 +0200
> Martin Pieuchot  wrote:
>> On 28/06/20(Sun) 20:41, YASUOKA Masahiko wrote:
>>> Hi,
>>> 
>>> When "::/0" is used as "default",
>>> 
>>>   # route add ::/0 fe80::1%em0
>>>   add net ::/0: gateway fe80::1%em0: Invalid argument
>>> 
>>> route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
>>> for "::/0", but rtable_satoplen() refuses it.  I think it should be
>>> accepted.
>> 
>> rtable_satoplen() is used in many places, not just in the socket parsing
>> code used by route(8).  I don't know what side effects can be introduced
>> by this change.
>> 
>> Why is IPv6 different from IPv4 when it comes to the default route?
> 
> Diferent functions are used.  route(8) uses inet_makenetandmask() to
> create a sockaddr for IPv4 prefix length and uses prefixlen() for IPv6
> prefix length.  "/0" results:
> 
> IPv4
>   { .len = 1, .family = 0, ... }
> IPv6 
>   { .len = 2, .family = AF_INET6, ... }

I'm sorry this is not correct.  It is actually

IPv6 
  { .len = 28, .family = AF_INET6, ... }

> Next, route(8) uses mask_addr() to trim the tailing zeros.
> 
> 1129 void
> 1130 mask_addr(union sockunion *addr, union sockunion *mask, int which)
> 1131 {
> 1132 int olen = mask->sa.sa_len;
> 1133 char *cp1 = olen + (char *)mask, *cp2;
> 1134 
> 1135 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
> 1136 if (*--cp1 != '\0') {
> 1137 mask->sa.sa_len = 1 + cp1 - (char *)mask;
> 1138 break;
> 1139 }
> 
> See #1135 carefully.  As the results, the sockaddrs become:
> 
> IPv4
>   { .len = 0, .family = 0, ... }
> IPv6
>   { .len = 2, .family = AF_INET6, ... }

I'm starting to wonder what mask_addr() is for.

   1123 void
   1124 mask_addr(union sockunion *addr, union sockunion *mask, int which)
   1125 {
   1126 int olen = mask->sa.sa_len;
   1127 char *cp1 = olen + (char *)mask, *cp2;
   1128 
   1129 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
   1130 if (*--cp1 != '\0') {
   1131 mask->sa.sa_len = 1 + cp1 - (char *)mask;
   1132 break;
   1133 }
   1134 if ((rtm_addrs & which) == 0)
   1135 return;
   1136 switch (addr->sa.sa_family) {
   1137 case AF_INET:
   1138 case AF_INET6:
   1139 case AF_UNSPEC:
   1140 return;
   1141 }
   1142 cp1 = mask->sa.sa_len + 1 + (char *)addr;
   1143 cp2 = addr->sa.sa_len + 1 + (char *)addr;
   1144 while (cp2 > cp1)
   1145 *--cp2 = '\0';
   1146 cp2 = mask->sa.sa_len + 1 + (char *)mask;
   1147 while (cp1 > addr->sa.sa_data)
   1148 *--cp1 &= *--cp2;
   1149 }

The function mask_addr() doesn't mask the address for IPv4 and IPv6.  Does
any address family other than IPv4 or IPv6 require #1142:1148?  The
function seems to just trim the trailing zeros.  Is it necessary?  And
it causes confusion in the kernel.  How about deleting
mask_addr()?

The diff following also fixes the problem.

diff --git a/sbin/route/route.c b/sbin/route/route.c
index 9e43d8e89b6..87f26e5c1e7 100644
--- a/sbin/route/route.c
+++ b/sbin/route/route.c
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, int);
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -1088,8 +1087,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1120,34 +1117,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
return (0);
 }
 
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
-   if (*--cp1 != '\0') {
-   mask->sa.sa_len = 1 + cp1 - (char *)mask;
-   break;
-   }
-   if ((rtm_addrs & which) == 0)
-   return;
-

Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
Hi,

On Mon, 29 Jun 2020 10:12:23 +0200
Martin Pieuchot  wrote:
> On 28/06/20(Sun) 20:41, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> When "::/0" is used as "default",
>> 
>>   # route add ::/0 fe80::1%em0
>>   add net ::/0: gateway fe80::1%em0: Invalid argument
>> 
>> route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
>> for "::/0", but rtable_satoplen() refuses it.  I think it should be
>> accepted.
> 
> rtable_satoplen() is used in many places, not just in the socket parsing
> code used by route(8).  I don't know what side effects can be introduced
> by this change.
> 
> Why is IPv6 different from IPv4 when it comes to the default route?

Different functions are used.  route(8) uses inet_makenetandmask() to
create the sockaddr for an IPv4 prefix length and uses prefixlen() for an
IPv6 prefix length.  "/0" results in:

IPv4
  { .len = 1, .family = 0, ... }
IPv6 
  { .len = 2, .family = AF_INET6, ... }

Next, route(8) uses mask_addr() to trim the trailing zeros.

1129 void
1130 mask_addr(union sockunion *addr, union sockunion *mask, int which)
1131 {
1132 int olen = mask->sa.sa_len;
1133 char *cp1 = olen + (char *)mask, *cp2;
1134 
1135 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
1136 if (*--cp1 != '\0') {
1137 mask->sa.sa_len = 1 + cp1 - (char *)mask;
1138 break;
1139 }

Look at #1135 carefully.  As a result, the sockaddrs become:

IPv4
  { .len = 0, .family = 0, ... }
IPv6
  { .len = 2, .family = AF_INET6, ... }

Yes, we can fix the IPv6 case to have .len = 0 as well.

But I thought the kernel should accept both cases, since the
representation for IPv6 didn't seem so bad to me.

> Shouldn't we change route(8) to have a `sa_len' of 0?
> 
> That would make the following true:
> 
> mlen = mask->sa_len;
> 
>   /* Default route */
>   if (mlen == 0)
>   return (0)
> 
>> Allow sockaddr for prefix length be trimmed before the key(address)
>> field.  Actually "route" command trims at the address family field for
>> "::/0"
>> 
>> Index: sys/net/rtable.c
>> ===
>> RCS file: /cvs/src/sys/net/rtable.c,v
>> retrieving revision 1.69
>> diff -u -p -r1.69 rtable.c
>> --- sys/net/rtable.c 21 Jun 2019 17:11:42 -  1.69
>> +++ sys/net/rtable.c 28 Jun 2020 11:30:54 -
>> @@ -887,8 +887,8 @@ rtable_satoplen(sa_family_t af, struct s
>>  
>>  ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
>>  ep = (uint8_t *)((uint8_t *)mask) + mlen;
>> -if (ap > ep)
>> -return (-1);
>> +if (ap >= ep)
>> +return (0);
> 
> That means the kernel now silently ignore sockaddr short `sa_len'. Are
> they supposed to be supported or are they symptoms of bugs?

I had missed that rtable_satoplen() is used by other functions.

> 
>>  /* Trim trailing zeroes. */
>>  while (ap < ep && ep[-1] == 0)
> 



route add ::/0 ...

2020-06-28 Thread YASUOKA Masahiko
Hi,

When "::/0" is used as "default",

  # route add ::/0 fe80::1%em0
  add net ::/0: gateway fe80::1%em0: Invalid argument

route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
for "::/0", but rtable_satoplen() refuses it.  I think it should be
accepted.

ok?

Allow the sockaddr for the prefix length to be trimmed before the key
(address) field.  Actually, the "route" command trims it at the address
family field for "::/0".

Index: sys/net/rtable.c
===
RCS file: /cvs/src/sys/net/rtable.c,v
retrieving revision 1.69
diff -u -p -r1.69 rtable.c
--- sys/net/rtable.c21 Jun 2019 17:11:42 -  1.69
+++ sys/net/rtable.c28 Jun 2020 11:30:54 -
@@ -887,8 +887,8 @@ rtable_satoplen(sa_family_t af, struct s
 
ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
ep = (uint8_t *)((uint8_t *)mask) + mlen;
-   if (ap > ep)
-   return (-1);
+   if (ap >= ep)
+   return (0);
 
/* Trim trailing zeroes. */
while (ap < ep && ep[-1] == 0)



Re: pipex(4): prevent `state_list' corruption

2020-06-22 Thread YASUOKA Masahiko
Yes, this seems right.

ok yasuoka

On Thu, 18 Jun 2020 23:53:25 +0300
Vitaliy Makkoveev  wrote:
> While pppac(4) destroy sessions by pipex_iface_fini() or by
> pipex_ioctl() with PIPEXSMODE command, some sessions can be linked to
> `state_list'. This case is not checked and sessions will never be
> unlinked and `state_list' will be broken after session's memory freeing.
> 
> Diff below adds session removal from `state_list' in
> pipex_unlink_session(). Also unlinked session `state' sets to
> PIPEX_STATE_CLOSED like pipex_close_session() does.
> 
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 pipex.c
> --- sys/net/pipex.c   18 Jun 2020 14:20:12 -  1.115
> +++ sys/net/pipex.c   18 Jun 2020 16:37:44 -
> @@ -473,8 +473,10 @@ pipex_unlink_session(struct pipex_sessio
>   break;
>   }
>  #endif
> -
> + if (session->state == PIPEX_STATE_CLOSE_WAIT)
> + LIST_REMOVE(session, state_list);
>   LIST_REMOVE(session, session_list);
> + session->state = PIPEX_STATE_CLOSED;
>  
>   /* if final session is destroyed, stop timer */
>   if (LIST_EMPTY(_session_list))
> 



Re: install npppd.conf with mode 0600

2020-06-21 Thread YASUOKA Masahiko
The line in etc/mtree/special should be updated as well.

  npppd.conf  type=file mode=0640 uname=root gname=wheel

other than that, ok yasuoka

On Sun, 21 Jun 2020 16:48:44 +0300
Vitaliy Makkoveev  wrote:
> We installing `npppd-users' with uid:gid root:wheel and mode 0600
> because it consists sensitive data but mode for npppd.conf is 0640.
> npppd.conf can also have radius passwords and nothing requires to allow
> it be readable by group. So set it's permissions to 0600.
> 
> Index: usr.sbin/npppd/Makefile
> ===
> RCS file: /cvs/src/usr.sbin/npppd/Makefile,v
> retrieving revision 1.6
> diff -u -p -r1.6 Makefile
> --- usr.sbin/npppd/Makefile   14 Mar 2013 16:20:46 -  1.6
> +++ usr.sbin/npppd/Makefile   21 Jun 2020 13:37:50 -
> @@ -6,7 +6,7 @@
>  SUBDIR+= npppd
>  
>  distribution:
> - ${INSTALL} -C -o root -g wheel -m 0640 ${.CURDIR}/npppd/npppd.conf \
> + ${INSTALL} -C -o root -g wheel -m 0600 ${.CURDIR}/npppd/npppd.conf \
>   ${DESTDIR}/etc/npppd/npppd.conf
>   ${INSTALL} -C -o root -g wheel -m 0600 ${.CURDIR}/npppd/npppd-users \
>   ${DESTDIR}/etc/npppd/npppd-users



Re: pf "route-to least-state" in an anchor doesn't work

2020-06-03 Thread YASUOKA Masahiko
Hello,

On Wed, 3 Jun 2020 23:30:56 +0200
Alexandr Nedvedicky  wrote:
> I'm OK with your change.

Thank you for your review and comment.

> However I would like to ask you to do yet another test.  I wonder if things
> will eventually work on unfixed PF if rules will be constructed as follows:
> 
> pfctl -a test -t LB -T add 10.0.0.11@pair102
> 
> echo 'pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
> keep state ( sloppy ) route-to  \
> least-states sticky-address' |pfctl -a test -f -
> 
> echo 'anchor test' | pfctl -f -
> 
> pfctl -e
> 
> I suspect the bug you've found and fixed happens when pfctl loads rules
> from pf.conf. I think the steps above will take a different route
> through the code, which avoids pfr_ina_define() (a.k.a. transactions).

I've tested it before the diff and after.  Both tests were OK.

  # pfctl -sr -a test   
 
  pass in quick on rdomain 102 inet proto tcp from any to 10.0.0.101 port = 
8080 flags S/SA keep state (sloppy) route-to  least-states sticky-address
  # pfctl -a test -tLB -Tshow
 10.0.0.11@pair102
  # 

  $ doas route -T 101 exec telnet 10.0.0.101 8080
  Trying 10.0.0.101...
  Connected to 10.0.0.101.
  Escape character is '^]'.
  ^]
  
  telnet> close
  Connection closed.
  $ 

> I don't have a test system readily available and I'm just curious
> if anything changes or not. Thanks for finding that for me.
> 
> As I've said I think your change should go in.
> 
> OK sashan

Thanks,



pf "route-to least-state" in an anchor doesn't work

2020-06-03 Thread YASUOKA Masahiko
Hi,

pf.conf:

  anchor {
    pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
      keep state ( sloppy ) route-to <LB> \
      least-states sticky-address
  }
  table <LB> {
    10.0.0.11@pair102
  }

this doesn't work.  All packets going to 10.0.0.101 are dropped with
'no-route'.  The problem doesn't happen if the pass rule is moved to
outside of the anchor or uses "round-robin" instead of "least-states".

In sys/net/pf_lb.c:
594 if (rpool->addr.type == PF_ADDR_TABLE) {
595 if (pfr_states_increase(rpool->addr.p.tbl,
596 naddr, af) == -1) {
597 if (pf_status.debug >= LOG_DEBUG) {
598 log(LOG_DEBUG,"pf: pf_map_addr: 
"
599 "selected address ");
600 pf_print_host(naddr, 0, af);
601 addlog(". Failed to increase 
count!\n");
602 }
603 return (1);
604 }

This chunk increases the counter for "least-states".  The packets
are dropped here because pfr_states_increase() returns -1.
pfr_states_increase() uses pfr_kentry_byaddr(), and
pfr_kentry_byaddr() uses pfr_lookup_addr() to look up a kentry in the
table.

pfr_lookup_addr() never succeeds in the above case, because it doesn't
take into account that rules in an anchor may use global (root) tables.
All the other functions which look up a kentry in a table seem to
take care of that.

I thought that pfr_lookup_addr() is a local function used by ioctl to
create tables and manage their members, so I kept it
untouched.  Instead, the diff replaces the body of pfr_kentry_byaddr()
with the logic of pfr_match_addr().

* * *
Test

1. prepare network

  ifconfig pair101 rdomain 101 10.0.0.1/24
  ifconfig pair102 rdomain 102 10.0.0.10/24
  ifconfig pair102 alias 10.0.0.101/24
  ifconfig pair103 rdomain 103 10.0.0.11/24
  ifconfig pair104 rdomain 100 patch pair101 up
  ifconfig pair105 rdomain 100 patch pair102 up
  ifconfig pair106 rdomain 100 patch pair103 up
  ifconfig lo103 127.0.0.1/8
  ifconfig lo103 alias 10.0.0.101/24

  ifconfig bridge100 add pair104
  ifconfig bridge100 add pair105
  ifconfig bridge100 add pair106 up

2. setup pf.conf

  anchor {
pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
  keep state ( sloppy ) route-to  \
  least-states sticky-address
  }
  table  {
10.0.0.11@pair102
  }

3. start a daemon on 8080/tcp on #103

   doas route -T 103 exec nc -l 8080

4. try to connect to it from #101

   doas route -T 101 exec telnet 10.0.0.101 8080

   - test OK if the connection is established

5. teardown

  ifconfig pair106 destroy
  ifconfig pair105 destroy
  ifconfig pair104 destroy
  ifconfig pair103 destroy
  ifconfig pair102 destroy
  ifconfig pair101 destroy
  ifconfig bridge100 destroy

* * *

ok?

Fix pfr_kentry_byaddr() so that it can be used for a rule in an anchor.  It
couldn't find an entry if its table is attached to a table on the root.
This fixes the problem that "route-to <table> least-states" doesn't work.
The problem was found by IIJ.

Index: sys/net/pf_table.c
===
RCS file: /cvs/src/sys/net/pf_table.c,v
retrieving revision 1.131
diff -u -p -r1.131 pf_table.c
--- sys/net/pf_table.c  8 Jul 2019 17:49:57 -   1.131
+++ sys/net/pf_table.c  3 Jun 2020 07:21:27 -
@@ -2085,11 +2085,28 @@ int
 pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
 {
struct pfr_kentry   *ke = NULL;
+   int  match;
+
+   ke = pfr_kentry_byaddr(kt, a, af, 0);
+
+   match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
+   if (match)
+   kt->pfrkt_match++;
+   else
+   kt->pfrkt_nomatch++;
+
+   return (match);
+}
+
+struct pfr_kentry *
+pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
+int exact)
+{
+   struct pfr_kentry   *ke = NULL;
struct sockaddr_in   tmp4;
 #ifdef INET6
struct sockaddr_in6  tmp6;
 #endif /* INET6 */
-   int  match;
 
if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
kt = kt->pfrkt_root;
@@ -2116,12 +2133,10 @@ pfr_match_addr(struct pfr_ktable *kt, st
default:
unhandled_af(af);
}
-   match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
-   if (match)
-   kt->pfrkt_match++;
-   else
-   kt->pfrkt_nomatch++;
-   return (match);
+   if (exact && ke && KENTRY_NETWORK(ke))
+   ke = NULL;
+
+   return (ke);
 }
 
 void
@@ -2497,39 +2512,6 @@ pfr_states_decrease(struct pfr_ktable *k
"pfr_states_decrease: states-- when states <= 0");
 

Re: diff: init efifb even if VGA is probed.

2020-05-28 Thread YASUOKA Masahiko
On Thu, 28 May 2020 12:31:31 +0200 (CEST)
Mark Kettenis  wrote:
>> Date: Thu, 28 May 2020 17:01:48 +0900 (JST)
>> From: YASUOKA Masahiko 
>> 
>> Hi,
>> 
>> I'd like to conclude this issue.
>> 
>> On Fri, 21 Feb 2020 14:09:07 +0900 (JST)
>> YASUOKA Masahiko  wrote:
>> > I am testing a new hardware, HPE DL20 Gen10.
>> > 
>> > When efiboot starts the kernel, the video display becomes distorted
>> > and never recovered until CPU reset.
>> > 
>> > Our kernel tries to initialized console twice, first trial is done
>> > before getting boot info and second trial is done after getting boot
>> > info.  Since EFI framebuffer needs "boot info", it is initialized on
>> > second trial.
>> > 
>> > On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> > selects vga for the console, but actually it is broken.  On usual
>> > machines which boot with EFI, the problem doesn't happen since they
>> > have no vga.
>> 
>> If we have a way to detect whether the machine has VGA.  ACPI
>> FADT_NO_VGA was a candidate.  But that bit is cleard both on my "HPE
>> DL20 Gen10" and Andrew Daugherity's Dell PowerEdge R230.  Also the
>> problem newly posted at misc@ (*) might be the same problem.
>> 
>>  (*) https://marc.info/?l=openbsd-misc=159064773219779=2
>> 
>> I think having the diff folowing is the best for this momemnt.
>> The diff does:
>> 
>>   - move cninit() after parsing bootinfo
>>   - give up the debug message which can be enabled if BOOTINFO_DEBUG is 
>> defined
>> 
>> ok?
> 
> I suspect we have to accept that there is too much broken hardware out
> there.

Finally we might have no way other than having a configuration in
boot.conf...

> There is no real reason to drop the debug messages.

OK, the debug messages are reverted.

> I'd prefer to call cninit() directly from init_x86_64, so I'd just
> move the call immediately after the block that calls getbootinfo().
> And emove the call from getbootinfo() of course.

I think the last diff already satisfied these things.

>> @@ -1395,11 +1395,6 @@ init_x86_64(paddr_t first_avail)
>>  i8254_startclock();
>>  
>>  /*
>> - * Attach the glass console early in case we need to display a panic.
>> - */
>> -cninit();
>> -
>> -/*
>>   * Initialize PAGE_SIZE-dependent variables.
>>   */
>>  uvm_setpagesize();
>> @@ -1421,6 +1416,8 @@ init_x86_64(paddr_t first_avail)
>>  } else
>>  panic("invalid /boot");
>>  
>> +cninit();
>> +
>>  /*
>>   * Memory on the AMD64 port is described by three different things.
>>   *

A hidden line which calls getbootinfo() is just before the second
chunk.  The updated diff was created with "-U 4" to clarify this.

Alternatively, are you suggesting

getbootinfo(bootinfo, bootinfo_size);
+   cninit();
} else
panic("invalid /boot");

?

Either is OK with me.

How about this?

Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.264
diff -u -p -U4 -r1.264 machdep.c
--- sys/arch/amd64/amd64/machdep.c  16 May 2020 14:44:44 -  1.264
+++ sys/arch/amd64/amd64/machdep.c  28 May 2020 11:34:39 -
@@ -1394,13 +1394,8 @@ init_x86_64(paddr_t first_avail)
 
i8254_startclock();
 
/*
-* Attach the glass console early in case we need to display a panic.
-*/
-   cninit();
-
-   /*
 * Initialize PAGE_SIZE-dependent variables.
 */
uvm_setpagesize();
 
@@ -1420,8 +1415,10 @@ init_x86_64(paddr_t first_avail)
getbootinfo(bootinfo, bootinfo_size);
} else
panic("invalid /boot");
 
+   cninit();
+
 /*
  * Memory on the AMD64 port is described by three different things.
  *
  * 1. biosbasemem - This is outdated, and should really only be used to
@@ -1926,10 +1923,8 @@ getbootinfo(char *bootinfo, int bootinfo
bootarg32_t *q;
bios_ddb_t *bios_ddb;
bios_bootduid_t *bios_bootduid;
bios_bootsr_t *bios_bootsr;
-   int docninit = 0;
-
 #undef BOOTINFO_DEBUG
 #ifdef BOOTINFO_DEBUG
printf("bootargv:");
 #endif
@@ -1982,11 +1977,8 @@ getbootinfo(char *bootinfo, int bootinfo
comconsunit = unit;
comconsaddr = consaddr;
comconsrate = cdp->conspeed;
 

Re: diff: init efifb even if VGA is probed.

2020-05-28 Thread YASUOKA Masahiko
On Thu, 28 May 2020 17:01:48 +0900 (JST)
YASUOKA Masahiko  wrote:
> Hi,
> 
> I'd like to conclude this issue.
> 
> On Fri, 21 Feb 2020 14:09:07 +0900 (JST)
> YASUOKA Masahiko  wrote:
>> I am testing a new hardware, HPE DL20 Gen10.
>> 
>> When efiboot starts the kernel, the video display becomes distorted
>> and never recovered until CPU reset.
>> 
>> Our kernel tries to initialized console twice, first trial is done
>> before getting boot info and second trial is done after getting boot
>> info.  Since EFI framebuffer needs "boot info", it is initialized on
>> second trial.
>> 
>> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> selects vga for the console, but actually it is broken.  On usual
>> machines which boot with EFI, the problem doesn't happen since they
>> have no vga.
> 
> If we have a way to detect whether the machine has VGA.  ACPI
> FADT_NO_VGA was a candidate.  But that bit is cleared both on my "HPE
> DL20 Gen10" and Andrew Daugherity's Dell PowerEdge R230.  Also the
> problem newly posted at misc@ (*) might be the same problem.

Above paragraph may be unclear.  Let me update it by the following
paragraph.

If we have a way to detect whether the machine has VGA, we thought we
can select VGA or EFI framebuffer safely.  ACPI FADT_NO_VGA was a
candidate.  But the bit is cleared both on my "HPE DL20 Gen10" and
Andrew Daugherity's Dell PowerEdge R230.  Also the problem newly
posted at misc@ (*) might be the same problem.

>  (*) https://marc.info/?l=openbsd-misc&m=159064773219779&w=2
> 
> I think applying the following diff is the best for this moment.
> The diff does:
> 
>   - move cninit() after parsing bootinfo
>   - give up the debug message which can be enabled if BOOTINFO_DEBUG is 
> defined
> 
> ok?
> 
> Index: sys/arch/amd64/amd64/machdep.c
> ===
> RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.264
> diff -u -p -r1.264 machdep.c
> --- sys/arch/amd64/amd64/machdep.c16 May 2020 14:44:44 -  1.264
> +++ sys/arch/amd64/amd64/machdep.c28 May 2020 07:40:14 -
> @@ -1395,11 +1395,6 @@ init_x86_64(paddr_t first_avail)
>   i8254_startclock();
>  
>   /*
> -  * Attach the glass console early in case we need to display a panic.
> -  */
> - cninit();
> -
> - /*
>* Initialize PAGE_SIZE-dependent variables.
>*/
>   uvm_setpagesize();
> @@ -1421,6 +1416,8 @@ init_x86_64(paddr_t first_avail)
>   } else
>   panic("invalid /boot");
>  
> + cninit();
> +
>  /*
>   * Memory on the AMD64 port is described by three different things.
>   *
> @@ -1927,12 +1924,6 @@ getbootinfo(char *bootinfo, int bootinfo
>   bios_ddb_t *bios_ddb;
>   bios_bootduid_t *bios_bootduid;
>   bios_bootsr_t *bios_bootsr;
> - int docninit = 0;
> -
> -#undef BOOTINFO_DEBUG
> -#ifdef BOOTINFO_DEBUG
> - printf("bootargv:");
> -#endif
>  
>   for (q = (bootarg32_t *)bootinfo;
>   (q->ba_type != BOOTARG_END) &&
> @@ -1942,24 +1933,15 @@ getbootinfo(char *bootinfo, int bootinfo
>   switch (q->ba_type) {
>   case BOOTARG_MEMMAP:
>   bios_memmap = (bios_memmap_t *)q->ba_arg;
> -#ifdef BOOTINFO_DEBUG
> - printf(" memmap %p", bios_memmap);
> -#endif
>   break;
>   case BOOTARG_DISKINFO:
>   bios_diskinfo = (bios_diskinfo_t *)q->ba_arg;
> -#ifdef BOOTINFO_DEBUG
> - printf(" diskinfo %p", bios_diskinfo);
> -#endif
>   break;
>   case BOOTARG_APMINFO:
>   /* generated by i386 boot loader */
>   break;
>   case BOOTARG_CKSUMLEN:
>   bios_cksumlen = *(u_int32_t *)q->ba_arg;
> -#ifdef BOOTINFO_DEBUG
> - printf(" cksumlen %d", bios_cksumlen);
> -#endif
>   break;
>   case BOOTARG_PCIINFO:
>   /* generated by i386 boot loader */
> @@ -1983,15 +1965,8 @@ getbootinfo(char *bootinfo, int bootinfo
>   comconsaddr = consaddr;
>   comconsrate = cdp->conspeed;
>   comconsiot = X86_BUS_SPACE_IO;
> -
> - /* Probe the serial port this time. */
> - docninit+

Re: diff: init efifb even if VGA is probed.

2020-05-28 Thread YASUOKA Masahiko
Hi,

I'd like to conclude this issue.

On Fri, 21 Feb 2020 14:09:07 +0900 (JST)
YASUOKA Masahiko  wrote:
> I am testing a new hardware, HPE DL20 Gen10.
> 
> When efiboot starts the kernel, the video display becomes distorted
> and never recovered until CPU reset.
> 
> Our kernel tries to initialized console twice, first trial is done
> before getting boot info and second trial is done after getting boot
> info.  Since EFI framebuffer needs "boot info", it is initialized on
> second trial.
> 
> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
> selects vga for the console, but actually it is broken.  On usual
> machines which boot with EFI, the problem doesn't happen since they
> have no vga.

If we have a way to detect whether the machine has VGA.  ACPI
FADT_NO_VGA was a candidate.  But that bit is cleared both on my "HPE
DL20 Gen10" and Andrew Daugherity's Dell PowerEdge R230.  Also the
problem newly posted at misc@ (*) might be the same problem.

 (*) https://marc.info/?l=openbsd-misc&m=159064773219779&w=2

I think applying the following diff is the best for this moment.
The diff does:

  - move cninit() after parsing bootinfo
  - give up the debug message which can be enabled if BOOTINFO_DEBUG is defined

ok?

Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.264
diff -u -p -r1.264 machdep.c
--- sys/arch/amd64/amd64/machdep.c  16 May 2020 14:44:44 -  1.264
+++ sys/arch/amd64/amd64/machdep.c  28 May 2020 07:40:14 -
@@ -1395,11 +1395,6 @@ init_x86_64(paddr_t first_avail)
i8254_startclock();
 
/*
-* Attach the glass console early in case we need to display a panic.
-*/
-   cninit();
-
-   /*
 * Initialize PAGE_SIZE-dependent variables.
 */
uvm_setpagesize();
@@ -1421,6 +1416,8 @@ init_x86_64(paddr_t first_avail)
} else
panic("invalid /boot");
 
+   cninit();
+
 /*
  * Memory on the AMD64 port is described by three different things.
  *
@@ -1927,12 +1924,6 @@ getbootinfo(char *bootinfo, int bootinfo
bios_ddb_t *bios_ddb;
bios_bootduid_t *bios_bootduid;
bios_bootsr_t *bios_bootsr;
-   int docninit = 0;
-
-#undef BOOTINFO_DEBUG
-#ifdef BOOTINFO_DEBUG
-   printf("bootargv:");
-#endif
 
for (q = (bootarg32_t *)bootinfo;
(q->ba_type != BOOTARG_END) &&
@@ -1942,24 +1933,15 @@ getbootinfo(char *bootinfo, int bootinfo
switch (q->ba_type) {
case BOOTARG_MEMMAP:
bios_memmap = (bios_memmap_t *)q->ba_arg;
-#ifdef BOOTINFO_DEBUG
-   printf(" memmap %p", bios_memmap);
-#endif
break;
case BOOTARG_DISKINFO:
bios_diskinfo = (bios_diskinfo_t *)q->ba_arg;
-#ifdef BOOTINFO_DEBUG
-   printf(" diskinfo %p", bios_diskinfo);
-#endif
break;
case BOOTARG_APMINFO:
/* generated by i386 boot loader */
break;
case BOOTARG_CKSUMLEN:
bios_cksumlen = *(u_int32_t *)q->ba_arg;
-#ifdef BOOTINFO_DEBUG
-   printf(" cksumlen %d", bios_cksumlen);
-#endif
break;
case BOOTARG_PCIINFO:
/* generated by i386 boot loader */
@@ -1983,15 +1965,8 @@ getbootinfo(char *bootinfo, int bootinfo
comconsaddr = consaddr;
comconsrate = cdp->conspeed;
comconsiot = X86_BUS_SPACE_IO;
-
-   /* Probe the serial port this time. */
-   docninit++;
}
 #endif
-#ifdef BOOTINFO_DEBUG
-   printf(" console 0x%x:%d",
-   cdp->consdev, cdp->conspeed);
-#endif
}
break;
case BOOTARG_BOOTMAC:
@@ -2023,8 +1998,6 @@ getbootinfo(char *bootinfo, int bootinfo
 
case BOOTARG_EFIINFO:
bios_efiinfo = (bios_efiinfo_t *)q->ba_arg;
-   if (bios_efiinfo->fb_addr != 0)
-   docninit++;
break;
 
case BOOTARG_UCODE:
@@ -2032,18 +2005,9 @@ getbootinfo(char *bootinfo, int bootinfo
break;
 
default:
-#ifdef BOOTINFO_DEBUG
-   printf(" unsupported arg (%d) %p", q->ba_type,
-   q->ba_arg);
-#endif
break;
}
}
-   if (docninit > 0)
-   cninit();
-#ifdef BOOTINFO_DEBUG
-   printf("\n");
-#endif
 }
 
 int



fix pppac(4) without pipex

2020-04-12 Thread YASUOKA Masahiko
Hi,

The following diff fixes panics when using pppac(4) with "pipex no".

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.83
diff -u -p -r1.83 if_pppx.c
--- sys/net/if_pppx.c   10 Apr 2020 07:36:52 -  1.83
+++ sys/net/if_pppx.c   12 Apr 2020 06:12:35 -
@@ -344,7 +344,7 @@ pppxwrite(dev_t dev, struct uio *uio, in
if (m == NULL)
return (ENOBUFS);
mlen = MHLEN;
-   if (uio->uio_resid >= MINCLSIZE) {
+   if (uio->uio_resid > MHLEN) {
MCLGET(m, M_DONTWAIT);
if (!(m->m_flags & M_EXT)) {
m_free(m);
@@ -1368,7 +1368,7 @@ pppacwrite(dev_t dev, struct uio *uio, i
if (m == NULL)
return (ENOMEM);
 
-   if (uio->uio_resid > MINCLSIZE) {
+   if (uio->uio_resid > MHLEN) {
m_clget(m, M_WAITOK, uio->uio_resid);
if (!ISSET(m->m_flags, M_EXT)) {
m_free(m);



Re: pipex(4) fix: check session existence before creation

2020-04-07 Thread YASUOKA Masahiko
ok yasuoka

On Mon, 6 Apr 2020 19:54:20 +0300
Vitaliy Makkoveev  wrote:
> Refuse to create a pipex_session which already exists. A newly created
> session would be placed at the list head, so the caller of
> pipex_*_lookup_session() would receive the wrong session.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 if_pppx.c
> --- sys/net/if_pppx.c 6 Apr 2020 12:31:30 -   1.79
> +++ sys/net/if_pppx.c 6 Apr 2020 13:47:26 -
> @@ -719,6 +719,11 @@ pppx_add_session(struct pppx_dev *pxd, s
>   return (EPROTONOSUPPORT);
>   }
>  
> + session = pipex_lookup_by_session_id(req->pr_protocol,
> + req->pr_session_id);
> + if (session)
> + return (EEXIST);
> +
>   pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
>   if (pxi == NULL)
>   return (ENOMEM);
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.112
> diff -u -p -r1.112 pipex.c
> --- sys/net/pipex.c   6 Apr 2020 13:14:04 -   1.112
> +++ sys/net/pipex.c   6 Apr 2020 13:47:33 -
> @@ -312,6 +312,11 @@ pipex_add_session(struct pipex_session_r
>   return (EPROTONOSUPPORT);
>   }
>  
> + session = pipex_lookup_by_session_id(req->pr_protocol,
> + req->pr_session_id);
> + if (session)
> + return (EEXIST);
> +
>   /* prepare a new session */
>   session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
>   session->state = PIPEX_STATE_OPENED;
> 



Re: Prevent memory corruption by pipex_timer()

2020-04-01 Thread YASUOKA Masahiko
Hi,

Sorry for my silence.

ok yasuoka for the daemon part.

On Wed, 1 Apr 2020 09:27:10 +0200
Martin Pieuchot  wrote:
> On 31/03/20(Tue) 23:16, Vitaliy Makkoveev wrote:
>> On Tue, Mar 31, 2020 at 06:15:46PM +0200, Martin Pieuchot wrote:
>> > [...] 
>> > Well better fix npppd(8), no?  Not crashing the kernel is priority 1.
>> I made patch for npppd(8) too. I include it to this email below, without
>> starting new thread, ok? If ioctl(PIPEXASESSION) failed and error !=
>> ENXIO it means that pipex is enabled and session creation failed so down
>> this connection.
> 
> Thanks, I committed the kernel part.  I'm waiting to see if other devs
> want to comment on the daemon part.
> 
>> > Then if the daemon has a bug, should the kernel work around it? 
>> In most common cases should :(
> 
> That's an opinion.  There's no true or false answer.  Working around it
> has obvious advantages but it doesn't make us fix existing bug and instead
> force us to maintain the work around. 
> 
> It is argued that the "failing hard" model, as it is practised in OpenBSD
> software development, has the advantage of resulting in simpler code because
> every layer is responsible for handling errors and doesn't pile workaround.
> 
> This bug is a nice example.  Thanks for the report!  If you could submit
> your refactoring in a new thread, I'd love to look at it.
> 



Re: diff: init efifb even if VGA is probed.

2020-03-09 Thread YASUOKA Masahiko
Hi,

Thank you for your test and feedback.

On Fri, 6 Mar 2020 16:38:24 -0600
Andrew Daugherity  wrote:
> On Sun, Mar 1, 2020 at 10:41 PM YASUOKA Masahiko  wrote:
>>
>> Hi,
>>
>> The problems you are pointing seem to be the same problem.
>>
>> > I'll try to test this diff next week if I can schedule some downtime.
>>
>> Test is appreciated.
>>
>> Also I'd like to know the result of
>>
>>   hexdump -C /var/db/acpi/FACP.1
>>
>> when "Load Legacy Video Option ROM" setting is disabled.
> 
> I just tested a -current kernel built yesterday with that diff (your
> post on Feb. 20), but unfortunately it does not fix the issue on my
> hardware.  As before, if "Load Legacy Video Option ROM" is disabled,
> output is squished to a purple line and when devices are initialized,
> vga1 is the wsdisplay0 device:

I see, first diff didn't fix the problem on your machine.

> vga1 at pci7 dev 0 function 0 "Matrox MGA G200eR" rev 0x01
> wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation)
> wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
> efifb0 at mainbus0: 1280x1024, 32bpp
> wsdisplay at efifb0 not configured
> 
> vs. with the legacy video ROM setting:
> 
> "Matrox MGA G200eR" rev 0x01 at pci7 dev 0 function 0 not configured
> efifb0 at mainbus0: 1024x768, 32bpp
> wsdisplay0 at efifb0 mux 1
> wsdisplay0: screen 0-5 added (std, vt100 emulation)
> 
> I'm using a serial console, if it matters.  Hmm... I just noticed that
> with the legacy ROM setting disabled, both wsdisplay0 at vga1 mux
> 1/wskbd0 at ukbd0 *and* com1 claim to be the console.  With the
> setting enabled (and efifb working), only com1 is listed as console.
> 
> I haven't tried any of the later diffs as I'm not sure which are still
> recommended.

The last diff should fix the problem since it will initialize efifb
before initializing VGA without condition.

https://marc.info/?l=openbsd-tech&m=158280719421562&w=2

> The FACP.1 table does not change when the "Load Legacy Video Option
> ROM" setting is changed.  Here is its hexdump:
> andrew@gsc-lb1:~/acpidump$ hexdump -C legacy-2.8.1/FACP.1
>   46 41 43 50 0c 01 00 00  05 62 44 45 4c 4c 20 20  |FACP.bDELL  |
> 0010  50 45 5f 53 43 33 20 20  00 00 00 00 44 45 4c 4c  |PE_SC3  DELL|
> 0020  01 00 00 00 00 30 f8 8e  00 b0 fc 8e 00 04 09 00  |.0..|
> 0030  b2 00 00 00 f0 f1 f2 00  00 18 00 00 00 00 00 00  ||
> 0040  04 18 00 00 00 00 00 00  50 18 00 00 08 18 00 00  |P...|
> 0050  80 18 00 00 00 00 00 00  04 02 01 04 20 00 10 00  | ...|
> 0060  65 00 e9 03 00 00 00 00  01 03 0d 00 32 11 00 00  |e...2...|
> 0070  a5 86 00 00 01 08 00 01  f9 0c 00 00 00 00 00 00  ||
> 0080  06 00 00 00 00 00 00 00  00 00 00 00 00 b0 fc 8e  ||
> 0090  00 00 00 00 01 20 00 02  00 18 00 00 00 00 00 00  |. ..|
> 00a0  01 00 00 02 00 00 00 00  00 00 00 00 01 10 00 02  ||
> 00b0  04 18 00 00 00 00 00 00  01 00 00 02 00 00 00 00  ||
> 00c0  00 00 00 00 01 08 00 01  50 18 00 00 00 00 00 00  |P...|
> 00d0  01 20 00 03 08 18 00 00  00 00 00 00 01 00 00 01  |. ..|
> 00e0  80 18 00 00 00 00 00 00  01 00 00 01 00 00 00 00  ||
> 00f0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  ||
> 0100  00 00 00 00 00 00 00 00  00 00 00 00  ||
> 010c

This was to check whether using the "VGA Not Present" bit is useful on
your machine.  "Boot IA-PC Boot Architecture Flags" is at 0x6D:6E =
0x0011: the LEGACY_DEVICES bit is set and "VGA Not Present" is cleared.
This means the bit isn't set as I expected, so it isn't useful for
detecting the existence of VGA.

> The only ACPI change made by toggling that option is the DMAR.25
> table.  Here are both versions:
> Legacy Video ROM enabled:
>   44 4d 41 52 90 00 00 00  01 83 49 4e 54 45 4c 20  |DMAR..INTEL |
> 0010  47 4e 4c 52 00 00 00 00  01 00 00 00 49 4e 54 4c  |GNLRINTL|
> 0020  01 00 00 00 26 01 00 00  00 00 00 00 00 00 00 00  |&...|
> 0030  00 00 20 00 01 00 00 00  00 00 d9 fe 00 00 00 00  |.. .|
> 0040  03 08 00 00 02 f0 1f 00  04 08 00 00 00 00 1f 00  ||
> 0050  01 00 20 00 00 00 00 00  00 b0 ba 7c 00 00 00 00  |.. ||
> 0060  ff 2f bb 84 00 00 00 00  01 08 00 00 00 01 00 00  |./..|
> 0070  01 00 20 00 00 00 00 00  00 10 31 8e 00 00 00 00  |.. ...1.|
> 0080  ff 0f 33 8e 00 00 00 00  01 08 00 00 00 00 14 00  |..3.|
> 0090
> and disabled:
> 

Re: efiboot, serial port order

2020-03-01 Thread YASUOKA Masahiko
Hi,

On Sun, 23 Feb 2020 12:18:37 +0100 (CET)
Mark Kettenis  wrote:
>> Date: Sat, 22 Feb 2020 10:40:13 +0900 (JST)
>> From: YASUOKA Masahiko 
>> efiboot is using ACPI UID to determine the minor number of comX.
(snip)
>> I originally wrote this code, because I thought ACPI UID enumeration
>> is better than the order of handles by EFI.
>> 
>> On qemu or vmware, 2 serials are mapped like the following:
>> 
>>   EFI handle  ACPI UID  I/O addr  efiboot  kernel
>>   0   0 0x3f8 com0 com0
>>   1   1 0x2f8 com1 com1
>> 
>> EFI handle order and ACPI UID enumeration are same and they also match
>> I/O address assignment.
>> 
>> But on "HPE DL20 Gen10", 2 serials are mapped like the following:
>> 
>>   EFI handle  ACPI UID  I/O addr  efiboot  kernel
>>   0   1 0x3f8 com1 com0
>>   1   0 0x2f8 com0 com1
>> 
>> Note that EFI handle order and ACPI UID enumeration is different and
>> ACPI UID enumeration doesn't match the order in I/O address
>> assignment.  In this case, since com0 or com1 are mixed up between
>> efiboot and kernel, if serial is usable on efiboot, it becomes not
>> usable on kernel.
>> 
>> Fortunately we can use "machine comaddr" to fix up the problem.
>> 
>> 
>> Also I don't know any actual case such that EFI handle order is wrong
>> but ACPI UID is correct.  If using ACPI UID is useless, we can apply
>> the diff attached at last.
>> 
>> comment?
> 
> I fear using the EFI handle order is going to cause similar problems
> on some other machine.  What we really need here is the io port
> address.  Unfortunately we can't map the UID into an address without a
> full AML parser, which is not something we want in the bootloader.
> 
> On arm64 we use the SPCR table to select the serial port.  This table
> does contain the io address of the serial port.  Unfortunately not all
> machines have an SPCR table so we want to retain some of the existing
> logic in case an SPCR table isn't provided.

Thank you for the comment.

I suppose we need to accept the behavior for amd64 until we find a
good way.


Also I'd like to commit the diff attached.

When working around the problem with the boot.conf like below,

 machine comaddr 0x3f8
 set tty com1

this expects the kernel to select com0 for the console.  But the
existing com(4) doesn't actually work that way.


ok?

Update the console device always when attaching the real device
driver. The information by the driver is supposed more reliable than
the information which was set up earlier.

Index: sys/dev/ic/com.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/ic/com.c,v
retrieving revision 1.171
diff -u -p -r1.171 com.c
--- sys/dev/ic/com.c5 Feb 2020 10:21:17 -   1.171
+++ sys/dev/ic/com.c2 Mar 2020 06:35:01 -
@@ -1498,8 +1498,8 @@ com_attach_subr(struct com_softc *sc)
if (cdevsw[maj].d_open == comopen)
break;
 
-   if (maj < nchrdev && cn_tab->cn_dev == NODEV)
-   cn_tab->cn_dev = makedev(maj, sc->sc_dev.dv_unit);
+   KASSERT(maj < nchrdev);
+   cn_tab->cn_dev = makedev(maj, sc->sc_dev.dv_unit);
 
printf("%s: console\n", sc->sc_dev.dv_xname);
}



diff: ipmi initializing watchdog

2020-03-01 Thread YASUOKA Masahiko
When I played with ipmi, I found problems in the initializing
watchdog.

As far as my test on HPE DL20 Gen10, the ipmi device refuses "set
watchdog timer" command if "use" value (in first byte) is none.  Since
existing code uses the value which read from the device, if the value
is not set on the device, the command actually fails.  The diff  makes
the driver set the value IPMI_WDOG_USE_SMS_OS explicitly always.

Also, the existing code ignores the error when "set watchdog timer"
fails.  I think this is not good and can cause actual problems.

ok?


Set "use" bits to "SMS/OS" always to prevent error.  Actually "set
watchdog timer" fails on HP DL20 Gen10 with ccode 0xCC (invalid
request) if the bits were not set in advance.  Also don't update the
timer period if setting the watchdog timer fails.

Index: sys/dev/ipmi.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/ipmi.c,v
retrieving revision 1.109
diff -u -p -r1.109 ipmi.c
--- sys/dev/ipmi.c  18 Feb 2020 00:06:12 -  1.109
+++ sys/dev/ipmi.c  2 Mar 2020 05:39:34 -
@@ -130,7 +130,7 @@ voidipmi_cmd_wait_cb(void *);
 
 intipmi_watchdog(void *, int);
 void   ipmi_watchdog_tickle(void *);
-void   ipmi_watchdog_set(void *);
+intipmi_watchdog_set(void *, int);
 
 struct ipmi_softc *ipmilookup(dev_t dev);
 
@@ -1741,8 +1741,9 @@ ipmi_watchdog(void *arg, int period)
 
if (period < MIN_PERIOD && period > 0)
period = MIN_PERIOD;
+   if (ipmi_watchdog_set(sc, period) == -1)
+   return (sc->sc_wdog_period);
sc->sc_wdog_period = period;
-   ipmi_watchdog_set(sc);
printf("%s: watchdog %sabled\n", DEVNAME(sc),
(period == 0) ? "dis" : "en");
return (period);
@@ -1766,8 +1767,8 @@ ipmi_watchdog_tickle(void *arg)
ipmi_cmd();
 }
 
-void
-ipmi_watchdog_set(void *arg)
+int
+ipmi_watchdog_set(void *arg, int period)
 {
struct ipmi_softc   *sc = arg;
uint8_t wdog[IPMI_GET_WDOG_MAX];
@@ -1785,14 +1786,15 @@ ipmi_watchdog_set(void *arg)
ipmi_cmd();
 
/* Period is 10ths/sec */
-   uint16_t timo = htole16(sc->sc_wdog_period * 10);
+   uint16_t timo = htole16(period * 10);
 
memcpy([IPMI_SET_WDOG_TIMOL], , 2);
-   wdog[IPMI_SET_WDOG_TIMER] &= ~IPMI_WDOG_DONTSTOP;
-   wdog[IPMI_SET_WDOG_TIMER] |= (sc->sc_wdog_period == 0) ?
-   0 : IPMI_WDOG_DONTSTOP;
+   wdog[IPMI_SET_WDOG_TIMER] &= ~(IPMI_WDOG_DONTSTOP | IPMI_WDOG_USE_MASK);
+   wdog[IPMI_SET_WDOG_TIMER] |= IPMI_WDOG_USE_SMS_OS;
+   if (period != 0)
+   wdog[IPMI_SET_WDOG_TIMER] |= IPMI_WDOG_DONTSTOP;
wdog[IPMI_SET_WDOG_ACTION] &= ~IPMI_WDOG_MASK;
-   wdog[IPMI_SET_WDOG_ACTION] |= (sc->sc_wdog_period == 0) ?
+   wdog[IPMI_SET_WDOG_ACTION] |= (period == 0) ?
IPMI_WDOG_DISABLED : IPMI_WDOG_REBOOT;
 
c.c_cmd = APP_SET_WATCHDOG_TIMER;
@@ -1801,6 +1803,13 @@ ipmi_watchdog_set(void *arg)
c.c_rxlen = 0;
c.c_data = wdog;
ipmi_cmd();
+   if (c.c_ccode != 0) {
+   printf("%s: set watchdog timer failed: %.2x", DEVNAME(sc),
+   c.c_ccode);
+   return (-1);
+   }
+
+   return (0);
 }
 
 #if defined(__amd64__) || defined(__i386__)
Index: sys/dev/ipmivar.h
===
RCS file: /var/cvs/openbsd/src/sys/dev/ipmivar.h,v
retrieving revision 1.32
diff -u -p -r1.32 ipmivar.h
--- sys/dev/ipmivar.h   19 Dec 2019 09:01:50 -  1.32
+++ sys/dev/ipmivar.h   2 Mar 2020 05:39:34 -
@@ -136,6 +136,11 @@ struct ipmi_thread {
 };
 
 #define IPMI_WDOG_DONTSTOP 0x40
+#define IPMI_WDOG_USE_MASK 0x07
+#define IPMI_WDOG_USE_BIOS_FRB2 1
+#define IPMI_WDOG_USE_BIOS_POST 2
+#define IPMI_WDOG_USE_OS_LOAD  3
+#define IPMI_WDOG_USE_SMS_OS   4
 
 #define IPMI_WDOG_MASK 0x03
 #define IPMI_WDOG_DISABLED 0x00



diff: "ipmi0: sendcmd fails"

2020-03-01 Thread YASUOKA Masahiko
Hi,

On HPE DL20 Gen10, kernel keeps printing "ipmi0: sendcmd fails" if
ipmi0 is enabled.

The machine has following 4 sensor devices.

  19-P/S 1 Inlet
  20-P/S 2 Inlet
  21-P/S 1
  22-P/S 2

But reading value from these devices fails always.  This causes the
problem above.

The diff disables such devices if the error happens when
probing.

ok?


Return an error value when sending "sensor reading" fails. This fixes
"ipmi0: sendcmd fails" errors when there is a sensor which is
enumerated but reading it fails.

Index: sys/dev/ipmi.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/ipmi.c,v
retrieving revision 1.109
diff -u -p -r1.109 ipmi.c
--- sys/dev/ipmi.c  18 Feb 2020 00:06:12 -  1.109
+++ sys/dev/ipmi.c  2 Mar 2020 05:38:25 -
@@ -1288,6 +1288,11 @@ read_sensor(struct ipmi_softc *sc, struc
c.c_data = data;
ipmi_cmd();
 
+   if (c.c_ccode != 0) {
+   dbg_printf(1, "sensor reading command for %s failed: %.2x\n",
+   psensor->i_sensor.desc, c.c_ccode);
+   return (rv);
+   }
dbg_printf(10, "values=%.2x %.2x %.2x %.2x %s\n",
data[0],data[1],data[2],data[3], psensor->i_sensor.desc);
psensor->i_sensor.flags &= ~SENSOR_FINVALID;



Re: diff: init efifb even if VGA is probed.

2020-03-01 Thread YASUOKA Masahiko
Hi,

On Fri, 21 Feb 2020 18:55:38 -0600
Andrew Daugherity  wrote:
> On Thu, Feb 20, 2020 at 11:10 PM YASUOKA Masahiko  wrote:
>> Hello,
>>
>> I am testing a new hardware, HPE DL20 Gen10.
>>
>> When efiboot starts the kernel, the video display becomes distorted
>> and never recovered until CPU reset.
>> [...]
>> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> selects vga for the console, but actually it is broken.  On usual
>> machines which boot with EFI, the problem doesn't happen since they
>> have no vga.
>>
>> The diff following fixes the problem by initializing efifb console
>> even if the VGA is probed.
> 
> This is exciting!  Your HP server sounds very much like what I've
> experienced on the Dell PowerEdge R230 [1] (probably also affects
> other Dell Rx30 in UEFI mode, maybe Rx40 too?), and I would not be
> surprised if your diff fixes mine too.
> 
> 
>> # Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
>> # disabling the setting avoids the problem happening.  But since the
>> # setting seems to be for old Windows, I think we should fix our
>> # kernel.
> 
> OpenBSD squishes the video to a thin purple line unless I enable the
> "Load Legacy Video Option ROM" setting in the Dell BIOS.  However,
> that setting is described as a compatibility shim for old OSes which
> don't support EFI GOP natively, and enabling it restricts the efifb
> resolution to 1024x768.  Every other OS I tested (including FreeBSD &
> Linux) worked properly without the legacy video ROM.
> 
> I got as far a nasty hack of a diff on efifb where if probing failed,
> attach it with hardcoded values matching my machine.  With said diff
> plus vga disabled via 'boot -c' (otherwise vga would steal the
> console), kernel output was still scrambled, but userland output from
> the boot process displayed correctly.  Unfortunately the keyboard
> wasn't attached to this console, but I could at least print stuff on
> the screen from an ssh session, e.g. 'echo "Hello, world!" >
> /dev/console'.  At some point I had to actually use this server
> though, so I abandoned that effort and put the server into production
> (using the legacy video ROM).

The problems you are pointing out seem to be the same problem.

> I'll try to test this diff next week if I can schedule some downtime.

Test is appreciated.

Also I'd like to know the result of

  hexdump -C /var/db/acpi/FACP.1

when "Load Legacy Video Option ROM" setting is disabled.

Thanks,

> -Andrew
> 
> [1] https://marc.info/?l=openbsd-tech&m=150707255507032&w=2
> The first half isn't relevant, but the last part covers the R230, and
> has (old) dmesg for with and without the legacy video ROM.
> I should clarify "X still works in either case": with efifb active, X
> will prefer wsfb unless you add an xorg.conf for mga.  mga performs
> better and can use any resolution, while wsfb is limited to the efifb
> resolution.



Re: diff: init efifb even if VGA is probed.

2020-02-27 Thread YASUOKA Masahiko
On Sun, 23 Feb 2020 21:23:41 +1100
Jonathan Gray  wrote:
> On Sun, Feb 23, 2020 at 07:06:50PM +0900, YASUOKA Masahiko wrote:
>> On Sun, 23 Feb 2020 18:50:54 +0900 (JST)
>> YASUOKA Masahiko  wrote:
>> > On Sat, 22 Feb 2020 13:02:48 +1100
>> > Jonathan Gray  wrote:
>> >> On Fri, Feb 21, 2020 at 02:09:07PM +0900, YASUOKA Masahiko wrote:
>> >>> When efiboot starts the kernel, the video display becomes distorted
>> >>> and never recovered until CPU reset.
>> >>> 
>> >>> Our kernel tries to initialized console twice, first trial is done
>> >>> before getting boot info and second trial is done after getting boot
>> >>> info.  Since EFI framebuffer needs "boot info", it is initialized on
>> >>> second trial.
>> >>> 
>> >>> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> >>> selects vga for the console, but actually it is broken.  On usual
>> >>> machines which boot with EFI, the problem doesn't happen since they
>> >>> have no vga.
>> >>> 
>> >>> The diff following fixes the problem by initializing efifb console
>> >>> even if the VGA is probed.
>> >>> 
>> >>> # Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
>> >>> # disabling the setting avoids the problem happening.  But since the
>> >>> # setting seems to be for old Windows, I think we should fix our
>> >>> # kernel.
>> >>> 
>> >>> comment? ok?
>> >> 
>> >> Is there a way to detect efi or bios before boot info is set?
>> >> Ideally vga_cnattach() would never be called when booting via efi.
>> > 
>> > Yes.  I've tried to find such the way, I found 2 ways.
>> > 
>> > 1) ACPI has FADT_NO_VGA flag which indicate the system has VGA, but
>> > reading ACPI table at early of kernel boot is not good and difficult
>> > 
>> > 2) Pass a flag from efiboot.  A diff for this is attached.
>> > 
>> >> Should the cninit() before the boot args are parsed be removed and just
>> >> have cninit() unconditionally after?  This would make the debug printfs
>> >> in boot arg passing useless, but they already wouldn't work when booting
>> >> via efi.
>> > 
>> > I think this is a straight way and no downside for efi.  For a system
>> > booting via BIOS, there is a downside that panic or debug string isn't
>> > shown at very early part of kernel boot.
>> 
>> A diff for this is attached.
>> 
>> 1st diff
>> - initialize efifb even if vga is probed
>> 
>> 2nd diff
>> - pass a flag from efiboot, then initialize vga/efifb properly with it
>> 
>> 3rd diff
>> - parse bootarg first, then initialize vga/efifb properly
>> 
>> 
>> I think 3rd diff is the best.  Because it makes the code simple and
>> the downside doesn't seem so serious.
>> 
>> 
>> Index: sys/arch/amd64/amd64/machdep.c
>> ===
>> RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/machdep.c,v
>> retrieving revision 1.261
>> diff -u -p -r1.261 machdep.c
>> --- sys/arch/amd64/amd64/machdep.c   24 Jan 2020 05:27:31 -  1.261
>> +++ sys/arch/amd64/amd64/machdep.c   23 Feb 2020 09:46:54 -
>> @@ -1394,16 +1394,6 @@ init_x86_64(paddr_t first_avail)
>>  i8254_startclock();
>>  
>>  /*
>> - * Attach the glass console early in case we need to display a panic.
>> - */
>> -cninit();
>> -
>> -/*
>> - * Initialize PAGE_SIZE-dependent variables.
>> - */
>> -uvm_setpagesize();
> 
> why move uvm_setpagesize?

Because I thought moving uvm_setpagesize helps a panic() in the
function.  But it doesn't help so much since there are still many other
panic()s.  The updated diff doesn't move the function.

>> -
>> -/*
>>   * Boot arguments are in a single page specified by /boot.
>>   *
>>   * We require the "new" vector form, as well as memory ranges
>> @@ -1420,6 +1410,16 @@ init_x86_64(paddr_t first_avail)
>>  } else
>>  panic("invalid /boot");
>>  
>> +/*
>> + * Attach the glass console early in case we need to display a panic.
>> + */
>> +cninit();
> 
> as cninit() is done here docninit and the cninit() call in getbootinfo()
> could be removed.

Yes,

Re: diff: init efifb even if VGA is probed.

2020-02-27 Thread YASUOKA Masahiko
On Sun, 23 Feb 2020 12:47:51 +0100 (CET)
Mark Kettenis  wrote:
>> Date: Sun, 23 Feb 2020 18:50:54 +0900 (JST)
>> From: YASUOKA Masahiko 
>> 
>> On Sat, 22 Feb 2020 13:02:48 +1100
>> Jonathan Gray  wrote:
>> > On Fri, Feb 21, 2020 at 02:09:07PM +0900, YASUOKA Masahiko wrote:
>> >> When efiboot starts the kernel, the video display becomes distorted
>> >> and never recovered until CPU reset.
>> >> 
>> >> Our kernel tries to initialized console twice, first trial is done
>> >> before getting boot info and second trial is done after getting boot
>> >> info.  Since EFI framebuffer needs "boot info", it is initialized on
>> >> second trial.
>> >> 
>> >> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> >> selects vga for the console, but actually it is broken.  On usual
>> >> machines which boot with EFI, the problem doesn't happen since they
>> >> have no vga.
>> >> 
>> >> The diff following fixes the problem by initializing efifb console
>> >> even if the VGA is probed.
>> >> 
>> >> # Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
>> >> # disabling the setting avoids the problem happening.  But since the
>> >> # setting seems to be for old Windows, I think we should fix our
>> >> # kernel.
>> >> 
>> >> comment? ok?
>> > 
>> > Is there a way to detect efi or bios before boot info is set?
>> > Ideally vga_cnattach() would never be called when booting via efi.
>> 
>> Yes.  I've tried to find such the way, I found 2 ways.
>> 
>> 1) ACPI has FADT_NO_VGA flag which indicate the system has VGA, but
>> reading ACPI table at early of kernel boot is not good and difficult
> 
> Reading it in efiboot would be fairly simple though.

I noticed FADT_NO_VGA is cleared on that machine...
I'm sorry i hadn't checked this first.

$ hexdump -C DL20.FACP.1
  46 41 43 50 0c 01 00 00  06 ef 48 50 45 20 20 20  |FACP..HPE   |
0010  53 65 72 76 65 72 20 20  01 00 00 00 31 35 39 30  |Server  1590|
0020  01 00 00 00 00 00 dd 7b  00 40 fe 7b 00 04 09 00  |...{.@.{|
0030  b2 00 00 00 a0 a1 f2 00  00 05 00 00 00 00 00 00  ||
0040  04 05 00 00 00 00 00 00  50 05 00 00 08 05 00 00  |P...|
0050  60 05 00 00 00 00 00 00  04 02 01 04 20 00 10 00  |`... ...|
0060  65 00 e9 03 00 00 00 00  01 03 0d 00 32 33 00 00  |e...23..|
0070  a5 84 00 00 01 08 00 01  f9 0c 00 00 00 00 00 00  ||
0080  06 00 00 00 00 00 00 00  00 00 00 00 00 40 fe 7b  |.@.{|
0090  00 00 00 00 01 20 00 02  00 05 00 00 00 00 00 00  |. ..|
00a0  01 00 00 00 00 00 00 00  00 00 00 00 01 10 00 02  ||
00b0  04 05 00 00 00 00 00 00  01 00 00 00 00 00 00 00  ||
00c0  00 00 00 00 01 08 00 00  50 05 00 00 00 00 00 00  |P...|
00d0  01 20 00 03 08 05 00 00  00 00 00 00 01 ff 00 01  |. ..|
00e0  60 05 00 00 00 00 00 00  01 00 00 00 00 00 00 00  |`...|
00f0  00 00 00 00 01 08 00 00  00 00 00 00 00 00 00 00  ||
0100  01 08 00 00 00 00 00 00  00 00 00 00  ||
010c
$ 

"iapc_boot_arch" field is at offset 0x6d-0x6e.  It's 0x0033.

#define FADT_LEGACY_DEVICES 0x0001  /* Legacy devices supported */
#define FADT_i8042  0x0002  /* Keyboard controller present 
*/
#define FADT_NO_VGA 0x0004  /* Do not probe VGA */
#define FADT_NO_MSI 0x0008  /* Do not enable MSI */

The bit is cleared.

>> 2) Pass a flag from efiboot.  A diff for this is attached.
> 
> So the EFI bootloader could pass a BAPIV_NOVGA fairly trivially.  In
> that case we probably should add the check for the flag in
> wscn_video_init().

I created a diff doing this.  Attached below.

>> > Should the cninit() before the boot args are parsed be removed and just
>> > have cninit() unconditionally after?  This would make the debug printfs
>> > in boot arg passing useless, but they already wouldn't work when booting
>> > via efi.
>> 
>> I think this is a straight way and no downside for efi.  For a system
>> booting via BIOS, there is a downside that panic or debug string isn't
>> shown at very early part of kernel boot.
>> 

Index: sys/stand/boot/bootarg.h
===
RCS file: /var/cvs/openbsd/src/sys/stand/boot/bootarg.h,v
retrieving revision 1.15
diff -u -

Re: diff: init efifb even if VGA is probed.

2020-02-23 Thread YASUOKA Masahiko
On Sun, 23 Feb 2020 18:50:54 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Sat, 22 Feb 2020 13:02:48 +1100
> Jonathan Gray  wrote:
>> On Fri, Feb 21, 2020 at 02:09:07PM +0900, YASUOKA Masahiko wrote:
>>> When efiboot starts the kernel, the video display becomes distorted
>>> and never recovered until CPU reset.
>>> 
>>> Our kernel tries to initialized console twice, first trial is done
>>> before getting boot info and second trial is done after getting boot
>>> info.  Since EFI framebuffer needs "boot info", it is initialized on
>>> second trial.
>>> 
>>> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>>> selects vga for the console, but actually it is broken.  On usual
>>> machines which boot with EFI, the problem doesn't happen since they
>>> have no vga.
>>> 
>>> The diff following fixes the problem by initializing efifb console
>>> even if the VGA is probed.
>>> 
>>> # Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
>>> # disabling the setting avoids the problem happening.  But since the
>>> # setting seems to be for old Windows, I think we should fix our
>>> # kernel.
>>> 
>>> comment? ok?
>> 
>> Is there a way to detect efi or bios before boot info is set?
>> Ideally vga_cnattach() would never be called when booting via efi.
> 
> Yes.  I've tried to find such the way, I found 2 ways.
> 
> 1) ACPI has FADT_NO_VGA flag which indicate the system has VGA, but
> reading ACPI table at early of kernel boot is not good and difficult
> 
> 2) Pass a flag from efiboot.  A diff for this is attached.
> 
>> Should the cninit() before the boot args are parsed be removed and just
>> have cninit() unconditionally after?  This would make the debug printfs
>> in boot arg passing useless, but they already wouldn't work when booting
>> via efi.
> 
> I think this is a straight way and no downside for efi.  For a system
> booting via BIOS, there is a downside that panic or debug string isn't
> shown at very early part of kernel boot.

A diff for this is attached.

1st diff
- initialize efifb even if vga is probed

2nd diff
- pass a flag from efiboot, then initialize vga/efifb properly with it

3rd diff
- parse bootarg first, then initialize vga/efifb properly


I think the 3rd diff is the best, because it makes the code simple and
the downside doesn't seem so serious.


Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.261
diff -u -p -r1.261 machdep.c
--- sys/arch/amd64/amd64/machdep.c  24 Jan 2020 05:27:31 -  1.261
+++ sys/arch/amd64/amd64/machdep.c  23 Feb 2020 09:46:54 -
@@ -1394,16 +1394,6 @@ init_x86_64(paddr_t first_avail)
i8254_startclock();
 
/*
-* Attach the glass console early in case we need to display a panic.
-*/
-   cninit();
-
-   /*
-* Initialize PAGE_SIZE-dependent variables.
-*/
-   uvm_setpagesize();
-
-   /*
 * Boot arguments are in a single page specified by /boot.
 *
 * We require the "new" vector form, as well as memory ranges
@@ -1420,6 +1410,16 @@ init_x86_64(paddr_t first_avail)
} else
panic("invalid /boot");
 
+   /*
+* Attach the glass console early in case we need to display a panic.
+*/
+   cninit();
+
+   /*
+* Initialize PAGE_SIZE-dependent variables.
+*/
+   uvm_setpagesize();
+
 /*
  * Memory on the AMD64 port is described by three different things.
  *
@@ -1928,11 +1928,6 @@ getbootinfo(char *bootinfo, int bootinfo
bios_bootsr_t *bios_bootsr;
int docninit = 0;
 
-#undef BOOTINFO_DEBUG
-#ifdef BOOTINFO_DEBUG
-   printf("bootargv:");
-#endif
-
for (q = (bootarg32_t *)bootinfo;
(q->ba_type != BOOTARG_END) &&
char *)q) - bootinfo) < bootinfo_size);
@@ -1941,24 +1936,15 @@ getbootinfo(char *bootinfo, int bootinfo
switch (q->ba_type) {
case BOOTARG_MEMMAP:
bios_memmap = (bios_memmap_t *)q->ba_arg;
-#ifdef BOOTINFO_DEBUG
-   printf(" memmap %p", bios_memmap);
-#endif
break;
case BOOTARG_DISKINFO:
bios_diskinfo = (bios_diskinfo_t *)q->ba_arg;
-#ifdef BOOTINFO_DEBUG
-   printf(" diskinfo %p", bios_diskinfo);
-#endif
break;
case BOOTARG_APMINFO:
/* generated by i386 boot loader */

Re: diff: init efifb even if VGA is probed.

2020-02-23 Thread YASUOKA Masahiko
On Sat, 22 Feb 2020 13:02:48 +1100
Jonathan Gray  wrote:
> On Fri, Feb 21, 2020 at 02:09:07PM +0900, YASUOKA Masahiko wrote:
>> When efiboot starts the kernel, the video display becomes distorted
>> and never recovered until CPU reset.
>> 
>> Our kernel tries to initialized console twice, first trial is done
>> before getting boot info and second trial is done after getting boot
>> info.  Since EFI framebuffer needs "boot info", it is initialized on
>> second trial.
>> 
>> On HPE DL20 Gen10, probing vga is succeeded on first trial, the kernel
>> selects vga for the console, but actually it is broken.  On usual
>> machines which boot with EFI, the problem doesn't happen since they
>> have no vga.
>> 
>> The diff following fixes the problem by initializing efifb console
>> even if the VGA is probed.
>> 
>> # Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
>> # disabling the setting avoids the problem happening.  But since the
>> # setting seems to be for old Windows, I think we should fix our
>> # kernel.
>> 
>> comment? ok?
> 
> Is there a way to detect efi or bios before boot info is set?
> Ideally vga_cnattach() would never be called when booting via efi.

Yes.  I've tried to find such a way, and I found 2 ways.

1) ACPI has the FADT_NO_VGA flag, which indicates that VGA must not be
probed, but reading the ACPI table early in kernel boot is difficult and
not a good idea.

2) Pass a flag from efiboot.  A diff for this is attached.

> Should the cninit() before the boot args are parsed be removed and just
> have cninit() unconditionally after?  This would make the debug printfs
> in boot arg passing useless, but they already wouldn't work when booting
> via efi.

I think this is the straightforward way and it has no downside for efi.
For a system booting via BIOS, the downside is that a panic or debug
string isn't shown during the very early part of kernel boot.

* * *

Index: sys/arch/amd64/stand/efiboot/exec_i386.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/exec_i386.c,v
retrieving revision 1.3
diff -u -p -r1.3 exec_i386.c
--- sys/arch/amd64/stand/efiboot/exec_i386.c12 Dec 2019 13:09:35 -  
1.3
+++ sys/arch/amd64/stand/efiboot/exec_i386.c23 Feb 2020 09:49:48 -
@@ -163,11 +163,11 @@ run_loadfile(uint64_t *marks, int howto)
marks[i] += delta;
 
 #ifdef __amd64__
-   (*run_i386)((u_long)run_i386, entry, howto, bootdev, BOOTARG_APIVER,
+   (*run_i386)((u_long)run_i386, entry, howto, bootdev, BOOTARG_APIVER | 
BAPIV_EFI,
marks[MARK_END], extmem, cnvmem, ac, (intptr_t)av);
 #else
/* stack and the gung is ok at this point, so, no need for asm setup */
-   (*(startfuncp)entry)(howto, bootdev, BOOTARG_APIVER, marks[MARK_END],
+   (*(startfuncp)entry)(howto, bootdev, BOOTARG_APIVER | BAPIV_EFI, 
marks[MARK_END],
extmem, cnvmem, ac, (int)av);
 #endif
/* not reached */
Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.261
diff -u -p -r1.261 machdep.c
--- sys/arch/amd64/amd64/machdep.c  24 Jan 2020 05:27:31 -  1.261
+++ sys/arch/amd64/amd64/machdep.c  23 Feb 2020 09:50:02 -
@@ -1396,7 +1396,8 @@ init_x86_64(paddr_t first_avail)
/*
 * Attach the glass console early in case we need to display a panic.
 */
-   cninit();
+   if (!ISSET(bootapiver, BAPIV_EFI))
+   cninit();
 
/*
 * Initialize PAGE_SIZE-dependent variables.
@@ -1420,6 +1421,9 @@ init_x86_64(paddr_t first_avail)
} else
panic("invalid /boot");
 
+   /* EFI: bootinfo is required to initialize efifb */
+   if (ISSET(bootapiver, BAPIV_EFI))
+   cninit();
 /*
  * Memory on the AMD64 port is described by three different things.
  *
Index: sys/stand/boot/bootarg.h
===
RCS file: /disk/cvs/openbsd/src/sys/stand/boot/bootarg.h,v
retrieving revision 1.15
diff -u -p -r1.15 bootarg.h
--- sys/stand/boot/bootarg.h8 Apr 2018 13:24:36 -   1.15
+++ sys/stand/boot/bootarg.h23 Feb 2020 09:50:02 -
@@ -32,6 +32,7 @@
 #defineBAPIV_VECTOR0x0002  /* MI vector of MD structures 
passed */
 #defineBAPIV_ENV   0x0004  /* MI environment vars vector */
 #defineBAPIV_BMEMMAP   0x0008  /* MI memory map passed is in 
bytes */
+#defineBAPIV_EFI   0x0010  /* MI booted from EFI */
 
 typedef struct _boot_args {
int ba_type;




efiboot, serial port order

2020-02-21 Thread YASUOKA Masahiko
Hi,

efiboot is using ACPI UID to determine the minor number of comX.

In sys/arch/amd64/stand/efiboot/efiboot.c:
 646 for (i = 0; i < sz / sizeof(EFI_HANDLE); i++) {
 647 /*
 648  * Identify port number of the handle.  This assumes ACPI
 649  * UID 0-3 map to legacy COM[1-4] and they use the legacy
 650  * port address.
 651  */
 652 status = EFI_CALL(BS->HandleProtocol, handles[i], &devp_guid,
 653 (void **)&dp0);
 654 if (EFI_ERROR(status))
 655 continue;
 656 uid = -1;
 657 for (dp = dp0; !IsDevicePathEnd(dp);
 658 dp = NextDevicePathNode(dp)) {
 659 dpp = (EFI_DEV_PATH_PTR)dp;
 660 if (DevicePathType(dp) == ACPI_DEVICE_PATH &&
 661 DevicePathSubType(dp) == ACPI_DP)
 662 if (dpp.Acpi->HID == EFI_PNP_ID(0x0501)) {
 663 uid = dpp.Acpi->UID;
 664 break;
 665 }
 666 }
 667 if (uid < 0 || nitems(serios) <= uid)
 668 continue;
 669 
 670 /* Prepare SERIAL_IO_INTERFACE */
 671 status = EFI_CALL(BS->HandleProtocol, handles[i], &serio_guid,
 672 (void **)&serio);
 673 if (EFI_ERROR(status))
 674 continue;
 675 serios[uid] = serio;
 676 }
 677 free(handles, sz);
 678 
 679 for (i = 0; i < nitems(serios); i++) {
 680 if (serios[i] != NULL)
 681 printf(" com%d", i);
 682 }

I originally wrote this code, because I thought ACPI UID enumeration
is better than the order of handles by EFI.

On qemu or vmware, 2 serial ports are mapped like the following:

  EFI handle  ACPI UID  I/O addr  efiboot  kernel
  0   0 0x3f8 com0 com0
  1   1 0x2f8 com1 com1

EFI handle order and ACPI UID enumeration are same and they also match
I/O address assignment.

But on "HPE DL20 Gen10", 2 serial ports are mapped like the following:

  EFI handle  ACPI UID  I/O addr  efiboot  kernel
  0   1 0x3f8 com1 com0
  1   0 0x2f8 com0 com1

Note that the EFI handle order and the ACPI UID enumeration are different,
and the ACPI UID enumeration doesn't match the order of the I/O address
assignment.  In this case, since com0 and com1 are swapped between
efiboot and the kernel, a serial port that is usable in efiboot becomes
unusable in the kernel.

Fortunately we can use "machine comaddr" to fix up the problem.


Also, I don't know of any actual case where the EFI handle order is wrong
but the ACPI UID is correct.  If using the ACPI UID is useless, we can
apply the diff attached at the end.

comment?

Index: sys/arch/amd64/stand/efiboot/efiboot.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efiboot.c,v
retrieving revision 1.34
diff -u -p -r1.34 efiboot.c
--- sys/arch/amd64/stand/efiboot/efiboot.c  29 Nov 2019 16:16:19 -  
1.34
+++ sys/arch/amd64/stand/efiboot/efiboot.c  22 Feb 2020 01:34:59 -
@@ -631,10 +631,8 @@ efi_com_probe(struct consdev *cn)
EFI_HANDLE  *handles = NULL;
SERIAL_IO_INTERFACE *serio;
EFI_STATUS   status;
-   EFI_DEVICE_PATH *dp, *dp0;
-   EFI_DEV_PATH_PTR dpp;
UINTNsz;
-   int  i, uid = -1;
+   int  i;
 
cn->cn_pri = CN_LOWPRI;
cn->cn_dev = makedev(8, 0);
@@ -651,36 +649,12 @@ efi_com_probe(struct consdev *cn)
return;
}
 
-   for (i = 0; i < sz / sizeof(EFI_HANDLE); i++) {
-   /*
-* Identify port number of the handle.  This assumes ACPI
-* UID 0-3 map to legacy COM[1-4] and they use the legacy
-* port address.
-*/
-   status = EFI_CALL(BS->HandleProtocol, handles[i], _guid,
-   (void **));
-   if (EFI_ERROR(status))
-   continue;
-   uid = -1;
-   for (dp = dp0; !IsDevicePathEnd(dp);
-   dp = NextDevicePathNode(dp)) {
-   dpp = (EFI_DEV_PATH_PTR)dp;
-   if (DevicePathType(dp) == ACPI_DEVICE_PATH &&
-   DevicePathSubType(dp) == ACPI_DP)
-   if (dpp.Acpi->HID == EFI_PNP_ID(0x0501)) {
-   uid = dpp.Acpi->UID;
-   break;
-   }
-   }
-   if (uid < 0 || nitems(serios) <= uid)
-

Re: retries and timeouts for radiusctl(8) test

2020-02-20 Thread YASUOKA Masahiko
ok yasuoka

On Fri, 21 Feb 2020 15:32:53 +1000
David Gwynne  wrote:
> we (work) use radiusctl as part of a check script with relayd so
> we can try and keep a radius service available with some magical
> routing and redirect configs.
> 
> radiusctl is currently pretty simple and sends a radius request,
> and then waits for a reply. however, it does not implement retries
> and timeouts, so if either the reply or request are lost, radiusctl
> ends up waiting in recv forever for a packet that will never turn
> up.
> 
> combined with this, we seem to tickle something in relayd where it
> loses track of its children. this results in us "leaking" radiusctl
> processes. historically we've coped with this by running pkill -x
> radiusctl out of cron, and while it made us a sad on the inside,
> we've been busy recently and had to ignore this for now.
> 
> unfortunately one of the radius servers failed this morning. this
> meant that instead of a few radius packets being lost over a long
> period of time causing a slow leak of radiusctl processes, we never
> got a reply to any radius packet and started accumulating a ton of
> radius processes. in fact, we hit maxproc, which made recovering very
> annoying.
> 
> we run a bunch of different checks out of relayd, but the radiusctl
> one is the only one that doesnt implement timeouts and retries. so
> im fixing it first, and then we'll try and figure out what's wrong
> with relayd.
> 
> the specific changes in this diff is the introduction of a transmission
> retry counter, a time interval between the tries, and a maximum
> wait time that the process has before it gives up waiting for a
> reply. each of these is configurable, but i think the defaults are
> reasonable for a test.
> 
> it introduces libevent, because that makes it easy to manage the
> timeouts.
> 
> ok?
> 
> Index: Makefile
> ===
> RCS file: /cvs/src/usr.sbin/radiusctl/Makefile,v
> retrieving revision 1.2
> diff -u -p -r1.2 Makefile
> --- Makefile  3 Aug 2015 04:10:21 -   1.2
> +++ Makefile  21 Feb 2020 05:15:14 -
> @@ -3,7 +3,7 @@ PROG= radiusctl
>  SRCS=radiusctl.c parser.c chap_ms.c
>  MAN= radiusctl.8
>  CFLAGS+= -Wall -Wextra -Wno-unused-parameter
> -LDADD+=  -lradius -lcrypto
> -DPADD+=  ${LIBRADIUS} ${LIBCRYPTO}
> +LDADD+=  -lradius -lcrypto -levent
> +DPADD+=  ${LIBRADIUS} ${LIBCRYPTO} ${LIBEVENT}
>  
>  .include 
> Index: parser.c
> ===
> RCS file: /cvs/src/usr.sbin/radiusctl/parser.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 parser.c
> --- parser.c  21 Jul 2015 04:06:04 -  1.1
> +++ parser.c  21 Feb 2020 05:15:14 -
> @@ -18,6 +18,8 @@
>   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>   */
>  
> +#include 
> +
>  #include 
>  #include 
>  #include 
> @@ -35,6 +37,9 @@ enum token_type {
>   PORT,
>   METHOD,
>   NAS_PORT,
> + TRIES,
> + INTERVAL,
> + MAXWAIT,
>   ENDTOKEN
>  };
>  
> @@ -45,7 +50,11 @@ struct token {
>   const struct token  *next;
>  };
>  
> -static struct parse_result res;
> +static struct parse_result res = {
> + .tries  = TEST_TRIES_DEFAULT,
> + .interval   = { TEST_INTERVAL_DEFAULT, 0 },
> + .maxwait= { TEST_MAXWAIT_DEFAULT, 0 },
> +};
>  
>  static const struct token t_test[];
>  static const struct token t_secret[];
> @@ -55,6 +64,9 @@ static const struct token t_password[];
>  static const struct token t_port[];
>  static const struct token t_method[];
>  static const struct token t_nas_port[];
> +static const struct token t_tries[];
> +static const struct token t_interval[];
> +static const struct token t_maxwait[];
>  
>  static const struct token t_main[] = {
>   { KEYWORD,  "test", TEST,   t_test },
> @@ -82,6 +94,9 @@ static const struct token t_test_opts[] 
>   { KEYWORD,  "port", NONE,   t_port },
>   { KEYWORD,  "method",   NONE,   t_method },
>   { KEYWORD,  "nas-port", NONE,   t_nas_port },
> + { KEYWORD,  "interval", NONE,   t_interval },
> + { KEYWORD,  "tries",NONE,   t_tries },
> + { KEYWORD,  "maxwait",  NONE,   t_maxwait },
>   { ENDTOKEN, "", NONE,   NULL }
>  };
>  
> @@ -105,6 +120,21 @@ static const struct token t_nas_port[] =
>   { ENDTOKEN, "", NONE,   NULL }
>  };
>  
> +static const struct token t_tries[] = {
> + { TRIES,"", NONE,   t_test_opts },
> + { ENDTOKEN, "", NONE,   NULL }
> +};
> +
> +static const struct token t_interval[] = {
> + { INTERVAL, "", NONE,   t_test_opts },
> + { ENDTOKEN, "", NONE,   NULL 

diff: init efifb even if VGA is probed.

2020-02-20 Thread YASUOKA Masahiko
Hello,

I am testing a new hardware, HPE DL20 Gen10.

When efiboot starts the kernel, the video display becomes distorted
and never recovers until a CPU reset.

Our kernel tries to initialize the console twice: the first attempt is
made before getting boot info and the second attempt is made after getting
boot info.  Since the EFI framebuffer needs "boot info", it is initialized
on the second attempt.

On HPE DL20 Gen10, probing vga succeeds on the first attempt, so the
kernel selects vga for the console, but it is actually broken.  On usual
machines which boot with EFI, the problem doesn't happen since they
have no vga.

The diff following fixes the problem by initializing efifb console
even if the VGA is probed.

# Also, HP DL20 Gen10 has "UEFI optimized boot" setting on BIOS and
# disabling the setting avoids the problem happening.  But since the
# setting seems to be for old Windows, I think we should fix our
# kernel.

comment? ok?

Initialize efifb as a console even if the VGA is probed.

Index: sys/arch/amd64/amd64/wscons_machdep.c
===
RCS file: /var/cvs/openbsd/src/sys/arch/amd64/amd64/wscons_machdep.c,v
retrieving revision 1.14
diff -u -p -r1.14 wscons_machdep.c
--- sys/arch/amd64/amd64/wscons_machdep.c   14 Oct 2017 04:44:43 -  
1.14
+++ sys/arch/amd64/amd64/wscons_machdep.c   21 Feb 2020 04:42:38 -
@@ -73,7 +73,7 @@
 #include 
 #endif
 
-intwscn_video_init(void);
+intwscn_video_init(int);
 void   wscn_input_init(int);
 
 cons_decl(ws);
@@ -103,10 +103,12 @@ wscninit(struct consdev *cp)
 {
static int initted = 0;
 
-   if (initted)
+   if (initted) {
+   wscn_video_init(1);
return;
+   }
 
-   if (wscn_video_init() == 0) {
+   if (wscn_video_init(0) == 0) {
initted = 1;
wscn_input_init(0);
}
@@ -134,11 +136,17 @@ wscnpollc(dev_t dev, int on)
  * Configure the display part of the console.
  */
 int
-wscn_video_init(void)
+wscn_video_init(int pass)
 {
 #if (NEFIFB > 0)
-   if (efifb_cnattach() == 0)
-   return (0);
+   extern int vgaconsole;
+   if (pass > 0) {
+   if (efifb_cnattach() == 0) {
+   vgaconsole = 0;
+   return (0);
+   }
+   return (-1);
+   }
 #endif
 #if (NVGA > 0)
if (vga_cnattach(X86_BUS_SPACE_IO, X86_BUS_SPACE_MEM, -1, 1) == 0)



Re: remove needless #ifdef

2020-02-14 Thread YASUOKA Masahiko
committed.  Thanks

On Fri, 14 Feb 2020 08:48:06 +0100
Claudio Jeker  wrote:
> On Thu, Feb 13, 2020 at 11:50:46PM +0100, Jan Stary wrote:
>> On Feb 10 09:28:38, yasu...@openbsd.org wrote:
>> > Hi,
>> > 
>> > On Sun, 09 Feb 2020 19:28:50 +0100
>> > Jeremie Courreges-Anglas  wrote:
>> > > On Sun, Feb 09 2020, Jan Stary  wrote:
>> > >> Currently, sys/net/pipex_local.h asks #ifdef __OpenBSD__
>> > >> and if so, defines "Static" to be nothing, to use it later.
>> > >> That can go away, right?
>> > > 
>> > > I believe that's something the IIJ folks want to keep, cc'ing Yasuoka.
>> > 
>> > I once thought keeping "static" is better for maintaining the code,
>> > but now I don't think it's necessary.  So it's ok to remove them.
>> 
>> So can we remove them please?
> 
> Yes. OK claudio
> 
>>  Jan
>> 
>> > >>
>> > >> Index: sys/net/pipex_local.h
>> > >> ===
>> > >> RCS file: /cvs/src/sys/net/pipex_local.h,v
>> > >> retrieving revision 1.30
>> > >> diff -u -p -r1.30 pipex_local.h
>> > >> --- sys/net/pipex_local.h   31 Jan 2019 18:01:14 -  1.30
>> > >> +++ sys/net/pipex_local.h   9 Feb 2020 15:26:51 -
>> > >> @@ -26,12 +26,6 @@
>> > >>   * SUCH DAMAGE.
>> > >>   */
>> > >>  
>> > >> -#ifdef __OpenBSD__
>> > >> -#define Static
>> > >> -#else
>> > >> -#define Static static
>> > >> -#endif
>> > >> -
>> > >>  #definePIPEX_PPTP  1
>> > >>  #definePIPEX_L2TP  1
>> > >>  #definePIPEX_PPPOE 1
>> > >> @@ -372,59 +366,56 @@ extern struct pipex_hash_head pipex_id_h
>> > >>  #define PIPEX_TCP_OPTLEN 40
>> > >>  #definePIPEX_L2TP_MINLEN   8
>> > >>  
>> > >> -/*
>> > >> - * static function prototypes
>> > >> - */
>> > >> -Static void  pipex_iface_start (struct 
>> > >> pipex_iface_context *);
>> > >> -Static void  pipex_iface_stop (struct 
>> > >> pipex_iface_context *);
>> > >> -Static int   pipex_add_session (struct 
>> > >> pipex_session_req *, struct pipex_iface_context *);
>> > >> -Static int   pipex_close_session (struct 
>> > >> pipex_session_close_req *);
>> > >> -Static int   pipex_config_session (struct 
>> > >> pipex_session_config_req *);
>> > >> -Static int   pipex_get_stat (struct 
>> > >> pipex_session_stat_req *);
>> > >> -Static int   pipex_get_closed (struct 
>> > >> pipex_session_list_req *);
>> > >> -Static int   pipex_destroy_session (struct 
>> > >> pipex_session *);
>> > >> -Static struct pipex_session  *pipex_lookup_by_ip_address (struct 
>> > >> in_addr);
>> > >> -Static struct pipex_session  *pipex_lookup_by_session_id (int, int);
>> > >> -Static void  pipex_ip_output (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >> -Static void  pipex_ppp_output (struct mbuf *, struct 
>> > >> pipex_session *, int);
>> > >> -Static int   pipex_ppp_proto (struct mbuf *, struct 
>> > >> pipex_session *, int, int *);
>> > >> -Static void  pipex_ppp_input (struct mbuf *, struct 
>> > >> pipex_session *, int);
>> > >> -Static void  pipex_ip_input (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >> +void  pipex_iface_start (struct pipex_iface_context *);
>> > >> +void  pipex_iface_stop (struct pipex_iface_context *);
>> > >> +int   pipex_add_session (struct pipex_session_req *, 
>> > >> struct pipex_iface_context *);
>> > >> +int   pipex_close_session (struct 
>> > >> pipex_session_close_req *);
>> > >> +int   pipex_config_session (struct 
>> > >> pipex_session_config_req *);
>> > >> +int   pipex_get_stat (struct pipex_session_stat_req *);
>> > >> +int   pipex_get_closed (struct pipex_session_list_req 
>> > >> *);
>> > >> +int   pipex_destroy_session (struct pipex_session *);
>> > >> +struct pipex_session  *pipex_lookup_by_ip_address (struct in_addr);
>> > >> +struct pipex_session  *pipex_lookup_by_session_id (int, int);
>> > >> +void  pipex_ip_output (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >> +void  pipex_ppp_output (struct mbuf *, struct 
>> > >> pipex_session *, int);
>> > >> +int   pipex_ppp_proto (struct mbuf *, struct 
>> > >> pipex_session *, int, int *);
>> > >> +void  pipex_ppp_input (struct mbuf *, struct 
>> > >> pipex_session *, int);
>> > >> +void  pipex_ip_input (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >>  #ifdef INET6
>> > >> -Static void  pipex_ip6_input (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >> +void  pipex_ip6_input (struct mbuf *, struct 
>> > >> pipex_session *);
>> > >>  #endif
>> > >> -Static struct mbuf   *pipex_common_input(struct pipex_session 
>> > >> *, 

diff: httpd, handling no content-length and not chunked transfer

2020-02-14 Thread YASUOKA Masahiko
Hi,

When httpd received the following request,

  POST /cgi-bin/test.cgi HTTP/1.0
  Host: 127.0.0.1
  Content-Type: application/json
  Content-Length: 0

if the request is handled by fastcgi, STDIN is closed immediately.

But the problem is that STDIN is never closed if the request doesn't
have "Content-Length" header like the following.  read(STDIN) doesn't
return forever.

  POST /cgi-bin/test.cgi HTTP/1.0
  Host: 127.0.0.1
  Content-Type: application/json

In RFC7230 Section 3.3.3

|   6.  If this is a request message and none of the above are true, then
|   the message body length is zero (no message body is present).

it says the body length is zero if neither Content-Length nor
Transfer-Encoding is specified.

The diff is to fix the problem.

ok?

Treat the message body length as 0 byte if Content-Length is not
specified and chunked transfer is not used.

Index: usr.sbin/httpd/server_http.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_http.c,v
retrieving revision 1.136
diff -u -p -r1.136 server_http.c
--- usr.sbin/httpd/server_http.c14 Jan 2020 20:48:57 -  1.136
+++ usr.sbin/httpd/server_http.c4 Feb 2020 06:30:14 -
@@ -421,12 +421,12 @@ server_read_http(struct bufferevent *bev
/* HTTP request payload */
if (clt->clt_toread > 0)
bev->readcb = server_read_httpcontent;
-
-   /* Single-pass HTTP body */
-   if (clt->clt_toread < 0) {
-   clt->clt_toread = TOREAD_UNLIMITED;
-   bev->readcb = server_read;
-   }
+   else if (!desc->http_chunked)
+   /*
+* When no content-length and no chunked
+* transfer, it means body length is zero.
+*/
+   clt->clt_toread = 0;
break;
default:
server_abort_http(clt, 405, "method not allowed");



diff: nvme.c

2020-02-14 Thread YASUOKA Masahiko
Hi,

I have a problem that kernel core dumping always hangs around first
8MB.  The diff attached fixes the problem.

In nvme_poll():

 930 while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
 931 if (nvme_q_complete(sc, q) == 0)
 932 delay(10);
 933 
 934 /* XXX no timeout? */
 935 }

this loop is to wait commands completion when the system is cold.  If
CQE_PHASE flag on state.c.flags is set, it breaks the loop.  In
nvme_q_complete():

 979 int
 980 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
 981 {
 982 struct nvme_ccb *ccb;
 983 struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
 984 u_int32_t head;
 985 u_int16_t flags;
 986 int rv = 0;
 987 
 988 if (!mtx_enter_try(&q->q_cq_mtx))
 989 return (-1);
 990 
 991 head = q->q_cq_head;
 992 
 993 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
 994 for (;;) {
 995 cqe = &ring[head];
 996 flags = lemtoh16(&cqe->flags);
 997 if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
 998 break;
 999 
1000 ccb = &sc->sc_ccbs[cqe->cid];
1001 ccb->ccb_done(sc, ccb, cqe);
1002 
1003 if (++head >= q->q_entries) {
1004 head = 0;
1005 q->q_cq_phase ^= NVME_CQE_PHASE;
1006 }
1007 
1008 rv = 1;
1009 }
1010 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

See #997, the same CQE_PHASE flag is used for breaking the loop, but
see #1005: the meaning of the flag is inverted each time the ring
buffer wraps around.  Please note this.

In ccb->ccb_done()( which is actually nvme_poll_done()):

 954 void
 955 nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
 956 struct nvme_cqe *cqe)
 957 {
 958 struct nvme_poll_state *state = ccb->ccb_cookie;
 959 
 960 SET(cqe->flags, htole16(NVME_CQE_PHASE));
 961 state->c = *cqe;
 962 }

struct nvme_poll_state *state is the same object "state" in
nvme_poll().  cqe is a mapped object of a physical queue.

Line #960 sets the NVME_CQE_PHASE bit on "cqe" and #961 copies it to "state".

I think nvme_poll_done() should change only the flag on "state", and
should not change the flag on "cqe".  Also remember that the meaning
of the flag on the queue is inverted each time the ring wraps around.
As a result of modifying the flag on the physical queue, it may
happen that the loop in nvme_q_complete() never breaks.


comment? ok?

Index: sys/dev/ic/nvme.c
===
RCS file: /disk/cvs/openbsd/src/sys/dev/ic/nvme.c,v
retrieving revision 1.63
diff -u -p -r1.63 nvme.c
--- sys/dev/ic/nvme.c   27 Jul 2019 13:20:12 -  1.63
+++ sys/dev/ic/nvme.c   15 Feb 2020 02:16:22 -
@@ -957,8 +957,8 @@ nvme_poll_done(struct nvme_softc *sc, st
 {
struct nvme_poll_state *state = ccb->ccb_cookie;
 
-   SET(cqe->flags, htole16(NVME_CQE_PHASE));
state->c = *cqe;
+   SET(state->c.flags, htole16(NVME_CQE_PHASE));
 }
 
 void



Re: remove needless #ifdef

2020-02-09 Thread YASUOKA Masahiko
Hi,

On Sun, 09 Feb 2020 19:28:50 +0100
Jeremie Courreges-Anglas  wrote:
> On Sun, Feb 09 2020, Jan Stary  wrote:
>> Currently, sys/net/pipex_local.h asks #ifdef __OpenBSD__
>> and if so, defines "Static" to be nothing, to use it later.
>> That can go away, right?
> 
> I believe that's something the IIJ folks want to keep, cc'ing Yasuoka.

I once thought keeping "static" is better for maintaining the code,
but now I don't think it's necessary.  So it's ok to remove them.


>>  Jan
>>
>>
>> Index: sys/net/pipex_local.h
>> ===
>> RCS file: /cvs/src/sys/net/pipex_local.h,v
>> retrieving revision 1.30
>> diff -u -p -r1.30 pipex_local.h
>> --- sys/net/pipex_local.h31 Jan 2019 18:01:14 -  1.30
>> +++ sys/net/pipex_local.h9 Feb 2020 15:26:51 -
>> @@ -26,12 +26,6 @@
>>   * SUCH DAMAGE.
>>   */
>>  
>> -#ifdef __OpenBSD__
>> -#define Static
>> -#else
>> -#define Static static
>> -#endif
>> -
>>  #define PIPEX_PPTP  1
>>  #define PIPEX_L2TP  1
>>  #define PIPEX_PPPOE 1
>> @@ -372,59 +366,56 @@ extern struct pipex_hash_head  pipex_id_h
>>  #define PIPEX_TCP_OPTLEN 40
>>  #define PIPEX_L2TP_MINLEN   8
>>  
>> -/*
>> - * static function prototypes
>> - */
>> -Static void  pipex_iface_start (struct pipex_iface_context 
>> *);
>> -Static void  pipex_iface_stop (struct pipex_iface_context 
>> *);
>> -Static int   pipex_add_session (struct pipex_session_req *, 
>> struct pipex_iface_context *);
>> -Static int   pipex_close_session (struct 
>> pipex_session_close_req *);
>> -Static int   pipex_config_session (struct 
>> pipex_session_config_req *);
>> -Static int   pipex_get_stat (struct pipex_session_stat_req 
>> *);
>> -Static int   pipex_get_closed (struct 
>> pipex_session_list_req *);
>> -Static int   pipex_destroy_session (struct pipex_session *);
>> -Static struct pipex_session  *pipex_lookup_by_ip_address (struct in_addr);
>> -Static struct pipex_session  *pipex_lookup_by_session_id (int, int);
>> -Static void  pipex_ip_output (struct mbuf *, struct 
>> pipex_session *);
>> -Static void  pipex_ppp_output (struct mbuf *, struct 
>> pipex_session *, int);
>> -Static int   pipex_ppp_proto (struct mbuf *, struct 
>> pipex_session *, int, int *);
>> -Static void  pipex_ppp_input (struct mbuf *, struct 
>> pipex_session *, int);
>> -Static void  pipex_ip_input (struct mbuf *, struct 
>> pipex_session *);
>> +void  pipex_iface_start (struct pipex_iface_context *);
>> +void  pipex_iface_stop (struct pipex_iface_context *);
>> +int   pipex_add_session (struct pipex_session_req *, struct 
>> pipex_iface_context *);
>> +int   pipex_close_session (struct pipex_session_close_req 
>> *);
>> +int   pipex_config_session (struct pipex_session_config_req 
>> *);
>> +int   pipex_get_stat (struct pipex_session_stat_req *);
>> +int   pipex_get_closed (struct pipex_session_list_req *);
>> +int   pipex_destroy_session (struct pipex_session *);
>> +struct pipex_session  *pipex_lookup_by_ip_address (struct in_addr);
>> +struct pipex_session  *pipex_lookup_by_session_id (int, int);
>> +void  pipex_ip_output (struct mbuf *, struct pipex_session 
>> *);
>> +void  pipex_ppp_output (struct mbuf *, struct pipex_session 
>> *, int);
>> +int   pipex_ppp_proto (struct mbuf *, struct pipex_session 
>> *, int, int *);
>> +void  pipex_ppp_input (struct mbuf *, struct pipex_session 
>> *, int);
>> +void  pipex_ip_input (struct mbuf *, struct pipex_session 
>> *);
>>  #ifdef INET6
>> -Static void  pipex_ip6_input (struct mbuf *, struct 
>> pipex_session *);
>> +void  pipex_ip6_input (struct mbuf *, struct pipex_session 
>> *);
>>  #endif
>> -Static struct mbuf   *pipex_common_input(struct pipex_session *, 
>> struct mbuf *, int, int, int);
>> +struct mbuf   *pipex_common_input(struct pipex_session *, struct 
>> mbuf *, int, int, int);
>>  
>>  #ifdef PIPEX_PPPOE
>> -Static void  pipex_pppoe_output (struct mbuf *, struct 
>> pipex_session *);
>> +void  pipex_pppoe_output (struct mbuf *, struct 
>> pipex_session *);
>>  #endif
>>  
>>  #ifdef PIPEX_PPTP
>> -Static void  pipex_pptp_output (struct mbuf *, struct 
>> pipex_session *, int, int);
>> -Static struct pipex_session  *pipex_pptp_userland_lookup_session(struct 
>> mbuf *, struct sockaddr *);
>> +void  pipex_pptp_output (struct mbuf *, struct 
>> pipex_session *, int, int);
>> +struct pipex_session  *pipex_pptp_userland_lookup_session(struct mbuf *, 
>> struct 

Re: EFI frame buffer > 4GB

2020-01-23 Thread YASUOKA Masahiko
Yes, the diff fixed the problem of my vaio.

Thanks,

On Fri, 24 Jan 2020 01:52:56 +0100
Mark Kettenis  wrote:
> Mike Larkin and I came up with the following diff that keeps mapping
> the framebuffer early. We tested this on a small number of machines
> here that have the framebuffer < 4GB.
> It'd be great if we can confirm this also works on machines where it is
> above 4GB.
> 
> Thanks,
> 
> Mark



exit status of isakmpd(8)

2020-01-22 Thread YASUOKA Masahiko
ok?

When isakmpd's main process dies abnormally, currently its "monitor"
process exits with status 0.  Fix it to use the exit status of main
process.

Index: sbin/isakmpd/monitor.c
===
RCS file: /var/cvs/openbsd/src/sbin/isakmpd/monitor.c,v
retrieving revision 1.80
diff -u -p -r1.80 monitor.c
--- sbin/isakmpd/monitor.c  19 Dec 2019 19:09:53 -  1.80
+++ sbin/isakmpd/monitor.c  23 Jan 2020 03:54:35 -
@@ -146,7 +146,7 @@ monitor_init(int debug)
 void
 monitor_exit(int code)
 {
-   int status;
+   int status = 0, gotstatus = 0;
pid_t pid;
 
if (m_state.pid != 0) {
@@ -156,6 +156,8 @@ monitor_exit(int code)
do {
pid = waitpid(m_state.pid, &status, 0);
} while (pid == -1 && errno == EINTR);
+   if (pid != -1)
+   gotstatus = 1;
 
/* Remove FIFO and pid files.  */
unlink(ui_fifo);
@@ -163,7 +165,10 @@ monitor_exit(int code)
}
 
close(m_state.s);
-   exit(code);
+   if (code == 0 && gotstatus)
+   exit(WIFEXITED(status)? WEXITSTATUS(status) : 1);
+   else
+   exit(code);
 }
 
 int



Re: GRE datagram socket support

2020-01-21 Thread YASUOKA Masahiko
Hi,

I think that is a good idea.

On Wed, 22 Jan 2020 08:35:05 +1000
David Gwynne  wrote:
> Has anyone got an opinion on this? I am still interested in doing more
> packet capture things on OpenBSD using GRE as a transport, and the idea
> of maintaining this out of tree just makes me feel tired.
> 
> On Tue, Oct 29, 2019 at 06:34:50PM +1000, David Gwynne wrote:
>> i've been toying with this idea of implementing GRE as a datagram
>> protocol that userland can use just like UDP. the idea is to make it
>> easy to support the implementation of NHRP in userland for mgre(4),
>> and also for ERSPAN* support without going down the path linux took**.
>> 
>> so this is the result of having a go at implementing the idea. the diff
>> includes several independent parts, but they all work together to make
>> GRE as comfortable to use as UDP. the two main parts are the actual
>> protocol implementation in src/sys/netinet/ip_gre.c, and the tweaks to
>> getaddrinfo to allow the resolution of gre services. the /etc/services
>> chunk gets used by the getaddrinfo bits.
>> 
>> so, the first chunk lets you do this (as root in userland):
>> 
>>  int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_GRE);
>> 
>> that gives you a file descriptor you can then use with bind(),
>> connect(), sendto(), recvfrom(), etc. you write a message to the
>> kernel and it prepends the GRE and IP headers and pushes it out.
>> it is set up so the GRE protocol is handed to the kernel via the
>> sin_port or sin6_port member of struct sockaddr_in and sockaddr_in6
>> respectively. there's no source and destination protocol fields, just
>> one that both ends agree on, so if you connect then bind, your
>> sockaddrs have to agree on the proto. unfortunately there's no such
>> thing as a wildcard or reserved protocol in GRE, so 0 can't be used
>> as a wildcard like it can in udp and tcp.
>> 
>> the sockets support the configuration of optional GRE headers, as
>> defined in RFC 2890, using setsockopt. importantly you can enable
>> the key and sequence number headers, which again, the kernel offloads
>> for you.
>> 
>> the second chunk tweaks getaddrinfo so it lets you specify things other
>> than IPPROTO_UDP and IPPROTO_TCP. protocols other than those are now
>> looked up in /etc/protocols to get their name, which in turn is used to
>> look up entries in /etc/services. while i was there and reading rfcs, i
>> noted different behaviour for wildcarded socktypes and protocols, which
>> i've tried to implement. eric@ seems generally ok with this stuff, and
>> suggested the tweak to pledge to allow access to /etc/protocols using
>> the dns pledge. tcp and udp are still special though, and are still
>> omgoptimised.
>> 
>> all this together lets the program at
>> https://mild.embarrassm.net/~dlg/diff/egred.c work. it is a userland
>> reimplementation of a simplified egre(4) using tap(4) and a gre socket.
>> the io path is literally reading from one fd and writing it to the other,
>> everything else is boilerplate.
>> 
>> i suspect the kernel stuff is a bit rough as i havent had to test every
>> path, but it supports common functionality.
>> 
>> thoughts? i am pretty pleased with how this has turned out, and would be
>> keen to put it in the tree and work on it some more.
>> 
>> * https://tools.ietf.org/html/draft-foschiano-erspan-03
>> ** http://vger.kernel.org/lpc_net2018_talks/erspan-linux-presentation.pdf
>> 
>> Index: etc/services
>> ===
>> RCS file: /cvs/src/etc/services,v
>> retrieving revision 1.96
>> diff -u -p -r1.96 services
>> --- etc/services 27 Jan 2019 20:35:06 -  1.96
>> +++ etc/services 29 Oct 2019 07:57:44 -
>> @@ -332,6 +332,21 @@ spamd-cfg   8026/tcp# 
>> spamd(8) configur
>>  dhcpd-sync  8067/udp# dhcpd(8) synchronisation
>>  hunt26740/udp   # hunt(6)
>>  #
>> +# GRE Protocol Types
>> +#
>> +keepalive   0/gre   # 0x0000: IP tunnel keepalive
>> +ipv42048/gre# 0x0800: IPv4
>> +nhrp8193/gre# 0x2001: Next Hop 
>> Resolution Protocol
>> +erspan3 8939/gre# 0x22eb: ERSPAN III
>> +transether  25944/gre   ethernet# 0x6558: Trans Ether Bridging
>> +ipv634525/gre   # 0x86dd: IPv6
>> +wccp34878/gre   # 0x883e: Web Content 
>> Cache Protocol
>> +mpls34887/gre   # 0x8847: MPLS
>> +#mpls   34888/gre   # 0x8848: MPLS Multicast
>> +erspan  35006/gre   erspan2 # 0x88be: ERSPAN I/II
>> +nsh 35151/gre   # 0x894f: Network Service Header
>> +control 47082/gre   # 0xb7ea: RFC 8157
>> +#
>>  # Appletalk
>>  #
>>  rtmp

diff for gdb

2019-12-20 Thread YASUOKA Masahiko
When I debug kernel with kernel core, backtrace command ends around
alltraps_kern_meltdown().  The following diff fixes this problem.

ok?

Teach gdb that the trap frame should be used for
alltraps_kern_meltdown()

Index: gnu/usr.bin/binutils/gdb/amd64obsd-tdep.c
===
RCS file: /cvs/src/gnu/usr.bin/binutils/gdb/amd64obsd-tdep.c,v
retrieving revision 1.13
diff -u -p -r1.13 amd64obsd-tdep.c
--- gnu/usr.bin/binutils/gdb/amd64obsd-tdep.c   21 Oct 2019 13:09:52 -  
1.13
+++ gnu/usr.bin/binutils/gdb/amd64obsd-tdep.c   21 Dec 2019 06:52:03 -
@@ -463,6 +463,7 @@ amd64obsd_trapframe_sniffer (const struc
   || (name[0] == 'X' && strncmp(name, "Xipi_", 5) != 0)
   || (strcmp (name, "alltraps") == 0)
   || (strcmp (name, "alltraps_kern") == 0)
+  || (strcmp (name, "alltraps_kern_meltdown") == 0)
   || (strcmp (name, "intr_fast_exit") == 0)
   || (strcmp (name, "intr_exit_recurse") == 0)));
 }



Re: EFI frame buffer > 4GB

2019-09-22 Thread YASUOKA Masahiko
Hi,

I'm sorry for delay.

On Fri, 20 Sep 2019 22:08:52 -0700
Mike Larkin  wrote:
> On Fri, Sep 20, 2019 at 03:35:00PM +0200, Mark Kettenis wrote:
>> > Date: Fri, 20 Sep 2019 06:06:40 -0700
>> > From: Mike Larkin 
>> > On Fri, Sep 20, 2019 at 02:22:13PM +0200, Mark Kettenis wrote:
>> > > > Date: Fri, 20 Sep 2019 02:55:27 -0700
>> > > > From: Mike Larkin 
>> > > >
>> > > > On Fri, Sep 20, 2019 at 01:09:56AM +0900, YASUOKA Masahiko wrote:
>> > > > > Hi,
>> > > > >
>> > > > > I recently got a VAIO Pro PK.  The diff below is required to boot.
>> > > > > Without the diff, it freezes during boot.
>> > > > >
>> > > >
>> > > > > Its EFI framebuffer is located at 0x4000000000 (9 zeros).  This is > 4GB
>> > > > > and higher than highest available memory of the machine.  These
>> > > > > configurations seem to cause the problem.
>> > > > >
>> > > > > * * *
>> > > > >
>> > > > > Call cninit() after pmap_bootstrap() is called.  Since the EFI
>> > > > > framebuffer may be located > 4GB which is not initialized by locore,
>> > > > > but by pmap_bootstrap().  Also make the address parameter passed to
>> > > > > pmap_bootstrap() cover the framebuffer.  Actually VAIO pro PK's
>> > > > > framebuffer is located higher than the highest available memory
>> > > > > region.
>> > > > >
>> > > > > ok? comments?
>> > > > >
>> > > >
>> > > > Hi,
>> > > >
>> > > >  I have a few questions...
>> > > >
>> > > > 1. There seems to be no limit on the max PA that we extend to here.
>> > > >This means, for example, if EFI places the framebuffer past 2TB
>> > > >PA, we won't have enough direct map to cover the mapping. Plus
>> > > >I think this will end up extending the direct map to cover any hole
>> > > >between "end of phys mem" and "efi fb addr". At a minimum, I think
>> > > >we need some sort of max PA clamp here. I don't know what Sony's
>> > > >placement algorithm is, but 0x4000000000 is 256GB PA.
>> > >
>> > > A dmesg and pcidump output would be useful.

Attached them at the bottom.

>> > > I suspect that this is a discrete graphics card where the EFI frame
>> > > buffer resides in VRAM.  Using the direct map in this case is probably
>> > > not the right thing to do.
>> > >
>> > > > 2. What does delaying cninit do for machines that have errors or
>> > > >printfs before this? Would those even print anymore? This would
>> > > >affect all machines, even those without efifb, correct?
>> > >
>> > > Yes and no.  It doesn't affect the classic VGA glass console, but it
>> > > does mean serial output might disappear.  That isn't acceptable I'd
>> > > say.
>> > >
>> > > > 3. I am not a big fan of placing device-specific quirks in
>> > > >init_x86_64. Could this not be done in the efifb specific console
>> > > >init code? You could pmap_enter whatever you wanted there, based on
>> > > >the PA EFI sent you. Or does efifb go through the direct map for
>> > > >all video output? If so, we may be stuck creating that big direct 
>> > > > map
>> > > >range. If that's the case though, we should probably try to restrict
>> > > >the permissions in the unused holes.
>> > >
>> > > The direct map is only used early on in the boot process.  The frame
>> > > buffer is remapped in mainbus_attach() such that we can use
>> > > write-combining.  But that is done after we print copyright.  I think
>> > > the remapping could be done a bit earlier, but not before uvm gets
>> > > initialized, which happens after we print the copyright message.
>> > >
>> > > We don't have to use the direct map during early boot.  If you gave us
>> > > some other way to map the framebuffer before pmap_bootstrap() has been
>> > > called we could stick that into efifb_cnattach_common().  We'd need
>> > > your help with that though.  Note that the framebuffer can be fairly
>> > > large though (but we can probably come up with a reasonable 

EFI frame buffer > 4GB

2019-09-19 Thread YASUOKA Masahiko
Hi,

I recently got a VAIO Pro PK.  The diff below is required to boot.
Without the diff, it freezes during boot.


Its EFI framebuffer is located at 0x4000000000 (9 zeros).  This is > 4GB
and higher than highest available memory of the machine.  These
configurations seem to cause the problem.

* * *

Call cninit() after pmap_bootstrap() is called.  Since the EFI
framebuffer may be located > 4GB which is not initialized by locore,
but by pmap_bootstrap().  Also make the address parameter passed to
pmap_bootstrap() cover the framebuffer.  Actually VAIO pro PK's
framebuffer is located higher than the highest available memory
region.

ok? comments?

Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.259
diff -u -p -r1.259 machdep.c
--- sys/arch/amd64/amd64/machdep.c  7 Sep 2019 19:05:44 -   1.259
+++ sys/arch/amd64/amd64/machdep.c  19 Sep 2019 15:55:18 -
@@ -193,6 +193,8 @@ int lid_action = 1;
 int pwr_action = 1;
 int forceukbd;
 
+int docninit;
+
 /*
  * safepri is a safe priority for sleep to set for a spin-wait
  * during autoconfiguration or after a panic.
@@ -1371,6 +1373,7 @@ init_x86_64(paddr_t first_avail)
bios_memmap_t *bmp;
int x, ist;
uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30;
+   paddr_t max_pa;
 
cpu_init_msrs(&cpu_info_primary);
 
@@ -1541,7 +1544,16 @@ init_x86_64(paddr_t first_avail)
 * Call pmap initialization to make new kernel address space.
 * We must do this before loading pages into the VM system.
 */
-   first_avail = pmap_bootstrap(first_avail, trunc_page(avail_end));
+   max_pa = avail_end;
+   /* Make sure max_pa covers the EFI frame buffer */
+   if (bios_efiinfo->fb_addr != 0 &&
+   max_pa < bios_efiinfo->fb_addr + bios_efiinfo->fb_size)
+   max_pa = bios_efiinfo->fb_addr + bios_efiinfo->fb_size;
+   first_avail = pmap_bootstrap(first_avail, trunc_page(max_pa));
+
+   /* Call cninit after entire physical memory is available */
+   if (docninit > 0)
+   cninit();
 
/* Allocate these out of the 640KB base memory */
if (avail_start != PAGE_SIZE)
@@ -1914,7 +1926,6 @@ getbootinfo(char *bootinfo, int bootinfo
bios_ddb_t *bios_ddb;
bios_bootduid_t *bios_bootduid;
bios_bootsr_t *bios_bootsr;
-   int docninit = 0;
 
 #undef BOOTINFO_DEBUG
 #ifdef BOOTINFO_DEBUG
@@ -2026,8 +2037,6 @@ getbootinfo(char *bootinfo, int bootinfo
break;
}
}
-   if (docninit > 0)
-   cninit();
 #ifdef BOOTINFO_DEBUG
printf("\n");
 #endif



Re: Use `if (retval == -1)' instead of 'if (retval < 0)'

2019-08-14 Thread YASUOKA Masahiko
I don't see any problem.

ok yasuoka

On Wed, 14 Aug 2019 16:12:01 +0900 (JST)
Masato Asou  wrote:
> Additional information.
> 
> From: Masato Asou 
> Subject: Use `if (retval == -1)' instead of 'if (retval < 0)'
> Date: Wed, 14 Aug 2019 13:42:13 +0900 (JST)
> 
>> Hi tech,
>> 
>> Use `if (retval == -1)' instead of 'if (retval < 0)' when checking the
>> return value of a system call.
>> 
>> How about it?
>> 
>> RCS file: /cvs/src/lib/libedit/readline.c,v
>> retrieving revision 1.28
>> diff -u -p -u -r1.28 readline.c
>> --- readline.c  28 Jun 2019 13:32:42 -  1.28
>> +++ readline.c  14 Aug 2019 04:38:55 -
>> @@ -2112,7 +2112,7 @@ _rl_event_read_char(EditLine *el, wchar_
>> return -1;
>>  #endif
>>  
>> -   if (num_read < 0 && errno == EAGAIN)
>> +   if (num_read == -1 && errno == EAGAIN)
>> continue;
>> if (num_read == 0)
>> continue;
>> --
>> ASOU Masato
> 
> The variable num_read holds the return value of the read(2) system call, as follows:
> 
>2090 while (rl_event_hook) {
>2091
>2092 (*rl_event_hook)();
>2093
>2094 #if defined(FIONREAD)
>2095 if (ioctl(el->el_infd, FIONREAD, &n) == -1)
>2096 return -1;
>2097 if (n)
>2098 num_read = read(el->el_infd, &ch, 1);
>2099 else
>2100 num_read = 0;
>2101 #elif defined(F_SETFL) && defined(O_NDELAY)
>2102 if ((n = fcntl(el->el_infd, F_GETFL)) == -1)
>2103 return -1;
>2104 if (fcntl(el->el_infd, F_SETFL, n|O_NDELAY) ==
>-1)
>2105 return -1;
>2106 num_read = read(el->el_infd, &ch, 1);
>2107 if (fcntl(el->el_infd, F_SETFL, n))
>2108 return -1;
>2109 #else
>2110 /* not non-blocking, but what you gonna do? */
>2111 num_read = read(el->el_infd, &ch, 1);
>2112 return -1;
>2113 #endif
>2114
>2115 if (num_read < 0 && errno == EAGAIN)
>2116 continue;
>2117 if (num_read == 0)
>2118 continue;
>2119 break;
>2120 }
> --
> ASOU Masato
> 



Re: Removing the kernel timezone: config(8): drop timezone support

2019-08-09 Thread YASUOKA Masahiko
On Thu, 08 Aug 2019 23:36:48 -0600
"Theo de Raadt"  wrote:
> YASUOKA Masahiko  wrote:
>> On Wed, 7 Aug 2019 21:41:17 -0500
>> Scott Cheloha  wrote:
>> > Drop config(8) support for getting/setting the kernel timezone.
>> 
>> I'm using this.  Is there any alternative or discussion?
>> 
>> I think PCs are still shipped with their firmware configured to the
>> local time.
> 
> How are you using this?
> 
> By deactivating KARL I assume?  That's a pretty bad tradeoff.

Currently I rewrote /usr/libexec/reorder_kernel manually.

--- /usr/libexec/reorder_kernel.origSat Jul  6 18:20:04 2019
+++ /usr/libexec/reorder_kernel Sat Jul  6 18:18:23 2019
@@ -63,6 +63,13 @@ fi
 
 cd $KERNEL_DIR/$KERNEL
 make newbsd
+
+cat < My view is that localtime-offset of a PC doesn't matter unless you run
> another operating system, and frankly why has that become our problem
> instead of their problem?

Above diff is for my laptop PC.

As another use case, my company has some PC servers as a stock which
are commonly used for some services.  Some services use OpenBSD,
others use Linux.  If OpenBSD cannot use the clock as local time, we
need to change the clock setting and ask Linux team to use the clock
as UTC.  Of course it's possible, but I'd like to show you the matter.

> To my amusement this supporting code was added on January 8, 2000.

--yasuoka



Re: Removing the kernel timezone: config(8): drop timezone support

2019-08-08 Thread YASUOKA Masahiko
Hello,

On Wed, 7 Aug 2019 21:41:17 -0500
Scott Cheloha  wrote:
> Drop config(8) support for getting/setting the kernel timezone.

I'm using this.  Is there any alternative or discussion?

I think PCs are still shipped with their firmware configured to the
local time.

> ok?
> 
> Index: cmd.c
> ===
> RCS file: /cvs/src/usr.sbin/config/cmd.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 cmd.c
> --- cmd.c 23 Nov 2013 17:38:15 -  1.20
> +++ cmd.c 8 Aug 2019 02:39:33 -
> @@ -26,7 +26,6 @@
>  
>  #include 
>  #include 
> -#include 
>  
>  #include 
>  #include 
> @@ -57,7 +56,6 @@ cmd_table_t cmd_table[] = {
>   {"show",   Xshow,   "[attr [val]]", "Show attribute"},
>   {"exit",   Xexit,   "", "Exit, without saving changes"},
>   {"quit",   Xquit,   "", "Quit, saving current changes"},
> - {"timezone", Xtimezone, "[mins [dst]]", "Show/change timezone"},
>   {"bufcachepercent", Xbufcachepct, "[number]",
>"Show/change BUFCACHEPERCENT"},
>   {"nkmempg", Xnkmempg,   "[number]", "Show/change NKMEMPAGES"},
> @@ -245,37 +243,6 @@ Xexit(cmd_t *cmd)
>  {
>   /* Nothing to do here */
>   return (CMD_EXIT);
> -}
> -
> -int
> -Xtimezone(cmd_t *cmd)
> -{
> - struct timezone *tz;
> - int num;
> - char*c;
> -
> - ukc_mod_kernel = 1;
> - tz = (struct timezone *)adjust((caddr_t)(nl[TZ_TZ].n_value));
> -
> - if (strlen(cmd->args) == 0) {
> - printf("timezone = %d, dst = %d\n",
> - tz->tz_minuteswest, tz->tz_dsttime);
> - } else {
> - if (number(cmd->args, &num) == 0) {
> - tz->tz_minuteswest = num;
> - c = cmd->args;
> - while ((*c != '\0') && !isspace((unsigned char)*c))
> - c++;
> - while (isspace((unsigned char)*c))
> - c++;
> - if (strlen(c) != 0 && number(c, &num) == 0)
> - tz->tz_dsttime = num;
> - printf("timezone = %d, dst = %d\n",
> - tz->tz_minuteswest, tz->tz_dsttime);
> - } else
> - printf("Unknown argument\n");
> - }
> - return (CMD_CONT);
>  }
>  
>  void
> Index: ukcutil.c
> ===
> RCS file: /cvs/src/usr.sbin/config/ukcutil.c,v
> retrieving revision 1.24
> diff -u -p -r1.24 ukcutil.c
> --- ukcutil.c 14 May 2019 13:44:25 -  1.24
> +++ ukcutil.c 8 Aug 2019 02:39:33 -
> @@ -25,7 +25,6 @@
>   */
>  
>  #include 
> -#include 
>  #include 
>  
>  #include 
> @@ -1398,7 +1397,6 @@ process_history(int len, char *buf)
>   char *c;
>   int devno, newno;
>   short unit, state;
> - struct timezone *tz;
>  
>   if (len == 0) {
>   printf("History is empty\n");
> @@ -1468,21 +1466,6 @@ process_history(int len, char *buf)
>   while (*c != '\n')
>   c++;
>   c++;
> - break;
> - case 't':
> - c++;
> - c++;
> - tz = (struct timezone *)adjust((caddr_t)nl[TZ_TZ].
> - n_value);
> - tz->tz_minuteswest = atoi(c);
> - while (*c != ' ')
> - c++;
> - c++;
> - tz->tz_dsttime = atoi(c);
> - while (*c != '\n')
> - c++;
> - c++;
> - ukc_mod_kernel = 1;
>   break;
>   case 'q':
>   while (*c != '\0')
> Index: ukc.h
> ===
> RCS file: /cvs/src/usr.sbin/config/ukc.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 ukc.h
> --- ukc.h 27 Sep 2017 15:14:52 -  1.14
> +++ ukc.h 8 Aug 2019 02:39:33 -
> @@ -41,14 +41,13 @@
>  #define I_TEXTRALOC  11
>  #define  I_HISTLEN   12
>  #define  CA_HISTORY  13
> -#define TZ_TZ14
> -#define P_PDEVNAMES  15
> -#define I_PDEVSIZE   16
> -#define S_PDEVINIT   17
> -#define I_NMBCLUSTERS18
> -#define I_BUFCACHEPCT19
> -#define I_NKMEMPG20
> -#define NLENTRIES21
> +#define P_PDEVNAMES  14
> +#define I_PDEVSIZE   15
> +#define S_PDEVINIT   16
> +#define I_NMBCLUSTERS17
> +#define I_BUFCACHEPCT18
> +#define I_NKMEMPG19
> +#define NLENTRIES20
>  
>  #ifdef UKC_MAIN
>  struct nlist nl[] = {
> @@ -66,7 +65,6 @@ struct nlist nl[] = {
>   { "_textraloc" },
>   { "_userconf_histlen" },
>   { "_userconf_history" },
> - { "_tz" },
>   { "_pdevnames" },
>   { "_pdevnames_size" },
>   { "_pdevinit" },
> @@ -90,7 +88,6 

Re: SIGSEGV in libedit

2019-08-05 Thread YASUOKA Masahiko
On Mon, 05 Aug 2019 07:10:01 -0600
"Todd C. Miller"  wrote:
> On Mon, 05 Aug 2019 17:31:51 +0900, YASUOKA Masahiko wrote:
> 
>> The diff basically is to do the same thing which is done on the
>> upstream, but it also replaces h_malloc() and tok_malloc() which the
>> upstream didn't replace yet.
> 
> This only changes the reallocarray(3) calls that don't actually
> reallocate.

Yes.  el_malloc() had been used for them in the original libedit(3).

> Shouldn't the other calls that do reallocate the buffer be converted
> to recallocarray(3)?

Yes.  Actually I tried.  But I thought it's dangerous since the
current size of the buffer is not clear in some places.  So if we'll
do this, I'd like to do that separately.

Also, the upstream didn't do that yet.

In https://github.com/NetBSD/src/commit/b91b3c48e0edb116bd797586430cb426b575d717
|XXX: should fix realloc similarly.

--yasuoka



Re: SIGSEGV in libedit

2019-08-05 Thread YASUOKA Masahiko
Hi,

On Thu, 1 Aug 2019 18:02:41 +0200
Ingo Schwarze  wrote:
> YASUOKA Masahiko wrote on Thu, Aug 01, 2019 at 08:42:35PM +0900:
>> I noticed the upstream NetBSD recently replaced almost all malloc(3)s
>> by calloc(3) in libedit.
>> 
>> https://github.com/NetBSD/src/commit/b91b3c48e0edb116bd797586430cb426b575d717
>> 
>> This also fixes the problem.  I'll create a diff which does the same
>> thing.
(snip)
> So i think for committing to OpenBSD, each function changed needs
> to be inspected and it needs to be confirmed that zeroing each
> buffer in question does not cause new problems or hide other
> bugs.  That may be somewhat painful work, libedit code is not very
> audit friendly, but i don't think cutting corners is a good idea.
> 
> In general, code quality in libedit is somewhat below OpenBSD quality
> standards, so the time spent auditing it is certainly not wasted
> but spent at a place needing it.  Also, the code quality implies
> that zeroing some additional buffers likely improves security at
> least at some of the places - if it is checked properly.

After the diff below is applied, memory regions whose contents were
undetermined at allocation will instead be zero-filled; I believe the
diff makes no other change than this.

Also I think we need to follow the upstream's change since changes
which will happen on the upstream in future will assume this
initialization.

The diff basically is to do the same thing which is done on the
upstream, but it also replaces h_malloc() and tok_malloc() which the
upstream didn't replace yet.

ok?

Initialize the buffers when allocation.  This happened on the upstream.
http://mail-index.netbsd.org/source-changes/2019/07/23/msg107399.html

Index: lib/libedit/chared.c
===
RCS file: /cvs/src/lib/libedit/chared.c,v
retrieving revision 1.28
diff -u -p -r1.28 chared.c
--- lib/libedit/chared.c12 Apr 2017 18:24:37 -  1.28
+++ lib/libedit/chared.c5 Aug 2019 07:31:35 -
@@ -404,7 +404,7 @@ ch_init(EditLine *el)
el->el_chared.c_undo.len = -1;
el->el_chared.c_undo.cursor = 0;
 
-   el->el_chared.c_redo.buf = reallocarray(NULL, EL_BUFSIZ,
+   el->el_chared.c_redo.buf = calloc(EL_BUFSIZ,
sizeof(*el->el_chared.c_redo.buf));
if (el->el_chared.c_redo.buf == NULL)
return -1;
Index: lib/libedit/chartype.c
===
RCS file: /cvs/src/lib/libedit/chartype.c,v
retrieving revision 1.16
diff -u -p -r1.16 chartype.c
--- lib/libedit/chartype.c  29 Jan 2019 09:47:00 -  1.16
+++ lib/libedit/chartype.c  5 Aug 2019 07:31:35 -
@@ -145,7 +145,7 @@ ct_decode_argv(int argc, const char *arg
if (!conv->wsize)
return NULL;
 
-   wargv = reallocarray(NULL, argc + 1, sizeof(*wargv));
+   wargv = calloc(argc + 1, sizeof(*wargv));
 
for (i = 0, p = conv->wbuff; i < argc; ++i) {
if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
@@ -214,7 +214,7 @@ ct_visual_string(const wchar_t *s)
return NULL;
if (!buff) {
buffsize = CT_BUFSIZ;
-   buff = reallocarray(NULL, buffsize, sizeof(*buff));
+   buff = calloc(buffsize, sizeof(*buff));
}
dst = buff;
while (*s) {
Index: lib/libedit/filecomplete.c
===
RCS file: /cvs/src/lib/libedit/filecomplete.c,v
retrieving revision 1.12
diff -u -p -r1.12 filecomplete.c
--- lib/libedit/filecomplete.c  11 Apr 2016 20:43:33 -  1.12
+++ lib/libedit/filecomplete.c  5 Aug 2019 07:31:35 -
@@ -77,7 +77,7 @@ fn_tilde_expand(const char *txt)
return NULL;
} else {
len = temp - txt + 1;   /* text until string after slash */
-   temp = malloc(len);
+   temp = calloc(len, 1);
if (temp == NULL)
return NULL;
(void)strncpy(temp, txt + 1, len - 2);
@@ -99,7 +99,7 @@ fn_tilde_expand(const char *txt)
txt += len;
 
tempsz = strlen(pass->pw_dir) + 1 + strlen(txt) + 1;
-   temp = malloc(tempsz);
+   temp = calloc(tempsz, 1);
if (temp == NULL)
return NULL;
(void)snprintf(temp, tempsz, "%s/%s", pass->pw_dir, txt);
@@ -222,7 +222,7 @@ fn_filename_completion_function(const ch
 #endif
 
tempsz = strlen(dirname) + len + 1;
-   temp = malloc(tempsz);
+   temp = calloc(tempsz, 1);
if (temp == NULL)
return NULL;
(void)snprintf(temp, tempsz, "%s%s", dirname, entry->d_name);
@@ -298,7 +298,7 @@ completion_matches(const char *text, cha
ma

Re: SIGSEGV in libedit

2019-08-05 Thread YASUOKA Masahiko
Hi,

On Thu, 1 Aug 2019 18:02:41 +0200
Ingo Schwarze  wrote:
> YASUOKA Masahiko wrote on Thu, Aug 01, 2019 at 08:42:35PM +0900:
>> I noticed the upstream NetBSD recently replaced almost all malloc(3)s
>> by calloc(3) in libedit.
>> 
>> https://github.com/NetBSD/src/commit/b91b3c48e0edb116bd797586430cb426b575d717
>> 
>> This also fixes the problem.  I'll create a diff which does the same
>> thing.
> 
> Might i suggest that you first send a diff changing only the one place
> required to fix the bug asou@ reported?  That can easily be reviewed,
> and i expect we can get it in quickly, and it allows a useful commit
> message explaining why exactly it is needed.

This is the diff which fixes the problem by replacing malloc by calloc.

ok?

Initialize the line buffers to zero at allocation.  This fixes a crash
that happens after the window size changes.

Index: lib/libedit/terminal.c
===
RCS file: /cvs/src/lib/libedit/terminal.c,v
retrieving revision 1.18
diff -u -p -r1.18 terminal.c
--- lib/libedit/terminal.c  12 Apr 2017 18:24:37 -  1.18
+++ lib/libedit/terminal.c  5 Aug 2019 07:13:08 -
@@ -413,11 +413,11 @@ terminal_alloc_display(EditLine *el)
wchar_t **b;
coord_t *c = &el->el_terminal.t_size;
 
-   b = reallocarray(NULL, c->v + 1, sizeof(*b));
+   b = calloc(c->v + 1, sizeof(*b));
if (b == NULL)
goto done;
for (i = 0; i < c->v; i++) {
-   b[i] = reallocarray(NULL, c->h + 1, sizeof(**b));
+   b[i] = calloc(c->h + 1, sizeof(**b));
if (b[i] == NULL) {
while (--i >= 0)
free(b[i]);
@@ -428,11 +428,11 @@ terminal_alloc_display(EditLine *el)
b[c->v] = NULL;
el->el_display = b;
 
-   b = reallocarray(NULL, c->v + 1, sizeof(*b));
+   b = calloc(c->v + 1, sizeof(*b));
if (b == NULL)
goto done;
for (i = 0; i < c->v; i++) {
-   b[i] = reallocarray(NULL, c->h + 1, sizeof(**b));
+   b[i] = calloc(c->h + 1, sizeof(**b));
if (b[i] == NULL) {
while (--i >= 0)
free(b[i]);



Re: SIGSEGV in libedit

2019-08-01 Thread YASUOKA Masahiko
Hi,

I noticed the upstream NetBSD recently replaced almost all malloc(3)s
by calloc(3) in libedit.

  https://github.com/NetBSD/src/commit/b91b3c48e0edb116bd797586430cb426b575d717

This also fixes the problem.  I'll create a diff which does the same
thing.

On Thu, 01 Aug 2019 14:54:20 +0900 (JST)
YASUOKA Masahiko  wrote:
> Hi,
> 
> Programs using libedit(3) crash after the program's window size is
> changed.  For example,
> 
>   1. Invoke ftp
>  $ ftp
>  ftp>
>   2. Resize its window
>   3. Enter "deb" + <TAB>
> 
>  => When the problem occurs, it crashes with a segmentation fault
>  => The problem doesn't occur, it displays "debug"
> 
> The problem happens once in 5-30 times.
> 
> See the problem by gdb.
> 
>   Program terminated with signal SIGSEGV, Segmentation fault.
>   #0  0x162ce7bd3ff2 in re_update_line (el=0x162cfa59d800, 
>   old=0x162c3e478e00 L"deb", '\xdfdfdfdf'  Cannot access memory at address 0x162c3e479000>, new=0x162c3854ae00 L"ftp> 
> debug", i=0)
>   at /home/yasuoka/src/lib/libedit/refresh.c:518
>   518 while (*o)
>   (gdb) bt
>   #0  0x162ce7bd3ff2 in re_update_line (el=0x162cfa59d800, 
>   old=0x162c3e478e00 L"deb", '\xdfdfdfdf'  Cannot access memory at address 0x162c3e479000>, new=0x162c3854ae00 L"ftp> 
> debug", i=0)
>   at /home/yasuoka/src/lib/libedit/refresh.c:518
>   #1  0x162ce7bd3c99 in re_refresh (el=0x162cfa59d800) at 
> /home/yasuoka/src/lib/libedit/refresh.c:298
>   #2  0x162ce7beba1c in el_wgets (el=0x162cfa59d800, 
> nread=0x7f7e1c6c) at /home/yasuoka/src/lib/libedit/read.c:577
>   #3  0x162ce7be700a in el_gets (el=0x162cfa59d800, nread=0x7f7e1c6c) 
> at /home/yasuoka/src/lib/libedit/eln.c:74
>   #4  0x162a32a19c60 in ?? ()
>   #5  0x162a32a19817 in ?? ()
>   #6  0x162a32a0b142 in ?? ()
>   #7  0x in ?? ()
>   (gdb) p el->el_display[0]
>   $1 = 0x162c3e478e00 L"deb", '\xdfdfdfdf'  access memory at address 0x162c3e479000>
>   (gdb) 
> 
> When <TAB> is entered, as the result of the completion, libedit calls
> re_update_line() <= re_refresh() to refresh the line.  In that
> function, it tries to find a NUL char in the line buffer, but the line
> buffer doesn't include any NUL, then the crash happens.
> 
> After the window size change, the line buffer is freed and allocated
> again at terminal_alloc_display().  (These are done by signal
> handler.  This is a separated problem)
> 
>  409 static int
>  410 terminal_alloc_display(EditLine *el)
>  411 {
>  412 int i;
>  413 wchar_t **b;
>  414 coord_t *c = &el->el_terminal.t_size;
>  415 
>  416 b = reallocarray(NULL, c->v + 1, sizeof(*b));
>  417 if (b == NULL)
>  418 goto done;
>  419 for (i = 0; i < c->v; i++) {
>  420 b[i] = reallocarray(NULL, c->h + 1, sizeof(**b));
>  421 if (b[i] == NULL) {
>  422 while (--i >= 0)
>  423 free(b[i]);
>  424 free(b);
>  425 goto done;
>  426 }
>  427 }
>  428 b[c->v] = NULL;
>  429 el->el_display = b;
> 
> The line buffers are el->el_display[] and they are allocated by
> reallocarray().  Then they are initialized at re_clear_display().
> 
> 1149 protected void
> 1150 re_clear_display(EditLine *el)
> 1151 {
> 1152 int i;
> 1153 
> 1154 el->el_cursor.v = 0;
> 1155 el->el_cursor.h = 0;
> 1156 for (i = 0; i < el->el_terminal.t_size.v; i++)
> 1157 el->el_display[i][0] = '\0';
> 1158 el->el_refresh.r_oldcv = 0;
> 1159 }
> 
> Remark that only first char is set NUL, but the rest part is kept
> uninitialized.  Then user inputs "deb".  re_fastputc() is called for
> each char.
> 
> 1053 re_fastputc(EditLine *el, wint_t c)
> 1054 {
> 1055 wchar_t *lastline;
> 1056 int w;
> 1057 
> 1058 w = wcwidth(c);
> 1059 while (w > 1 && el->el_cursor.h + w > el->el_terminal.t_size.h)
> 1060 re_fastputc(el, ' ');
> 1061 
> 1062 terminal__putc(el, c);
> 1063 el->el_display[el->el_cursor.v][el->el_cursor.h++] = c;
> 
> the function just put the char like #1063, then the buffer becomes it
> doesn't include any NUL char like below.
> 
>   deb + 
> 
> After this, by calling re_refresh() the crash happens.
> 
> I had looked into the code deeply, but it didn't beco

SIGSEGV in libedit

2019-07-31 Thread YASUOKA Masahiko
Hi,

Programs using libedit(3) crash after the program's window size is
changed.  For example,

  1. Invoke ftp
 $ ftp
 ftp>
  2. Resize its window
  3. Enter "deb" + <TAB>

 => When the problem occurs, it crashes with a segmentation fault
 => The problem doesn't occur, it displays "debug"

The problem happens once in 5-30 times.

See the problem by gdb.

  Program terminated with signal SIGSEGV, Segmentation fault.
  #0  0x162ce7bd3ff2 in re_update_line (el=0x162cfa59d800, 
  old=0x162c3e478e00 L"deb", '\xdfdfdfdf' , new=0x162c3854ae00 L"ftp> debug", i=0)
  at /home/yasuoka/src/lib/libedit/refresh.c:518
  518 while (*o)
  (gdb) bt
  #0  0x162ce7bd3ff2 in re_update_line (el=0x162cfa59d800, 
  old=0x162c3e478e00 L"deb", '\xdfdfdfdf' , new=0x162c3854ae00 L"ftp> debug", i=0)
  at /home/yasuoka/src/lib/libedit/refresh.c:518
  #1  0x162ce7bd3c99 in re_refresh (el=0x162cfa59d800) at 
/home/yasuoka/src/lib/libedit/refresh.c:298
  #2  0x162ce7beba1c in el_wgets (el=0x162cfa59d800, nread=0x7f7e1c6c) 
at /home/yasuoka/src/lib/libedit/read.c:577
  #3  0x162ce7be700a in el_gets (el=0x162cfa59d800, nread=0x7f7e1c6c) 
at /home/yasuoka/src/lib/libedit/eln.c:74
  #4  0x162a32a19c60 in ?? ()
  #5  0x162a32a19817 in ?? ()
  #6  0x162a32a0b142 in ?? ()
  #7  0x in ?? ()
  (gdb) p el->el_display[0]
  $1 = 0x162c3e478e00 L"deb", '\xdfdfdfdf' 
  (gdb) 

When <TAB> is entered, as the result of the completion, libedit calls
re_update_line() <= re_refresh() to refresh the line.  In that
function, it tries to find a NUL char in the line buffer, but the line
buffer doesn't include any NUL, then the crash happens.

After the window size change, the line buffer is freed and allocated
again at terminal_alloc_display().  (These are done by signal
handler.  This is a separated problem)

 409 static int
 410 terminal_alloc_display(EditLine *el)
 411 {
 412 int i;
 413 wchar_t **b;
 414 coord_t *c = &el->el_terminal.t_size;
 415 
 416 b = reallocarray(NULL, c->v + 1, sizeof(*b));
 417 if (b == NULL)
 418 goto done;
 419 for (i = 0; i < c->v; i++) {
 420 b[i] = reallocarray(NULL, c->h + 1, sizeof(**b));
 421 if (b[i] == NULL) {
 422 while (--i >= 0)
 423 free(b[i]);
 424 free(b);
 425 goto done;
 426 }
 427 }
 428 b[c->v] = NULL;
 429 el->el_display = b;

The line buffers are el->el_display[] and they are allocated by
reallocarray().  Then they are initialized at re_clear_display().

1149 protected void
1150 re_clear_display(EditLine *el)
1151 {
1152 int i;
1153 
1154 el->el_cursor.v = 0;
1155 el->el_cursor.h = 0;
1156 for (i = 0; i < el->el_terminal.t_size.v; i++)
1157 el->el_display[i][0] = '\0';
1158 el->el_refresh.r_oldcv = 0;
1159 }

Remark that only first char is set NUL, but the rest part is kept
uninitialized.  Then user inputs "deb".  re_fastputc() is called for
each char.

1053 re_fastputc(EditLine *el, wint_t c)
1054 {
1055 wchar_t *lastline;
1056 int w;
1057 
1058 w = wcwidth(c);
1059 while (w > 1 && el->el_cursor.h + w > el->el_terminal.t_size.h)
1060 re_fastputc(el, ' ');
1061 
1062 terminal__putc(el, c);
1063 el->el_display[el->el_cursor.v][el->el_cursor.h++] = c;

the function just stores the char as in line 1063, so the buffer ends up
not containing any NUL char, like below.

  deb + 

After this, by calling re_refresh() the crash happens.

I had looked into the code deeply, but it didn't become clear how
should we fix this problem.  But I suppose the code assumes the entire
buffer is initialized, so the following diff replaces the code which
initializes only first char by the code which calls
re__copy_and_pad().

comment?
ok?

Index: lib/libedit/refresh.c
===
RCS file: /var/cvs/openbsd/src/lib/libedit/refresh.c,v
retrieving revision 1.22
diff -u -p -r1.22 refresh.c
--- lib/libedit/refresh.c   11 Oct 2018 15:19:09 -  1.22
+++ lib/libedit/refresh.c   1 Aug 2019 04:55:34 -
@@ -320,7 +320,8 @@ re_refresh(EditLine *el)
 #ifdef DEBUG_REFRESH
terminal_overwrite(el, L"C\b", 2);
 #endif /* DEBUG_REFRESH */
-   el->el_display[i][0] = '\0';
+   re__copy_and_pad(el->el_display[i], L"",
+   (size_t) el->el_terminal.t_size.h);
}
 
el->el_refresh.r_oldcv = el->el_refresh.r_newcv; /* set for next time */
@@ -1154,7 +1155,8 @@ re_clear_display(EditLine *el)
el->el_cursor.v = 0;
el->el_cursor.h = 0;
for (i = 0; i < el->el_terminal.t_size.v; i++)
-   el->el_display[i][0] = 

Re: pf: use proper interface for route-to when it is used with sticky-address

2019-07-10 Thread YASUOKA Masahiko
On Wed, 10 Jul 2019 23:50:23 +0100
Stuart Henderson  wrote:
> On 2019/07/10 23:27, Alexandr Nedvedicky wrote:
>> Hello Stuart,
>> 
>> On Wed, Jul 10, 2019 at 08:19:13PM +0100, Stuart Henderson wrote:
>> > On 2019/07/05 17:09, YASUOKA Masahiko wrote:
>> > > Hi,
>> > > 
>> > > Previous diff made src-node have a reference for the kif.  My
>> > > colleague pointed out that incrementing the reference count of the kif
>> > > is required.
>> > > 
>> > > ok?
>> > > 
>> > > Fix previous commit which made src-node have a reference for the kif.
>> > > Src-node should use the reference counter since it might live longer
>> > > than its table entry, rule or the associated states.
>> > 
>> > I'm seeing crashes soon after starting network which must be related
>> > to this.
>> > 
>> > I have a few rules with standard "max-src-conn-rate" options, e.g.
>> > "keep state (max-src-conn-rate 5/8 overload  flush global)"
>> > If I remove the max-src-conn-rate things are stable again.
>> > 
>> 
>> does patch below fix the NULL pointer dereference panic for you?
>> 
>> thanks for report and
>> sorry for inconveniences
>> 
>> sashan
> 
> Yes, that's working OK here now, thanks for the quick response.

Thank you for finding and fixing this.

ok yasuoka

On Wed, 10 Jul 2019 23:50:23 +0100
Stuart Henderson  wrote:
> On 2019/07/10 23:27, Alexandr Nedvedicky wrote:
>> Hello Stuart,
>> 
>> On Wed, Jul 10, 2019 at 08:19:13PM +0100, Stuart Henderson wrote:
>> > On 2019/07/05 17:09, YASUOKA Masahiko wrote:
>> > > Hi,
>> > > 
>> > > Previous diff made src-node have a reference for the kif.  My
>> > > colleague pointed out that incrementing the reference count of the kif
>> > > is required.
>> > > 
>> > > ok?
>> > > 
>> > > Fix previous commit which made src-node have a reference for the kif.
>> > > Src-node should use the reference counter since it might live longer
>> > > than its table entry, rule or the associated states.
>> > 
>> > I'm seeing crashes soon after starting network which must be related
>> > to this.
>> > 
>> > I have a few rules with standard "max-src-conn-rate" options, e.g.
>> > "keep state (max-src-conn-rate 5/8 overload  flush global)"
>> > If I remove the max-src-conn-rate things are stable again.
>> > 
>> 
>> does patch below fix the NULL pointer dereference panic for you?
>> 
>> thanks for report and
>> sorry for inconveniences
>> 
>> sashan
> 
> Yes, that's working OK here now, thanks for the quick response.
> 
> 
>> 8<---8<---8<--8<
>> diff --git a/sys/net/pf.c b/sys/net/pf.c
>> index 26c3d420254..9addec6d788 100644
>> --- a/sys/net/pf.c
>> +++ b/sys/net/pf.c
>> @@ -586,10 +586,12 @@ pf_insert_src_node(struct pf_src_node **sn, struct 
>> pf_rule *rule,
>>  }
>>  (*sn)->creation = time_uptime;
>>  (*sn)->rule.ptr->src_nodes++;
>> -(*sn)->kif = kif;
>> +if (kif != NULL) {
>> +(*sn)->kif = kif;
>> +pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
>> +}
>>  pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
>>  pf_status.src_nodes++;
>> -pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
>>  } else {
>>  if (rule->max_src_states &&
>>  (*sn)->states >= rule->max_src_states) {
>> 



Re: pf: use proper interface for route-to when it is used with sticky-address

2019-07-05 Thread YASUOKA Masahiko
Hi,

Previous diff made src-node have a reference for the kif.  My
colleague pointed out that incrementing the reference count of the kif
is required.

ok?

Fix previous commit which made src-node have a reference for the kif.
Src-node should use the reference counter since it might live longer
than its table entry, rule or the associated states.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1083
diff -u -p -r1.1083 pf.c
--- sys/net/pf.c2 Jul 2019 09:04:53 -   1.1083
+++ sys/net/pf.c5 Jul 2019 07:57:57 -
@@ -589,6 +589,7 @@ pf_insert_src_node(struct pf_src_node **
(*sn)->kif = kif;
pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
pf_status.src_nodes++;
+   pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
} else {
if (rule->max_src_states &&
(*sn)->states >= rule->max_src_states) {
@@ -612,6 +613,7 @@ pf_remove_src_node(struct pf_src_node *s
RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
pf_status.src_nodes--;
+   pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
pool_put(&pf_src_tree_pl, sn);
 }
 
Index: sys/net/pf_if.c
===
RCS file: /cvs/src/sys/net/pf_if.c,v
retrieving revision 1.96
diff -u -p -r1.96 pf_if.c
--- sys/net/pf_if.c 10 Dec 2018 16:48:15 -  1.96
+++ sys/net/pf_if.c 5 Jul 2019 07:57:57 -
@@ -147,6 +147,9 @@ pfi_kif_ref(struct pfi_kif *kif, enum pf
case PFI_KIF_REF_ROUTE:
kif->pfik_routes++;
break;
+   case PFI_KIF_REF_SRCNODE:
+   kif->pfik_srcnodes++;
+   break;
default:
panic("pfi_kif_ref with unknown type");
}
@@ -185,6 +188,14 @@ pfi_kif_unref(struct pfi_kif *kif, enum 
}
kif->pfik_routes--;
break;
+   case PFI_KIF_REF_SRCNODE:
+   if (kif->pfik_srcnodes <= 0) {
+   DPFPRINTF(LOG_ERR,
+   "pfi_kif_unref: src-node refcount <= 0");
+   return;
+   }
+   kif->pfik_srcnodes--;
+   break;
default:
panic("pfi_kif_unref with unknown type");
}
@@ -192,7 +203,8 @@ pfi_kif_unref(struct pfi_kif *kif, enum 
if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == pfi_all)
return;
 
-   if (kif->pfik_rules || kif->pfik_states || kif->pfik_routes)
+   if (kif->pfik_rules || kif->pfik_states || kif->pfik_routes ||
+   kif->pfik_srcnodes)
return;
 
RB_REMOVE(pfi_ifhead, &pfi_ifs, kif);
Index: sys/net/pfvar.h
===
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.491
diff -u -p -r1.491 pfvar.h
--- sys/net/pfvar.h 2 Jul 2019 09:04:53 -   1.491
+++ sys/net/pfvar.h 5 Jul 2019 07:57:58 -
@@ -1162,6 +1162,7 @@ struct pfi_kif {
int  pfik_states;
int  pfik_rules;
int  pfik_routes;
+   int  pfik_srcnodes;
TAILQ_HEAD(, pfi_dynaddr)pfik_dynaddrs;
 };
 
@@ -1169,7 +1170,8 @@ enum pfi_kif_refs {
PFI_KIF_REF_NONE,
PFI_KIF_REF_STATE,
PFI_KIF_REF_RULE,
-   PFI_KIF_REF_ROUTE
+   PFI_KIF_REF_ROUTE,
+   PFI_KIF_REF_SRCNODE
 };
 
 #define PFI_IFLAG_SKIP 0x0100  /* skip filtering on interface */



pf: use proper interface for route-to when it is used with sticky-address

2019-07-01 Thread YASUOKA Masahiko
Hi,

"route-to" is used with the interface used for the next hop.  But if
it is used with source address tracking (sticky-address), it sometimes
outputs the packet to the wrong interface.

Example:

  pass in quick inet proto tcp from any to 192.168.0.10 port 80 \
keep state (sloppy) route-to <LB> source-hash hogehoge sticky-address

  # pfctl -Tshow -tLB
  127.0.0.1@lo0
  192.168.0.101@em0
  192.168.0.102@em0
  #

pf_test() => pf_route() uses r->route->kif as the output interface.

If there is no active source address tracking record, r->route->kif is
properly set with the table entry at pfr_pool_get() (called from
pf_map_addr() <= pf_route() <= pf_route()).  But if there is an active
source tracking record, pf_map_addr() returns without updating
r->route->kif.

339 int
340 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
341 struct pf_addr *naddr, struct pf_addr *init_addr, struct 
pf_src_node **sns,
342 struct pf_pool *rpool, enum pf_sn_types type)
343 {
(snip)
355 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
356 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
357 pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) 
== 0)
358 return (0);

Since pf_map_addr_sticky() doesn't update the kif, the previously used
kif is mistakenly used.

ok?

When source address tracking record is used for "route-to", the next
hop interface configured with "route-to" was not used.  Keep the
interface within the pf_src_node and use it when the record is used.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1081
diff -u -p -r1.1081 pf.c
--- sys/net/pf.c20 Mar 2019 20:07:28 -  1.1081
+++ sys/net/pf.c1 Jul 2019 11:47:36 -
@@ -542,7 +542,7 @@ pf_src_connlimit(struct pf_state **state
 int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
 enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
-struct pf_addr *raddr)
+struct pf_addr *raddr, struct pfi_kif *kif)
 {
struct pf_src_node  k;
 
@@ -586,6 +586,7 @@ pf_insert_src_node(struct pf_src_node **
}
(*sn)->creation = time_uptime;
(*sn)->rule.ptr->src_nodes++;
+   (*sn)->kif = kif;
pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
pf_status.src_nodes++;
} else {
@@ -3882,7 +3883,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
 
if (r->rule_flag & PFRULE_SRCTRACK &&
pf_insert_src_node(&sns[PF_SN_NONE], r, PF_SN_NONE,
-   pd->af, pd->src, NULL) != 0) {
+   pd->af, pd->src, NULL, NULL) != 0) {
REASON_SET(&reason, PFRES_SRCLIMIT);
goto cleanup;
}
Index: sys/net/pf_lb.c
===
RCS file: /cvs/src/sys/net/pf_lb.c,v
retrieving revision 1.63
diff -u -p -r1.63 pf_lb.c
--- sys/net/pf_lb.c 10 Dec 2018 16:48:15 -  1.63
+++ sys/net/pf_lb.c 1 Jul 2019 11:47:36 -
@@ -329,6 +329,10 @@ pf_map_addr_sticky(sa_family_t af, struc
pf_print_host(naddr, 0, af);
addlog("\n");
}
+
+   if (sns[type]->kif != NULL)
+   rpool->kif = sns[type]->kif;
+
return (0);
 }
 
@@ -618,7 +622,8 @@ pf_map_addr(sa_family_t af, struct pf_ru
pf_remove_src_node(sns[type]);
sns[type] = NULL;
}
-   if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr))
+   if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
+   rpool->kif))
return (1);
}
 
Index: sys/net/pfvar.h
===
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.490
diff -u -p -r1.490 pfvar.h
--- sys/net/pfvar.h 18 Feb 2019 13:11:44 -  1.490
+++ sys/net/pfvar.h 1 Jul 2019 11:47:36 -
@@ -1712,7 +1712,7 @@ extern int pf_state_insert(struct 
pfi
 int pf_insert_src_node(struct pf_src_node **,
struct pf_rule *, enum pf_sn_types,
sa_family_t, struct pf_addr *,
-   struct pf_addr *);
+   struct pf_addr *, struct pfi_kif *);
 voidpf_remove_src_node(struct pf_src_node *);
 struct pf_src_node *pf_get_src_node(struct pf_state *,
enum pf_sn_types);



pf: keep src track for route-to while its state exists

2019-06-30 Thread YASUOKA Masahiko
Hi,

The source address tracking (sticky-address) is kept while there are
states which refer to it.  This is mentioned in pf.conf(5).  This is true
for translation (nat-to, rdr-to) but it was not true for
routing (route-to).

ok?

Link the state and the source track to keep the source track while
there are states which refer to it.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1081
diff -u -p -r1.1081 pf.c
--- sys/net/pf.c20 Mar 2019 20:07:28 -  1.1081
+++ sys/net/pf.c1 Jul 2019 05:15:37 -
@@ -222,7 +222,7 @@ int  pf_test_state_icmp(struct pf_pdes
 u_int16_t   pf_calc_mss(struct pf_addr *, sa_family_t, int,
u_int16_t);
 static __inline int pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
-   sa_family_t);
+   sa_family_t, struct pf_src_node **);
 struct pf_divert   *pf_get_divert(struct mbuf *);
 int pf_walk_header(struct pf_pdesc *, struct ip *,
u_short *);
@@ -3410,17 +3410,16 @@ pf_calc_mss(struct pf_addr *addr, sa_fam
 }
 
 static __inline int
-pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
+pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af,
+struct pf_src_node **sns)
 {
struct pf_rule *r = s->rule.ptr;
-   struct pf_src_node *sns[PF_SN_MAX];
int rv;
 
s->rt_kif = NULL;
if (!r->rt)
return (0);
 
-   memset(sns, 0, sizeof(sns));
switch (af) {
case AF_INET:
rv = pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, sns,
@@ -4089,6 +4088,11 @@ pf_create_state(struct pf_pdesc *pd, str
goto csfailed;
}
 
+   if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) {
+   REASON_SET(, PFRES_NOROUTE);
+   goto csfailed;
+   }
+
for (i = 0; i < PF_SN_MAX; i++)
if (sns[i] != NULL) {
struct pf_sn_item   *sni;
@@ -4102,11 +4106,6 @@ pf_create_state(struct pf_pdesc *pd, str
SLIST_INSERT_HEAD(>src_nodes, sni, next);
sni->sn->states++;
}
-
-   if (pf_set_rt_ifp(s, pd->src, (*skw)->af) != 0) {
-   REASON_SET(, PFRES_NOROUTE);
-   goto csfailed;
-   }
 
if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) {
pf_detach_state(s);



Re: ipmi(4): limit watchdog period

2019-06-07 Thread YASUOKA Masahiko
This also seems good to me.

ok?

On Fri, 07 Jun 2019 19:23:57 +0900 (JST)
Naoki Fukaumi  wrote:
> hi tech@,
> 
> here is patch for ipmi(4).
> 
> watchdog timer on ipmi(4) is 16bit, and unit is 100ms. round down a
> value greater than (UINT16_MAX / 10) to the maximum value.
> 
> --
> FUKAUMI Naoki
> 
> Index: sys/dev/ipmi.c
> ===
> RCS file: /cvs/src/sys/dev/ipmi.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 ipmi.c
> --- sys/dev/ipmi.c15 Jun 2018 12:21:41 -  1.102
> +++ sys/dev/ipmi.c7 Jun 2019 09:53:50 -
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -1884,12 +1885,18 @@ reset:
>  }
>  
>  #define  MIN_PERIOD  10
> +#define  MAX_PERIOD  (UINT16_MAX / 10)
>  
>  int
>  ipmi_watchdog(void *arg, int period)
>  {
>   struct ipmi_softc   *sc = arg;
>  
> + if (period < MIN_PERIOD && period > 0)
> + period = MIN_PERIOD;
> + else if (period > MAX_PERIOD)
> + period = MAX_PERIOD;
> +
>   if (sc->sc_wdog_period == period) {
>   if (period != 0) {
>   struct task *t;
> @@ -1903,8 +1910,6 @@ ipmi_watchdog(void *arg, int period)
>   return (period);
>   }
>  
> - if (period < MIN_PERIOD && period > 0)
> - period = MIN_PERIOD;
>   sc->sc_wdog_period = period;
>   ipmi_watchdog_set(sc);
>   printf("%s: watchdog %sabled\n", DEVNAME(sc),
> 



Re: watchdog(4): limit watchdog period

2019-06-07 Thread YASUOKA Masahiko
The diff seems good to me.

ok?

On Fri, 07 Jun 2019 19:20:35 +0900 (JST)
Naoki Fukaumi  wrote:
> hi tech@,
> 
> setting kern.watchdog.period to negative value is currently allowed,
> but many watchdog device drivers don't handle it properly. it will
> enable timer with unintended period without auto refreshing, so machine
> will be rebooted at some point.
> 
> some drivers handle negative value, but results aren't same. some
> drivers disable timer, others enable it.
> 
> this patch makes kern.watchdog.period unsigned, and introduce maximum
> value in watchdog(4).
> 
> kern.watchdog.period can be between 0 and UINT_MAX. negative value is
> invalid.
> 
> large value will be rounded down to maximum value (INT32_MAX / hz) in
> watchdog(4) for avoiding ticks overflow for timeout_add(9).
> 
> some device may have more strict limit, then the value needs to be
> rounded down to the device specific maximum value in the driver.
> 
> here is quick summary of drivers. (if something is wrong, sorry!)
> 
> following drivers should be fixed for maximum value.
> 
> arch/armv7/omap/omdog.c (period>=2^17)
> dev/fdt/bcm2835_dog.c (period>15)
> dev/ipmi.c (period>6553)
> dev/isa/fins.c (period>15300)
> dev/isa/sch311x.c (period>15300)
> dev/pci/ichwdt.c (period>65)
> dev/pci/tcpcib.c (period>65) 
> dev/pci/wdt.c (period>1310)
> 
> following drivers should have a problem with negative value, but it
> will be fixed by this patch.
> 
> arch/i386/i386/esm.c
> arch/i386/pci/geodesc.c
> arch/sparc64/dev/pmc.c
> dev/isa/viasio.c
> dev/pci/berkwdt.c
> dev/pci/glxpcib.c
> dev/pci/pwdog.c
> 
> following drivers should have no problem.
> 
> arch/armv7/sunxi/sxidog.c
> arch/i386/pci/elan520.c
> arch/sgi/localbus/imc.c
> arch/sparc64/dev/lom.c
> dev/isa/it.c
> 
> Regards,
> 
> --
> FUKAUMI Naoki
> 
> Index: sbin/sysctl/sysctl.c
> ===
> RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
> retrieving revision 1.242
> diff -u -p -r1.242 sysctl.c
> --- sbin/sysctl/sysctl.c  13 May 2019 20:47:19 -  1.242
> +++ sbin/sysctl/sysctl.c  7 Jun 2019 09:53:39 -
> @@ -479,6 +479,7 @@ parse(char *string, int flags)
>   );
>   if (len < 0)
>   return;
> + special |= UNSIGNED;
>   break;
>   case KERN_TIMECOUNTER:
>   len = sysctl_tc(string, , mib, flags,
> Index: sys/kern/kern_watchdog.c
> ===
> RCS file: /cvs/src/sys/kern/kern_watchdog.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 kern_watchdog.c
> --- sys/kern/kern_watchdog.c  9 Jan 2017 17:58:44 -   1.13
> +++ sys/kern/kern_watchdog.c  7 Jun 2019 09:53:39 -
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  void wdog_tickle(void *arg);
>  int  (*wdog_ctl_cb)(void *, int) = NULL;
> @@ -37,6 +38,8 @@ int wdog_period = 0;
>  int  wdog_auto = 1;
>  struct   timeout wdog_timeout;
>  
> +#define WDOG_PERIOD_MAX  (INT32_MAX / hz)
> +
>  void
>  wdog_register(int (*cb)(void *, int), void *cb_arg)
>  {
> @@ -84,6 +87,8 @@ sysctl_wdog(int *name, u_int namelen, vo
>   error = sysctl_int(oldp, oldlenp, newp, newlen, );
>   if (error)
>   return (error);
> + if ((unsigned int)period > WDOG_PERIOD_MAX)
> + period = WDOG_PERIOD_MAX;
>   if (newp) {
>   timeout_del(_timeout);
>   wdog_period = (*wdog_ctl_cb)(wdog_ctl_cb_arg, period);
> 



does sticky-address work with source-hash?

2019-04-23 Thread YASUOKA Masahiko
"sticky-address" seems to work with "source-hash".

But the manual page looks like:

  round-robin [sticky-address]

  The round-robin option...
  
  source-hash [key]

  The source-hash option...

This misleads people into thinking that source-hash doesn't work with
sticky-address.

ok?

Index: share/man/man5/pf.conf.5
===
RCS file: /cvs/src/share/man/man5/pf.conf.5,v
retrieving revision 1.577
diff -u -p -r1.577 pf.conf.5
--- share/man/man5/pf.conf.512 Jul 2018 05:54:49 -  1.577
+++ share/man/man5/pf.conf.524 Apr 2019 05:17:02 -
@@ -1045,7 +1045,7 @@ Weights can be specified between 1 and 6
 Addresses with higher weights are selected more often.
 .Cm sticky-address
 is as described above.
-.It Cm source-hash Op Ar key
+.It Cm source-hash Oo Ar key Oc Op Cm sticky-address
 The
 .Cm source-hash
 option uses a hash of the source address to determine the redirection address,
@@ -1057,6 +1057,8 @@ by default
 .Xr pfctl 8
 randomly generates a key for source-hash every time the
 ruleset is reloaded.
+.Cm sticky-address
+is as described above.
 .It Cm static-port
 With
 .Cm nat-to



Re: npppd-users.5 typo fix

2019-02-20 Thread YASUOKA Masahiko
Thanks.

ok yasuoka

On Wed, 20 Feb 2019 18:28:40 +0100
Denis Fondras  wrote:
> Documentation better be accurate when syntax is not checked :)
> 
> Index: npppd/npppd-users.5
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd-users.5,v
> retrieving revision 1.7
> diff -u -p -r1.7 npppd-users.5
> --- npppd/npppd-users.5   13 Jul 2017 19:16:33 -  1.7
> +++ npppd/npppd-users.5   20 Feb 2019 17:24:19 -
> @@ -42,7 +42,7 @@ See
>  for the escape sequence.
>  .It framed-ip-address Ta
>  IPv4 address to be assigned for the user.
> -.It framed-ip-network Ta
> +.It framed-ip-netmask Ta
>  IPv4 netmask to be used for the user.
>  .It calling-number Ta
>  Calling phone number to check user's phone number.
> 



  1   2   3   >