Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-20 Thread Yann Ylavic
On Sat, Dec 19, 2020 at 11:02 PM Nick Kew  wrote:
>
> On Thu, 17 Dec 2020 17:31:20 +
> Nick Kew  wrote:
>
> > > On 17 Dec 2020, at 16:22, Joe Orton  wrote:
> >  [chop]
> > Thanks for prompting me to take a proper look at where this thread
> > started.
>
> Further thought: anything hardwired will come up against future
> use cases where it acts contrary to expectations and indeed
> requirements.  A future-proof solution is to make it configurable.
>
> I attach a patch I've just hacked (untested): if folks are happy
> with the approach I'll flesh it out and commit.

Looks good to me.

I still wonder if the default (when no xml2MimeType is configured)
shouldn't be something like:

if (!ctx->checkedmime) {
if (strncmp(ctype, "text/html", 9)
&& (!(x = strstr(ctype, "xml"))
|| (x > ctype && apr_isalnum(x[-1])
|| apr_isalnum(x[3]  {
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, bb) ;
}
ctx->checkedmime = 1;
}

so that we still include "text/html" and anything "xml" but not in the
middle of a word like the problematic
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
from PR 64339 (or github #150).
Or are there some real middle-word "xml" mime types which we want to handle?

Regards;
Yann.


Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-19 Thread Nick Kew
On Thu, 17 Dec 2020 17:31:20 +
Nick Kew  wrote:

> > On 17 Dec 2020, at 16:22, Joe Orton  wrote:
>  [chop]
> Thanks for prompting me to take a proper look at where this thread
> started.

Further thought: anything hardwired will come up against future
use cases where it acts contrary to expectations and indeed
requirements.  A future-proof solution is to make it configurable.

I attach a patch I've just hacked (untested): if folks are happy
with the approach I'll flesh it out and commit.

-- 
Nick Kew
Index: mod_xml2enc.c
===
--- mod_xml2enc.c	(revision 1884632)
+++ mod_xml2enc.c	(working copy)
@@ -84,12 +84,19 @@
 apr_bucket_brigade* bbnext;
 apr_bucket_brigade* bbsave;
 const char* encoding;
+int checkedmime;
 } xml2ctx;
 
 typedef struct {
+  int onoff;
+  const char *ctype;
+} mimetype;
+
+typedef struct {
 const char* default_charset;
 xmlCharEncoding default_encoding;
 apr_array_header_t* skipto;
+apr_array_header_t* mimetypes;
 } xml2cfg;
 
 typedef struct {
@@ -331,6 +338,7 @@
 int pending_meta = 0;
 char *ctype;
 char *p;
+xml2cfg *cfg;
 
 if (!ctx || !f->r->content_type) {
 /* log error about configuring this */
@@ -338,15 +346,38 @@
 return ap_pass_brigade(f->next, bb) ;
 }
 
-ctype = apr_pstrdup(f->r->pool, f->r->content_type);
-for (p = ctype; *p; ++p)
-if (isupper(*p))
-*p = tolower(*p);
+if (!ctx->checkedmime) {
+cfg = ap_get_module_config(f->r->per_dir_config, _module);
+ctype = apr_pstrdup(f->r->pool, f->r->content_type);
+for (p = ctype; *p; ++p)
+if (isupper(*p))
+*p = tolower(*p);
 
-/* only act if starts-with "text/" or contains "+xml" */
-if (strncmp(ctype, "text/", 5) && !strstr(ctype, "+xml"))  {
-ap_remove_output_filter(f);
-return ap_pass_brigade(f->next, bb) ;
+/* Check whether we're configured to act on this MIME type */
+if (cfg->mimetypes != NULL) {
+mimetype *conftypes = (mimetype*)cfg->mimetypes->elts;
+int i;
+for (i = 0; i < cfg->mimetypes->nelts; ++i) {
+if (!strncmp(conftypes[i].ctype, ctype,
+strlen(conftypes[i].ctype))) {
+if (conftypes[i].onoff == 0) {
+ap_remove_output_filter(f);
+return ap_pass_brigade(f->next, bb) ;
+}
+ctx->checkedmime = 1;
+break;
+}
+}
+}
+
+/* else only act if starts-with "text/" or contains "xml" */
+if (!ctx->checkedmime) {
+if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
+ap_remove_output_filter(f);
+return ap_pass_brigade(f->next, bb) ;
+}
+ctx->checkedmime = 1;
+}
 }
 
 if (ctx->bbsave == NULL) {
@@ -637,6 +668,27 @@
 attr->val = arg;
 return NULL;
 }
+static const char* set_mimetype(cmd_parms* cmd, void* CFG, const char* arg)
+{
+mimetype* type;
+xml2cfg* cfg = CFG;
+if (cfg->mimetypes == NULL)
+cfg->mimetypes = apr_array_make(cmd->pool, 6, sizeof(mimetype));
+type = apr_array_push(cfg->mimetypes);
+if (arg[0] == '-') {
+type->onoff = 0;
+type->ctype = arg+1;
+}
+else if (arg[0] == '+') {
+type->onoff = 1;
+type->ctype = arg+1;
+}
+else {
+type->onoff = 1;
+type->ctype = arg;
+}
+return NULL;
+}
 
 static const command_rec xml2enc_cmds[] = {
 AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
@@ -645,6 +697,8 @@
  "EncodingAlias charset alias [more aliases]"),
 AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
 "Ignore anything in front of the first of these elements"),
+AP_INIT_ITERATE("xml2MimeType", set_mimetype, NULL, OR_ALL,
+"List MIME types to process or leave untouched"),
 { NULL }
 };
 static void* xml2enc_config(apr_pool_t* pool, char* x)
@@ -664,6 +718,7 @@
 ret->default_charset = add->default_charset
  ? add->default_charset : base->default_charset;
 ret->skipto = add->skipto ? add->skipto : base->skipto;
+ret->mimetypes = add->mimetypes ? add->mimetypes : base->mimetypes;
 return ret;
 }
 


Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-17 Thread Nick Kew



> On 17 Dec 2020, at 16:22, Joe Orton  wrote:
> 
> On Wed, Dec 16, 2020 at 07:41:59PM +, Nick Kew wrote:
>>> On 16 Dec 2020, at 17:47, Yann Ylavic  wrote:
 Wouldn't this stop matching "application/xml" for instance?
 
 Possibly this test instead:
   if (strncmp(ctype, "text/", 5)
   && (!(x = strstr(ctype, "xml"))
   || x == ctype || !strchr("/+", x[-1]))) {
 ?
>>> 
>>> I would even remove the "text/" check (why act on "text/plain" for
>>> instance), so maybe:
>>>   if (!(x = strstr(ctype, "xml"))
>>>   || x == ctype || !strchr("/+", x[-1])
>>>   || apr_isalnum(x[3])) {
>>> ?
>> 
>> Be liberal in what you accept.  You can limit it further in configuration,
>> but you can't override a hardwired check.
>> 
>> It certainly needs to operate on text/html for mod_proxy_html, and users 
>> might
>> find reasons for running it on other text types as an alternative to an iconv
>> filter like mod_charset(_lite).
> 
> I'm not sure if you are agreeing with Yann or not.  You wrote the code, 
> how do you think we should you resolve PR 64339, NOTABUG & revert 
> r1884505 or something else?

Thanks for prompting me to take a proper look at where this thread started.

I started to compose a reply here, bug got bogged down.  So I've gone to the
PR instead.  I see there's a patch submitted by Giovanni Bechis which looks
more-or-less acceptable, though it does raise further questions.

I've marked the bug NEEDINFO and asked further questions there.

-- 
Nick Kew

Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-17 Thread Joe Orton
On Wed, Dec 16, 2020 at 07:41:59PM +, Nick Kew wrote:
> > On 16 Dec 2020, at 17:47, Yann Ylavic  wrote:
> >> Wouldn't this stop matching "application/xml" for instance?
> >> 
> >> Possibly this test instead:
> >>if (strncmp(ctype, "text/", 5)
> >>&& (!(x = strstr(ctype, "xml"))
> >>|| x == ctype || !strchr("/+", x[-1]))) {
> >> ?
> > 
> > I would even remove the "text/" check (why act on "text/plain" for
> > instance), so maybe:
> >if (!(x = strstr(ctype, "xml"))
> >|| x == ctype || !strchr("/+", x[-1])
> >|| apr_isalnum(x[3])) {
> > ?
> 
> Be liberal in what you accept.  You can limit it further in configuration,
> but you can't override a hardwired check.
> 
> It certainly needs to operate on text/html for mod_proxy_html, and users might
> find reasons for running it on other text types as an alternative to an iconv
> filter like mod_charset(_lite).

I'm not sure if you are agreeing with Yann or not.  You wrote the code, 
how do you think we should you resolve PR 64339, NOTABUG & revert 
r1884505 or something else?

Regards, Joe



Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-16 Thread Nick Kew



> On 16 Dec 2020, at 17:47, Yann Ylavic  wrote:
> 
> On Wed, Dec 16, 2020 at 6:36 PM Yann Ylavic  wrote:
>> 
>> On Wed, Dec 16, 2020 at 5:23 PM  wrote:
>>> 
>>> -/* only act if starts-with "text/" or contains "xml" */
>>> -if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
>>> +/* only act if starts-with "text/" or contains "+xml" */
>>> +if (strncmp(ctype, "text/", 5) && !strstr(ctype, "+xml"))  {
>>> ap_remove_output_filter(f);
>>> return ap_pass_brigade(f->next, bb) ;
>>> }
>> 
>> Wouldn't this stop matching "application/xml" for instance?
>> 
>> Possibly this test instead:
>>if (strncmp(ctype, "text/", 5)
>>&& (!(x = strstr(ctype, "xml"))
>>|| x == ctype || !strchr("/+", x[-1]))) {
>> ?
> 
> I would even remove the "text/" check (why act on "text/plain" for
> instance), so maybe:
>if (!(x = strstr(ctype, "xml"))
>|| x == ctype || !strchr("/+", x[-1])
>|| apr_isalnum(x[3])) {
> ?

Be liberal in what you accept.  You can limit it further in configuration,
but you can't override a hardwired check.

It certainly needs to operate on text/html for mod_proxy_html, and users might
find reasons for running it on other text types as an alternative to an iconv
filter like mod_charset(_lite).

-- 
Nick Kew

Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-16 Thread Yann Ylavic
On Wed, Dec 16, 2020 at 6:36 PM Yann Ylavic  wrote:
>
> On Wed, Dec 16, 2020 at 5:23 PM  wrote:
> >
> > -/* only act if starts-with "text/" or contains "xml" */
> > -if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
> > +/* only act if starts-with "text/" or contains "+xml" */
> > +if (strncmp(ctype, "text/", 5) && !strstr(ctype, "+xml"))  {
> >  ap_remove_output_filter(f);
> >  return ap_pass_brigade(f->next, bb) ;
> >  }
>
> Wouldn't this stop matching "application/xml" for instance?
>
> Possibly this test instead:
> if (strncmp(ctype, "text/", 5)
> && (!(x = strstr(ctype, "xml"))
> || x == ctype || !strchr("/+", x[-1]))) {
> ?

I would even remove the "text/" check (why act on "text/plain" for
instance), so maybe:
if (!(x = strstr(ctype, "xml"))
|| x == ctype || !strchr("/+", x[-1])
|| apr_isalnum(x[3])) {
?


Re: svn commit: r1884505 - in /httpd/httpd/trunk: changes-entries/pr64339.txt modules/filters/mod_xml2enc.c

2020-12-16 Thread Yann Ylavic
On Wed, Dec 16, 2020 at 5:23 PM  wrote:
>
> -/* only act if starts-with "text/" or contains "xml" */
> -if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
> +/* only act if starts-with "text/" or contains "+xml" */
> +if (strncmp(ctype, "text/", 5) && !strstr(ctype, "+xml"))  {
>  ap_remove_output_filter(f);
>  return ap_pass_brigade(f->next, bb) ;
>  }

Wouldn't this stop matching "application/xml" for instance?

Possibly this test instead:
if (strncmp(ctype, "text/", 5)
&& (!(x = strstr(ctype, "xml"))
|| x == ctype || !strchr("/+", x[-1]))) {
?