[Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-16 Thread Jani Nikula
On Fri, 14 Dec 2012, david at tethera.net wrote:
> From: David Bremner 
>
> We are able to detect more errors by looking at the string before it
> is hex-decoded. We also need this to avoid the query quoting for more
> general queries (to be written) that will mess up raw message-ids.
> ---
>  notmuch-restore.c |   18 +-
>  tag-util.c|   26 --
>  tag-util.h|5 -
>  test/dump-restore |5 ++---
>  4 files changed, 27 insertions(+), 27 deletions(-)
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 40596a8..112f2f3 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
> char *argv[])
>   if (input_format == DUMP_FORMAT_SUP) {
>   ret = parse_sup_line (ctx, line, _string, tag_ops);
>   } else {
> - ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
> + ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
> TAG_FLAG_ID_ONLY,
> _string, tag_ops);

I realize that we've screwed up a bit here already in the restore
series. parse_sup_line() allocates query_string for each input line, but
doesn't free it during parsing. parse_tag_line() sets query_string to
point to the query string within line. And now it gets worse when
query_string is allocated vs. set to point to another buffer depending
on TAG_FLAG_ID_ONLY, so it's not an easy interface to use.

> -
> - if (ret == 0) {
> - if (strncmp ("id:", query_string, 3) != 0) {
> - fprintf (stderr, "Warning: unsupported query: %s\n", 
> query_string);
> - continue;
> - }
> - /* delete id: from front of string; tag_message
> -  * expects a raw message-id.
> -  *
> -  * XXX: Note that query string id:foo and bar will be
> -  * interpreted as a message id "foo and bar". This
> -  * should eventually be fixed to give a better error
> -  * message.
> -  */
> - query_string = query_string + 3;
> - }



>   }
>  
>   if (ret > 0)
> diff --git a/tag-util.c b/tag-util.c
> index e1181f8..8fea76c 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
>  }
>  
>  /* tok now points to the query string */
> -if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> - ret = line_error (TAG_PARSE_INVALID, line_for_error,
> -   "hex decoding of query %s failed", tok);
> - goto DONE;
> +if (flags & TAG_FLAG_ID_ONLY) {
> + /* this is under the assumption that any whitespace in the
> +  * message-id must be hex-encoded. The check is probably not
> +  * perfect for exotic unicode whitespace; as fallback the
> +  * search for strange message-ids will fail */
> + if ((strncmp ("id:", tok, 3) != 0) ||

The current wording is, "Any characters in  and  MAY
be hex encoded with %NN...", but the above no longer matches that, as
"id:" must not be encoded. In the interest of keeping the documentation
concise, I think you should move the above check after
hex_decode_inplace(), but keep the below check here. That should do it,
right?

> + (strcspn (tok, " \t") < strlen (tok))) {
> + ret = line_error (TAG_PARSE_INVALID, line_for_error,
> +   "query '%s' is not 'id:'", tok);
> + goto DONE;
> + }
> + if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> + ret = line_error (TAG_PARSE_INVALID, line_for_error,
> +   "hex decoding of query %s failed", tok);
> + goto DONE;
> + }
> + /* skip 'id:' */
> + *query_string = tok + 3;
> +} else {
> + ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
>  }

It's not pretty that query_string gets allocated or not depending on a
flag that doesn't seem to have anything to do with that. I don't have a
good suggestion now, though, and I'd like to keep the restore path like
it is, without allocation.

>  
> -*query_string = tok;
> -
>DONE:
>  talloc_free (line_for_error);
>  return ret;
> diff --git a/tag-util.h b/tag-util.h
> index 2889736..7674051 100644
> --- a/tag-util.h
> +++ b/tag-util.h
> @@ -26,7 +26,10 @@ typedef enum {
>  /* Accept strange tags that might be user error;
>   * intended for use by notmuch-restore.
>   */
> -TAG_FLAG_BE_GENEROUS = (1 << 3)
> +TAG_FLAG_BE_GENEROUS = (1 << 3),
> +
> +/* Query consists of a single id:$message-id */
> +TAG_FLAG_ID_ONLY = (1 << 4)
>  
>  } tag_op_flag_t;
>  
> diff --git a/test/dump-restore b/test/dump-restore
> index 6a989b6..eb7933a 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -199,19 +199,18 @@ a
>  # the next non-comment line should report an an empty tag error for
>  # batch tagging, but not for restore
> 

[Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-15 Thread Mark Walters
On Fri, 14 Dec 2012, david at tethera.net wrote:
> From: David Bremner 
>
> We are able to detect more errors by looking at the string before it
> is hex-decoded. We also need this to avoid the query quoting for more
> general queries (to be written) that will mess up raw message-ids.
> ---
>  notmuch-restore.c |   18 +-
>  tag-util.c|   26 --
>  tag-util.h|5 -
>  test/dump-restore |5 ++---
>  4 files changed, 27 insertions(+), 27 deletions(-)
>
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index 40596a8..112f2f3 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
> char *argv[])
>   if (input_format == DUMP_FORMAT_SUP) {
>   ret = parse_sup_line (ctx, line, _string, tag_ops);
>   } else {
> - ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
> + ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
> TAG_FLAG_ID_ONLY,
> _string, tag_ops);
> -
> - if (ret == 0) {
> - if (strncmp ("id:", query_string, 3) != 0) {
> - fprintf (stderr, "Warning: unsupported query: %s\n", 
> query_string);
> - continue;
> - }
> - /* delete id: from front of string; tag_message
> -  * expects a raw message-id.
> -  *
> -  * XXX: Note that query string id:foo and bar will be
> -  * interpreted as a message id "foo and bar". This
> -  * should eventually be fixed to give a better error
> -  * message.
> -  */
> - query_string = query_string + 3;
> - }
>   }
>  
>   if (ret > 0)
> diff --git a/tag-util.c b/tag-util.c
> index e1181f8..8fea76c 100644
> --- a/tag-util.c
> +++ b/tag-util.c
> @@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
>  }
>  
>  /* tok now points to the query string */
> -if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> - ret = line_error (TAG_PARSE_INVALID, line_for_error,
> -   "hex decoding of query %s failed", tok);
> - goto DONE;
> +if (flags & TAG_FLAG_ID_ONLY) {
> + /* this is under the assumption that any whitespace in the
> +  * message-id must be hex-encoded. The check is probably not
> +  * perfect for exotic unicode whitespace; as fallback the
> +  * search for strange message-ids will fail */
> + if ((strncmp ("id:", tok, 3) != 0) ||
> + (strcspn (tok, " \t") < strlen (tok))) {
> + ret = line_error (TAG_PARSE_INVALID, line_for_error,
> +   "query '%s' is not 'id:'", tok);
> + goto DONE;
> + }
> + if (hex_decode_inplace (tok) != HEX_SUCCESS) {
> + ret = line_error (TAG_PARSE_INVALID, line_for_error,
> +   "hex decoding of query %s failed", tok);
> + goto DONE;
> + }
> + /* skip 'id:' */
> + *query_string = tok + 3;

This looks like it doesn't double_quote the query_string in this (the
TAG_FLAG_ID_ONLY) case. Is that deliberate?

Best wishes

Mark

> +} else {
> + ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
>  }
>  
> -*query_string = tok;
> -
>DONE:
>  talloc_free (line_for_error);
>  return ret;
> diff --git a/tag-util.h b/tag-util.h
> index 2889736..7674051 100644
> --- a/tag-util.h
> +++ b/tag-util.h
> @@ -26,7 +26,10 @@ typedef enum {
>  /* Accept strange tags that might be user error;
>   * intended for use by notmuch-restore.
>   */
> -TAG_FLAG_BE_GENEROUS = (1 << 3)
> +TAG_FLAG_BE_GENEROUS = (1 << 3),
> +
> +/* Query consists of a single id:$message-id */
> +TAG_FLAG_ID_ONLY = (1 << 4)
>  
>  } tag_op_flag_t;
>  
> diff --git a/test/dump-restore b/test/dump-restore
> index 6a989b6..eb7933a 100755
> --- a/test/dump-restore
> +++ b/test/dump-restore
> @@ -199,19 +199,18 @@ a
>  # the next non-comment line should report an an empty tag error for
>  # batch tagging, but not for restore
>  + +e -- id:20091117232137.GA7669 at griffis1.net
> -# highlight the sketchy id parsing; this should be last
>  +g -- id:foo and bar
>  EOF
>  
>  cat < EXPECTED
> -Warning: unsupported query: a
> +Warning: query 'a' is not 'id:' [a]
>  Warning: no query string [+0]
>  Warning: no query string [+a +b]
>  Warning: missing query string [+a +b ]
>  Warning: no query string after -- [+c +d --]
>  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
>  Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
> -Warning: cannot apply tags to missing message: foo and bar
> +Warning: query 'id:foo and bar' is not 'id:' [+g -- id:foo and 
> bar]
>  EOF
>  
>  test_expect_equal_file EXPECTED OUTPUT
> -- 
> 1.7.10.4
>
> ___
> notmuch mailing list
> notmuch at notmuchmail.org
> 

[Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-15 Thread David Bremner
Mark Walters  writes:

>> +if (hex_decode_inplace (tok) != HEX_SUCCESS) {
>> +ret = line_error (TAG_PARSE_INVALID, line_for_error,
>> +  "hex decoding of query %s failed", tok);
>> +goto DONE;
>> +}
>> +/* skip 'id:' */
>> +*query_string = tok + 3;
>
> This looks like it doesn't double_quote the query_string in this (the
> TAG_FLAG_ID_ONLY) case. Is that deliberate?

Yes, that's what I meant by 

,
| We also need this to avoid the query quoting for more
| general queries (to be written) that will mess up raw message-ids.
`

perhaps it deserves a comment in the code.


Re: [Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-15 Thread Mark Walters
On Fri, 14 Dec 2012, da...@tethera.net wrote:
 From: David Bremner brem...@debian.org

 We are able to detect more errors by looking at the string before it
 is hex-decoded. We also need this to avoid the query quoting for more
 general queries (to be written) that will mess up raw message-ids.
 ---
  notmuch-restore.c |   18 +-
  tag-util.c|   26 --
  tag-util.h|5 -
  test/dump-restore |5 ++---
  4 files changed, 27 insertions(+), 27 deletions(-)

 diff --git a/notmuch-restore.c b/notmuch-restore.c
 index 40596a8..112f2f3 100644
 --- a/notmuch-restore.c
 +++ b/notmuch-restore.c
 @@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
 char *argv[])
   if (input_format == DUMP_FORMAT_SUP) {
   ret = parse_sup_line (ctx, line, query_string, tag_ops);
   } else {
 - ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
 + ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
 TAG_FLAG_ID_ONLY,
 query_string, tag_ops);
 -
 - if (ret == 0) {
 - if (strncmp (id:, query_string, 3) != 0) {
 - fprintf (stderr, Warning: unsupported query: %s\n, 
 query_string);
 - continue;
 - }
 - /* delete id: from front of string; tag_message
 -  * expects a raw message-id.
 -  *
 -  * XXX: Note that query string id:foo and bar will be
 -  * interpreted as a message id foo and bar. This
 -  * should eventually be fixed to give a better error
 -  * message.
 -  */
 - query_string = query_string + 3;
 - }
   }
  
   if (ret  0)
 diff --git a/tag-util.c b/tag-util.c
 index e1181f8..8fea76c 100644
 --- a/tag-util.c
 +++ b/tag-util.c
 @@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
  }
  
  /* tok now points to the query string */
 -if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 - ret = line_error (TAG_PARSE_INVALID, line_for_error,
 -   hex decoding of query %s failed, tok);
 - goto DONE;
 +if (flags  TAG_FLAG_ID_ONLY) {
 + /* this is under the assumption that any whitespace in the
 +  * message-id must be hex-encoded. The check is probably not
 +  * perfect for exotic unicode whitespace; as fallback the
 +  * search for strange message-ids will fail */
 + if ((strncmp (id:, tok, 3) != 0) ||
 + (strcspn (tok,  \t)  strlen (tok))) {
 + ret = line_error (TAG_PARSE_INVALID, line_for_error,
 +   query '%s' is not 'id:message-id', tok);
 + goto DONE;
 + }
 + if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 + ret = line_error (TAG_PARSE_INVALID, line_for_error,
 +   hex decoding of query %s failed, tok);
 + goto DONE;
 + }
 + /* skip 'id:' */
 + *query_string = tok + 3;

This looks like it doesn't double_quote the query_string in this (the
TAG_FLAG_ID_ONLY) case. Is that deliberate?

Best wishes

Mark

 +} else {
 + ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
  }
  
 -*query_string = tok;
 -
DONE:
  talloc_free (line_for_error);
  return ret;
 diff --git a/tag-util.h b/tag-util.h
 index 2889736..7674051 100644
 --- a/tag-util.h
 +++ b/tag-util.h
 @@ -26,7 +26,10 @@ typedef enum {
  /* Accept strange tags that might be user error;
   * intended for use by notmuch-restore.
   */
 -TAG_FLAG_BE_GENEROUS = (1  3)
 +TAG_FLAG_BE_GENEROUS = (1  3),
 +
 +/* Query consists of a single id:$message-id */
 +TAG_FLAG_ID_ONLY = (1  4)
  
  } tag_op_flag_t;
  
 diff --git a/test/dump-restore b/test/dump-restore
 index 6a989b6..eb7933a 100755
 --- a/test/dump-restore
 +++ b/test/dump-restore
 @@ -199,19 +199,18 @@ a
  # the next non-comment line should report an an empty tag error for
  # batch tagging, but not for restore
  + +e -- id:20091117232137.ga7...@griffis1.net
 -# highlight the sketchy id parsing; this should be last
  +g -- id:foo and bar
  EOF
  
  cat EOF  EXPECTED
 -Warning: unsupported query: a
 +Warning: query 'a' is not 'id:message-id' [a]
  Warning: no query string [+0]
  Warning: no query string [+a +b]
  Warning: missing query string [+a +b ]
  Warning: no query string after -- [+c +d --]
  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
  Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
 -Warning: cannot apply tags to missing message: foo and bar
 +Warning: query 'id:foo and bar' is not 'id:message-id' [+g -- id:foo and 
 bar]
  EOF
  
  test_expect_equal_file EXPECTED OUTPUT
 -- 
 1.7.10.4

 ___
 notmuch mailing list
 notmuch@notmuchmail.org
 http://notmuchmail.org/mailman/listinfo/notmuch
___
notmuch mailing list

Re: [Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-15 Thread David Bremner
Mark Walters markwalters1...@gmail.com writes:

 +if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 +ret = line_error (TAG_PARSE_INVALID, line_for_error,
 +  hex decoding of query %s failed, tok);
 +goto DONE;
 +}
 +/* skip 'id:' */
 +*query_string = tok + 3;

 This looks like it doesn't double_quote the query_string in this (the
 TAG_FLAG_ID_ONLY) case. Is that deliberate?

Yes, that's what I meant by 

,
| We also need this to avoid the query quoting for more
| general queries (to be written) that will mess up raw message-ids.
`

perhaps it deserves a comment in the code.
___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


Re: [Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-15 Thread Jani Nikula
On Fri, 14 Dec 2012, da...@tethera.net wrote:
 From: David Bremner brem...@debian.org

 We are able to detect more errors by looking at the string before it
 is hex-decoded. We also need this to avoid the query quoting for more
 general queries (to be written) that will mess up raw message-ids.
 ---
  notmuch-restore.c |   18 +-
  tag-util.c|   26 --
  tag-util.h|5 -
  test/dump-restore |5 ++---
  4 files changed, 27 insertions(+), 27 deletions(-)

 diff --git a/notmuch-restore.c b/notmuch-restore.c
 index 40596a8..112f2f3 100644
 --- a/notmuch-restore.c
 +++ b/notmuch-restore.c
 @@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
 char *argv[])
   if (input_format == DUMP_FORMAT_SUP) {
   ret = parse_sup_line (ctx, line, query_string, tag_ops);
   } else {
 - ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
 + ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
 TAG_FLAG_ID_ONLY,
 query_string, tag_ops);

I realize that we've screwed up a bit here already in the restore
series. parse_sup_line() allocates query_string for each input line, but
doesn't free it during parsing. parse_tag_line() sets query_string to
point to the query string within line. And now it gets worse when
query_string is allocated vs. set to point to another buffer depending
on TAG_FLAG_ID_ONLY, so it's not an easy interface to use.

 -
 - if (ret == 0) {
 - if (strncmp (id:, query_string, 3) != 0) {
 - fprintf (stderr, Warning: unsupported query: %s\n, 
 query_string);
 - continue;
 - }
 - /* delete id: from front of string; tag_message
 -  * expects a raw message-id.
 -  *
 -  * XXX: Note that query string id:foo and bar will be
 -  * interpreted as a message id foo and bar. This
 -  * should eventually be fixed to give a better error
 -  * message.
 -  */
 - query_string = query_string + 3;
 - }



   }
  
   if (ret  0)
 diff --git a/tag-util.c b/tag-util.c
 index e1181f8..8fea76c 100644
 --- a/tag-util.c
 +++ b/tag-util.c
 @@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
  }
  
  /* tok now points to the query string */
 -if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 - ret = line_error (TAG_PARSE_INVALID, line_for_error,
 -   hex decoding of query %s failed, tok);
 - goto DONE;
 +if (flags  TAG_FLAG_ID_ONLY) {
 + /* this is under the assumption that any whitespace in the
 +  * message-id must be hex-encoded. The check is probably not
 +  * perfect for exotic unicode whitespace; as fallback the
 +  * search for strange message-ids will fail */
 + if ((strncmp (id:, tok, 3) != 0) ||

The current wording is, Any characters in tag and search-term MAY
be hex encoded with %NN..., but the above no longer matches that, as
id: must not be encoded. In the interest of keeping the documentation
concise, I think you should move the above check after
hex_decode_inplace(), but keep the below check here. That should do it,
right?

 + (strcspn (tok,  \t)  strlen (tok))) {
 + ret = line_error (TAG_PARSE_INVALID, line_for_error,
 +   query '%s' is not 'id:message-id', tok);
 + goto DONE;
 + }
 + if (hex_decode_inplace (tok) != HEX_SUCCESS) {
 + ret = line_error (TAG_PARSE_INVALID, line_for_error,
 +   hex decoding of query %s failed, tok);
 + goto DONE;
 + }
 + /* skip 'id:' */
 + *query_string = tok + 3;
 +} else {
 + ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
  }

It's not pretty that query_string gets allocated or not depending on a
flag that doesn't seem to have anything to do with that. I don't have a
good suggestion now, though, and I'd like to keep the restore path like
it is, without allocation.

  
 -*query_string = tok;
 -
DONE:
  talloc_free (line_for_error);
  return ret;
 diff --git a/tag-util.h b/tag-util.h
 index 2889736..7674051 100644
 --- a/tag-util.h
 +++ b/tag-util.h
 @@ -26,7 +26,10 @@ typedef enum {
  /* Accept strange tags that might be user error;
   * intended for use by notmuch-restore.
   */
 -TAG_FLAG_BE_GENEROUS = (1  3)
 +TAG_FLAG_BE_GENEROUS = (1  3),
 +
 +/* Query consists of a single id:$message-id */
 +TAG_FLAG_ID_ONLY = (1  4)
  
  } tag_op_flag_t;
  
 diff --git a/test/dump-restore b/test/dump-restore
 index 6a989b6..eb7933a 100755
 --- a/test/dump-restore
 +++ b/test/dump-restore
 @@ -199,19 +199,18 @@ a
  # the next non-comment line should report an an empty tag error for
  # batch tagging, but not for restore
  + +e -- id:20091117232137.ga7...@griffis1.net
 -# highlight the sketchy id parsing; 

[Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-14 Thread da...@tethera.net
From: David Bremner 

We are able to detect more errors by looking at the string before it
is hex-decoded. We also need this to avoid the query quoting for more
general queries (to be written) that will mess up raw message-ids.
---
 notmuch-restore.c |   18 +-
 tag-util.c|   26 --
 tag-util.h|5 -
 test/dump-restore |5 ++---
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 40596a8..112f2f3 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
char *argv[])
if (input_format == DUMP_FORMAT_SUP) {
ret = parse_sup_line (ctx, line, _string, tag_ops);
} else {
-   ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+   ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
TAG_FLAG_ID_ONLY,
  _string, tag_ops);
-
-   if (ret == 0) {
-   if (strncmp ("id:", query_string, 3) != 0) {
-   fprintf (stderr, "Warning: unsupported query: %s\n", 
query_string);
-   continue;
-   }
-   /* delete id: from front of string; tag_message
-* expects a raw message-id.
-*
-* XXX: Note that query string id:foo and bar will be
-* interpreted as a message id "foo and bar". This
-* should eventually be fixed to give a better error
-* message.
-*/
-   query_string = query_string + 3;
-   }
}

if (ret > 0)
diff --git a/tag-util.c b/tag-util.c
index e1181f8..8fea76c 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
 }

 /* tok now points to the query string */
-if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-   ret = line_error (TAG_PARSE_INVALID, line_for_error,
- "hex decoding of query %s failed", tok);
-   goto DONE;
+if (flags & TAG_FLAG_ID_ONLY) {
+   /* this is under the assumption that any whitespace in the
+* message-id must be hex-encoded. The check is probably not
+* perfect for exotic unicode whitespace; as fallback the
+* search for strange message-ids will fail */
+   if ((strncmp ("id:", tok, 3) != 0) ||
+   (strcspn (tok, " \t") < strlen (tok))) {
+   ret = line_error (TAG_PARSE_INVALID, line_for_error,
+ "query '%s' is not 'id:'", tok);
+   goto DONE;
+   }
+   if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+   ret = line_error (TAG_PARSE_INVALID, line_for_error,
+ "hex decoding of query %s failed", tok);
+   goto DONE;
+   }
+   /* skip 'id:' */
+   *query_string = tok + 3;
+} else {
+   ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
 }

-*query_string = tok;
-
   DONE:
 talloc_free (line_for_error);
 return ret;
diff --git a/tag-util.h b/tag-util.h
index 2889736..7674051 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -26,7 +26,10 @@ typedef enum {
 /* Accept strange tags that might be user error;
  * intended for use by notmuch-restore.
  */
-TAG_FLAG_BE_GENEROUS = (1 << 3)
+TAG_FLAG_BE_GENEROUS = (1 << 3),
+
+/* Query consists of a single id:$message-id */
+TAG_FLAG_ID_ONLY = (1 << 4)

 } tag_op_flag_t;

diff --git a/test/dump-restore b/test/dump-restore
index 6a989b6..eb7933a 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -199,19 +199,18 @@ a
 # the next non-comment line should report an an empty tag error for
 # batch tagging, but not for restore
 + +e -- id:20091117232137.GA7669 at griffis1.net
-# highlight the sketchy id parsing; this should be last
 +g -- id:foo and bar
 EOF

 cat < EXPECTED
-Warning: unsupported query: a
+Warning: query 'a' is not 'id:' [a]
 Warning: no query string [+0]
 Warning: no query string [+a +b]
 Warning: missing query string [+a +b ]
 Warning: no query string after -- [+c +d --]
 Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
 Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
-Warning: cannot apply tags to missing message: foo and bar
+Warning: query 'id:foo and bar' is not 'id:' [+g -- id:foo and bar]
 EOF

 test_expect_equal_file EXPECTED OUTPUT
-- 
1.7.10.4



[Patch v7 06/14] notmuch-restore: move query handling for batch restore to parser

2012-12-14 Thread david
From: David Bremner brem...@debian.org

We are able to detect more errors by looking at the string before it
is hex-decoded. We also need this to avoid the query quoting for more
general queries (to be written) that will mess up raw message-ids.
---
 notmuch-restore.c |   18 +-
 tag-util.c|   26 --
 tag-util.h|5 -
 test/dump-restore |5 ++---
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 40596a8..112f2f3 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -208,24 +208,8 @@ notmuch_restore_command (unused (void *ctx), int argc, 
char *argv[])
if (input_format == DUMP_FORMAT_SUP) {
ret = parse_sup_line (ctx, line, query_string, tag_ops);
} else {
-   ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+   ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS | 
TAG_FLAG_ID_ONLY,
  query_string, tag_ops);
-
-   if (ret == 0) {
-   if (strncmp (id:, query_string, 3) != 0) {
-   fprintf (stderr, Warning: unsupported query: %s\n, 
query_string);
-   continue;
-   }
-   /* delete id: from front of string; tag_message
-* expects a raw message-id.
-*
-* XXX: Note that query string id:foo and bar will be
-* interpreted as a message id foo and bar. This
-* should eventually be fixed to give a better error
-* message.
-*/
-   query_string = query_string + 3;
-   }
}
 
if (ret  0)
diff --git a/tag-util.c b/tag-util.c
index e1181f8..8fea76c 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -201,14 +201,28 @@ parse_tag_line (void *ctx, char *line,
 }
 
 /* tok now points to the query string */
-if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-   ret = line_error (TAG_PARSE_INVALID, line_for_error,
- hex decoding of query %s failed, tok);
-   goto DONE;
+if (flags  TAG_FLAG_ID_ONLY) {
+   /* this is under the assumption that any whitespace in the
+* message-id must be hex-encoded. The check is probably not
+* perfect for exotic unicode whitespace; as fallback the
+* search for strange message-ids will fail */
+   if ((strncmp (id:, tok, 3) != 0) ||
+   (strcspn (tok,  \t)  strlen (tok))) {
+   ret = line_error (TAG_PARSE_INVALID, line_for_error,
+ query '%s' is not 'id:message-id', tok);
+   goto DONE;
+   }
+   if (hex_decode_inplace (tok) != HEX_SUCCESS) {
+   ret = line_error (TAG_PARSE_INVALID, line_for_error,
+ hex decoding of query %s failed, tok);
+   goto DONE;
+   }
+   /* skip 'id:' */
+   *query_string = tok + 3;
+} else {
+   ret = quote_and_decode_query (ctx, tok, line_for_error, query_string);
 }
 
-*query_string = tok;
-
   DONE:
 talloc_free (line_for_error);
 return ret;
diff --git a/tag-util.h b/tag-util.h
index 2889736..7674051 100644
--- a/tag-util.h
+++ b/tag-util.h
@@ -26,7 +26,10 @@ typedef enum {
 /* Accept strange tags that might be user error;
  * intended for use by notmuch-restore.
  */
-TAG_FLAG_BE_GENEROUS = (1  3)
+TAG_FLAG_BE_GENEROUS = (1  3),
+
+/* Query consists of a single id:$message-id */
+TAG_FLAG_ID_ONLY = (1  4)
 
 } tag_op_flag_t;
 
diff --git a/test/dump-restore b/test/dump-restore
index 6a989b6..eb7933a 100755
--- a/test/dump-restore
+++ b/test/dump-restore
@@ -199,19 +199,18 @@ a
 # the next non-comment line should report an an empty tag error for
 # batch tagging, but not for restore
 + +e -- id:20091117232137.ga7...@griffis1.net
-# highlight the sketchy id parsing; this should be last
 +g -- id:foo and bar
 EOF
 
 cat EOF  EXPECTED
-Warning: unsupported query: a
+Warning: query 'a' is not 'id:message-id' [a]
 Warning: no query string [+0]
 Warning: no query string [+a +b]
 Warning: missing query string [+a +b ]
 Warning: no query string after -- [+c +d --]
 Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
 Warning: hex decoding of query id:%yy failed [+e +f id:%yy]
-Warning: cannot apply tags to missing message: foo and bar
+Warning: query 'id:foo and bar' is not 'id:message-id' [+g -- id:foo and bar]
 EOF
 
 test_expect_equal_file EXPECTED OUTPUT
-- 
1.7.10.4

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch