On Thu, 26 Jul 2012 13:22:00 +0200 Markus Armbruster <arm...@redhat.com> wrote:
> Peter Maydell <peter.mayd...@linaro.org> writes: > > > On 25 July 2012 20:18, Luiz Capitulino <lcapitul...@redhat.com> wrote: > >> Peter Maydell <peter.mayd...@linaro.org> wrote: > >>> On 25 July 2012 17:54, Luiz Capitulino <lcapitul...@redhat.com> wrote: > >>> > --- a/scripts/qapi.py > >>> > +++ b/scripts/qapi.py > >>> > @@ -21,7 +21,9 @@ def tokenize(data): > >>> > elif data[0] == "'": > >>> > data = data[1:] > >>> > string = '' > >>> > - while data[0] != "'": > >>> > + while True: > >>> > + if data[0] == "'" and string[len(string)-1] != "\\": > >>> > + break > >>> > string += data[0] > >>> > data = data[1:] > >>> > data = data[1:] > >>> > >>> Won't this cause us to look at string[-1] if > >>> the input data has two ' characters in a row? > >> > >> Non escaped? If you meant '' that's a zero length string and should work, > >> but > >> if you meant 'foo '' bar' that's illegal, because ' characters > >> should be escaped. > > > > I meant the zero length string case. yes. We come in with data = "''", > > strip the first ' and set string to empty. Then in the first time > > in the while loop data[0] is "'" but len(string) is 0 and so we'll > > do string[-1] which I think will throw an exception. > > > > ...and yep, quick test of a nobbbled qapi-schema.json confirms: > > $ python /home/pm215/src/qemu/qemu/scripts/qapi-types.py -h -o "." < > > /home/pm215/src/qemu/qemu/qapi-schema.json > > Traceback (most recent call last): > > File "/home/pm215/src/qemu/qemu/scripts/qapi-types.py", line 260, in > > <module> > > exprs = parse_schema(sys.stdin) > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 78, in parse_schema > > expr_eval = evaluate(expr) > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 64, in evaluate > > return parse(map(lambda x: x, tokenize(string)))[0] > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 25, in tokenize > > if data[0] == "'" and string[len(string)-1] != "\\": > > IndexError: string index out of range > > > > Try this (very lightly tested but seems to work): > > (feel free to do something nicer than raising an exception on > > the syntax error, and sorry I'm feeling too lazy to make this > > an actual patch email) > > > > Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> Peter, I've replaced my original 07/11 patch with your patch below. > > > > --- a/scripts/qapi.py > > +++ b/scripts/qapi.py > > @@ -21,10 +21,16 @@ def tokenize(data): > > elif data[0] == "'": > > data = data[1:] > > string = '' > > - while data[0] != "'": > > - string += data[0] > > - data = data[1:] > > - data = data[1:] > > + while True: > > + pos = data.find("'") > > + if pos == -1: > > + raise Exception("Mismatched quotes") > > + string += data[0:pos] > > + data = data[pos+1:] > > + if len(string) == 0 or string[-1] != "\\": > > + # found a ' and it wasn't escaped > > + break > > + string = string[0:-1] + "'" > > yield string > > > > def parse(tokens): > > > > (if anybody wants to be able to use '\\' to escape escapes then > > this approach is a bit stuffed, of course.) > > For what it's worth, the orthodox way to lexically analyze strings is a > finite automaton. Utterly untested sketch: Feel free to send a patch if you're strong about this. > > diff --git a/scripts/qapi.py b/scripts/qapi.py > index 8082af3..a745e92 100644 > --- a/scripts/qapi.py > +++ b/scripts/qapi.py > @@ -21,8 +21,17 @@ def tokenize(data): > elif data[0] == "'": > data = data[1:] > string = '' > - while data[0] != "'": > - string += data[0] > + esc = False > + while True: > + if esc: > + string += data[0] > + esc = False > + elif data[0] == "\\": > + esc = True > + elif data[0] == "'": > + break > + else > + string += data[0] > data = data[1:] > data = data[1:] > yield string > > Doesn't handle missing close quote gracefully; you may want to add that. > > >> PS: Peter, I get claustrophobic when reading emails from you :) > > > > I can add more blank lines if that helps? :-) > > > > -- PMM >