Hello community, here is the log from the commit of package rubygem-yajl-ruby for openSUSE:Factory checked in at 2018-07-18 22:51:57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/rubygem-yajl-ruby (Old) and /work/SRC/openSUSE:Factory/.rubygem-yajl-ruby.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "rubygem-yajl-ruby" Wed Jul 18 22:51:57 2018 rev:14 rq:621045 version:1.4.0 Changes: -------- --- /work/SRC/openSUSE:Factory/rubygem-yajl-ruby/rubygem-yajl-ruby.changes 2017-11-16 14:04:25.934193513 +0100 +++ /work/SRC/openSUSE:Factory/.rubygem-yajl-ruby.new/rubygem-yajl-ruby.changes 2018-07-18 22:52:32.887237819 +0200 @@ -1,0 +2,6 @@ +Fri Apr 27 19:23:03 UTC 2018 - [email protected] + +- updated to version 1.4.0 + see installed CHANGELOG.md + +------------------------------------------------------------------- Old: ---- yajl-ruby-1.3.1.gem New: ---- yajl-ruby-1.4.0.gem ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ rubygem-yajl-ruby.spec ++++++ --- /var/tmp/diff_new_pack.z5cqDE/_old 2018-07-18 22:52:33.303236439 +0200 +++ /var/tmp/diff_new_pack.z5cqDE/_new 2018-07-18 22:52:33.303236439 +0200 @@ -1,7 +1,7 @@ # # spec file for package rubygem-yajl-ruby # -# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -24,7 +24,7 @@ # Name: rubygem-yajl-ruby -Version: 1.3.1 +Version: 1.4.0 Release: 0 %define mod_name yajl-ruby %define mod_full_name %{mod_name}-%{version} @@ -37,7 +37,7 @@ Source1: rubygem-yajl-ruby-rpmlintrc Source2: gem2rpm.yml Summary: Ruby C bindings to the excellent Yajl JSON stream-based parser -License: MIT and BSD-3-Clause +License: MIT AND BSD-3-Clause Group: Development/Languages/Ruby %description ++++++ yajl-ruby-1.3.1.gem -> yajl-ruby-1.4.0.gem ++++++ Binary files old/checksums.yaml.gz and new/checksums.yaml.gz differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ext/yajl/extconf.rb new/ext/yajl/extconf.rb --- old/ext/yajl/extconf.rb 2017-11-07 07:19:50.000000000 +0100 +++ new/ext/yajl/extconf.rb 2018-04-27 20:16:03.000000000 +0200 @@ -1,7 +1,7 @@ require 'mkmf' require 'rbconfig' -$CFLAGS << ' -Wall -funroll-loops' +$CFLAGS << ' -Wall -funroll-loops -Wno-declaration-after-statement' $CFLAGS << ' -Werror-implicit-function-declaration -Wextra -O0 -ggdb3' if ENV['DEBUG'] create_makefile('yajl/yajl') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ext/yajl/yajl_ext.c new/ext/yajl/yajl_ext.c --- old/ext/yajl/yajl_ext.c 2017-11-07 07:19:50.000000000 +0100 +++ new/ext/yajl/yajl_ext.c 2018-04-27 20:16:03.000000000 +0200 @@ -22,6 +22,12 @@ */ #include "yajl_ext.h" +#include "yajl_lex.h" +#include "yajl_alloc.h" +#include "yajl_buf.h" +#include "yajl_encode.h" +#include "api/yajl_common.h" +#include "assert.h" #define YAJL_RB_TO_JSON \ VALUE rb_encoder, cls; \ @@ -32,6 +38,25 @@ } \ return rb_yajl_encoder_encode(1, &self, rb_encoder); \ +static void *rb_internal_malloc(void *ctx, unsigned int sz) { + return xmalloc(sz); +} + +static void *rb_internal_realloc(void *ctx, void *previous, unsigned int sz) { + return xrealloc(previous, sz); +} + +static void rb_internal_free(void *ctx, void *ptr) { + xfree(ptr); +} + +static yajl_alloc_funcs rb_alloc_funcs = { + rb_internal_malloc, + rb_internal_realloc, + rb_internal_free, + NULL +}; + /* Helpers for building objects */ static void yajl_check_and_fire_callback(void * ctx) { yajl_parser_wrapper * wrapper; @@ -39,12 +64,12 @@ /* No need to do any of this if the callback isn't even setup */ if (wrapper->parse_complete_callback != Qnil) { - int len = RARRAY_LEN(wrapper->builderStack); + long len = RARRAY_LEN(wrapper->builderStack); if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) { rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack)); } } else { - int len = RARRAY_LEN(wrapper->builderStack); + long len = RARRAY_LEN(wrapper->builderStack); if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) { wrapper->objectsFound++; if (wrapper->objectsFound > 1) { @@ -76,7 +101,7 @@ static void yajl_set_static_value(void * ctx, VALUE val) { yajl_parser_wrapper * wrapper; VALUE lastEntry, hash; - int len; + long len; GetParser((VALUE)ctx, wrapper); @@ -198,7 +223,7 @@ case T_BIGNUM: str = rb_funcall(obj, intern_to_s, 0); cptr = RSTRING_PTR(str); - len = RSTRING_LEN(str); + len = (unsigned int)RSTRING_LEN(str); if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) { rb_raise(cEncodeError, "'%s' is an invalid number", cptr); } @@ -206,7 +231,7 @@ break; case T_STRING: cptr = RSTRING_PTR(obj); - len = RSTRING_LEN(obj); + len = (unsigned int)RSTRING_LEN(obj); CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len)); break; default: @@ -214,13 +239,13 @@ str = rb_funcall(obj, intern_to_json, 0); Check_Type(str, T_STRING); cptr = RSTRING_PTR(str); - len = RSTRING_LEN(str); + len = (unsigned int)RSTRING_LEN(str); CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len)); } else { str = rb_funcall(obj, intern_to_s, 0); Check_Type(str, T_STRING); cptr = RSTRING_PTR(str); - len = RSTRING_LEN(str); + len = (unsigned int)RSTRING_LEN(str); CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len)); } break; @@ -420,7 +445,7 @@ cfg = (yajl_parser_config){allowComments, checkUTF8}; obj = Data_Make_Struct(klass, yajl_parser_wrapper, yajl_parser_wrapper_mark, yajl_parser_wrapper_free, wrapper); - wrapper->parser = yajl_alloc(&callbacks, &cfg, NULL, (void *)obj); + wrapper->parser = yajl_alloc(&callbacks, &cfg, &rb_alloc_funcs, (void *)obj); wrapper->nestedArrayLevel = 0; wrapper->nestedHashLevel = 0; wrapper->objectsFound = 0; @@ -489,13 +514,13 @@ if (TYPE(input) == T_STRING) { cptr = RSTRING_PTR(input); - len = RSTRING_LEN(input); + len = (unsigned int)RSTRING_LEN(input); yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser); } else if (rb_respond_to(input, intern_io_read)) { VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize)); while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) { cptr = RSTRING_PTR(parsed); - len = RSTRING_LEN(parsed); + len = (unsigned int)RSTRING_LEN(parsed); yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser); } } else { @@ -535,7 +560,7 @@ if (wrapper->parse_complete_callback != Qnil) { const char * cptr = RSTRING_PTR(chunk); - len = RSTRING_LEN(chunk); + len = (unsigned int)RSTRING_LEN(chunk); yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser); } else { rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless."); @@ -561,6 +586,402 @@ } /* + * An event stream pulls data off the IO source into the buffer, + * then runs the lexer over that stream. + */ +struct yajl_event_stream_s { + yajl_alloc_funcs *funcs; + + VALUE stream; // source + + VALUE buffer; + unsigned int offset; + + yajl_lexer lexer; // event source +}; + +typedef struct yajl_event_stream_s *yajl_event_stream_t; + +struct yajl_event_s { + yajl_tok token; + const char *buf; + unsigned int len; +}; +typedef struct yajl_event_s yajl_event_t; + +static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop) { + assert(parser->stream); + assert(parser->buffer); + + while (1) { + if (parser->offset >= RSTRING_LEN(parser->buffer)) { + //printf("reading offset %d size %ld\n", parser->offset, RSTRING_LEN(parser->buffer)); + + // Refill the buffer + rb_funcall(parser->stream, intern_io_read, 2, INT2FIX(RSTRING_LEN(parser->buffer)), parser->buffer); + if (RSTRING_LEN(parser->buffer) == 0) { + yajl_event_t event = { + .token = yajl_tok_eof, + }; + return event; + } + + parser->offset = 0; + } + + // Try to pull an event off the lexer + yajl_event_t event; + + yajl_tok token; + if (pop == 0) { + //printf("peeking %p %ld %d\n", RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset); + token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset); + //printf("peeked event %d\n", token); + + if (token == yajl_tok_eof) { + parser->offset = (unsigned int)RSTRING_LEN(parser->buffer); + continue; + } + + event.token = token; + + return event; + } + + //printf("popping\n"); + token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len); + //printf("popped event %d\n", token); + + if (token == yajl_tok_eof) { + continue; + } + + event.token = token; + + return event; + } + + return (yajl_event_t){}; +} + +static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event); +static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event); +static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser); +static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser); +static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event); +static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event); + +static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) { + assert(parser->stream); + + switch(event.token) { + case yajl_tok_left_brace: + return rb_yajl_projector_filter_array_subtree(parser, schema, event); + break; + case yajl_tok_left_bracket: + return rb_yajl_projector_filter_object_subtree(parser, schema, event); + break; + default: + return rb_yajl_projector_build_simple_value(parser, event); + } +} + +static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) { + assert(event.token == yajl_tok_left_brace); + + VALUE ary = rb_ary_new(); + + while (1) { + event = yajl_event_stream_next(parser, 1); + + if (event.token == yajl_tok_right_brace) { + break; + } + + VALUE val = rb_yajl_projector_filter(parser, schema, event); + rb_ary_push(ary, val); + + event = yajl_event_stream_next(parser, 0); + if (event.token == yajl_tok_comma) { + event = yajl_event_stream_next(parser, 1); + assert(event.token == yajl_tok_comma); + + event = yajl_event_stream_next(parser, 0); + if (!(event.token == yajl_tok_string || event.token == yajl_tok_integer || event.token == yajl_tok_double || event.token == yajl_tok_null || event.token == yajl_tok_bool || event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace)) { + rb_raise(cParseError, "read a comma, expected a value to follow, actually read %s", yajl_tok_name(event.token)); + } + } else if (event.token != yajl_tok_right_brace) { + rb_raise(cParseError, "didn't read a comma, expected closing array, actually read %s", yajl_tok_name(event.token)); + } + } + + return ary; +} + +static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) { + assert(event.token == yajl_tok_left_bracket); + + VALUE hsh = rb_hash_new(); + + while (1) { + event = yajl_event_stream_next(parser, 1); + + if (event.token == yajl_tok_right_bracket) { + break; + } + + if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) { + rb_raise(cParseError, "Expected string, unexpected stream event %s", yajl_tok_name(event.token)); + } + + VALUE key = rb_yajl_projector_build_string(parser, event); + + event = yajl_event_stream_next(parser, 1); + if (!(event.token == yajl_tok_colon)) { + rb_raise(cParseError, "Expected colon, unexpected stream event %s", yajl_tok_name(event.token)); + } + + // nil schema means reify the subtree from here on + // otherwise if the schema has a key for this we want it + int interesting = (schema == Qnil || rb_funcall(schema, rb_intern("key?"), 1, key) == Qtrue); + if (!interesting) { + rb_yajl_projector_ignore_value(parser); + goto peek_comma; + } + + yajl_event_t value_event = yajl_event_stream_next(parser, 1); + + VALUE key_schema; + if (schema == Qnil) { + key_schema = Qnil; + } else { + key_schema = rb_hash_aref(schema, key); + } + + VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event); + + rb_str_freeze(key); + rb_hash_aset(hsh, key, val); + + peek_comma: + + event = yajl_event_stream_next(parser, 0); + if (event.token == yajl_tok_comma) { + event = yajl_event_stream_next(parser, 1); + assert(event.token == yajl_tok_comma); + + event = yajl_event_stream_next(parser, 0); + if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) { + rb_raise(cParseError, "read a comma, expected a key to follow, actually read %s", yajl_tok_name(event.token)); + } + } else if (event.token != yajl_tok_right_bracket) { + rb_raise(cParseError, "read a value without tailing comma, expected closing bracket, actually read %s", yajl_tok_name(event.token)); + } + } + + return hsh; +} + +/* +# After reading a key if we know we are not interested in the next value, + # read and discard all its stream events. + # + # Values can be simple (string, numeric, boolean, null) or compound (object + # or array). + # + # Returns nothing. +*/ +static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) { + yajl_event_t value_event = yajl_event_stream_next(parser, 1); + + switch (value_event.token) { + case yajl_tok_null: + case yajl_tok_bool: + case yajl_tok_integer: + case yajl_tok_double: + case yajl_tok_string: + case yajl_tok_string_with_escapes: + return; + default: + break; + } + + if (value_event.token == yajl_tok_left_brace || value_event.token == yajl_tok_left_bracket) { + rb_yajl_projector_ignore_container(parser); + return; + } + + rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token)); +} + +/* +# Given the start of an array or object, read until the closing event. +# Object structures can nest and this is considered. +# +# Returns nothing. +*/ +static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser) { + int depth = 1; + + while (depth > 0) { + yajl_event_t event = yajl_event_stream_next(parser, 1); + + if (event.token == yajl_tok_eof) { + return; + } + + if (event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace) { + depth += 1; + } else if (event.token == yajl_tok_right_bracket || event.token == yajl_tok_right_brace) { + depth -= 1; + } + } +} + +static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event) { + assert(parser->stream); + + switch (event.token) { + case yajl_tok_null:; + return Qnil; + case yajl_tok_bool:; + if (memcmp(event.buf, "true", 4) == 0) { + return Qtrue; + } else if (memcmp(event.buf, "false", 4) == 0) { + return Qfalse; + } else { + rb_raise(cStandardError, "unknown boolean token %s", event.buf); + } + case yajl_tok_integer:; + case yajl_tok_double:; + char *buf = (char *)malloc(event.len + 1); + buf[event.len] = 0; + memcpy(buf, event.buf, event.len); + + VALUE val; + if (memchr(buf, '.', event.len) || + memchr(buf, 'e', event.len) || + memchr(buf, 'E', event.len)) { + val = rb_float_new(strtod(buf, NULL)); + } else { + val = rb_cstr2inum(buf, 10); + } + free(buf); + + return val; + + case yajl_tok_string:; + case yajl_tok_string_with_escapes:; + return rb_yajl_projector_build_string(parser, event); + + case yajl_tok_eof:; + rb_raise(cParseError, "unexpected eof while constructing value"); + + case yajl_tok_comma: + rb_raise(cParseError, "unexpected comma while constructing value"); + + case yajl_tok_colon: + rb_raise(cParseError, "unexpected colon while constructing value"); + + default:; + assert(0); + } +} + +static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event) { + switch (event.token) { + case yajl_tok_string:; { + VALUE str = rb_str_new(event.buf, event.len); + rb_enc_associate(str, utf8Encoding); + + rb_encoding *default_internal_enc = rb_default_internal_encoding(); + if (default_internal_enc) { + str = rb_str_export_to_enc(str, default_internal_enc); + } + + return str; + } + + case yajl_tok_string_with_escapes:; { + //printf("decoding string with escapes\n"); + + yajl_buf strBuf = yajl_buf_alloc(parser->funcs); + yajl_string_decode(strBuf, (const unsigned char *)event.buf, event.len); + + VALUE str = rb_str_new((const char *)yajl_buf_data(strBuf), yajl_buf_len(strBuf)); + rb_enc_associate(str, utf8Encoding); + + yajl_buf_free(strBuf); + + rb_encoding *default_internal_enc = rb_default_internal_encoding(); + if (default_internal_enc) { + str = rb_str_export_to_enc(str, default_internal_enc); + } + + return str; + } + + default:; { + assert(0); + } + } +} + +static VALUE rb_protected_yajl_projector_filter(VALUE pointer) { + VALUE *args = (VALUE *)pointer; + return rb_yajl_projector_filter((struct yajl_event_stream_s *)args[0], + args[1], + *(yajl_event_t *)args[2]); +} + +/* + * Document-method: project + */ +static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) { + VALUE stream = rb_iv_get(self, "@stream"); + + long buffer_size = FIX2LONG(rb_iv_get(self, "@buffer_size")); + VALUE buffer = rb_str_new(0, buffer_size); + + struct yajl_event_stream_s parser = { + .funcs = &rb_alloc_funcs, + + .stream = stream, + + .buffer = buffer, + .offset = (unsigned int)buffer_size, + + .lexer = yajl_lex_alloc(&rb_alloc_funcs, 0, 1), + }; + + yajl_event_t event = yajl_event_stream_next(&parser, 1); + + RB_GC_GUARD(stream); + RB_GC_GUARD(buffer); + + VALUE result; + int state = 0; + + if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) { + VALUE args[3]; + args[0] = (VALUE)&parser; + args[1] = schema; + args[2] = (VALUE)&event; + result = rb_protect(rb_protected_yajl_projector_filter, + (VALUE)args, + &state); + } else { + yajl_lex_free(parser.lexer); + rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token)); + } + + yajl_lex_free(parser.lexer); + if (state) rb_jump_tag(state); + + return result; +} + +/* * Document-class: Yajl::Encoder * * This class contains methods for encoding a Ruby object into JSON, streaming it's output into an IO object. @@ -620,7 +1041,7 @@ obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper); wrapper->indentString = actualIndent; - wrapper->encoder = yajl_gen_alloc(&cfg, NULL); + wrapper->encoder = yajl_gen_alloc(&cfg, &rb_alloc_funcs); wrapper->on_progress_callback = Qnil; if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) { wrapper->terminator = rb_hash_aref(opts, sym_terminator); @@ -900,6 +1321,7 @@ cParseError = rb_define_class_under(mYajl, "ParseError", rb_eStandardError); cEncodeError = rb_define_class_under(mYajl, "EncodeError", rb_eStandardError); + cStandardError = rb_const_get(rb_cObject, rb_intern("StandardError")); cParser = rb_define_class_under(mYajl, "Parser", rb_cObject); rb_define_singleton_method(cParser, "new", rb_yajl_parser_new, -1); @@ -909,6 +1331,9 @@ rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1); rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1); + cProjector = rb_define_class_under(mYajl, "Projector", rb_cObject); + rb_define_method(cProjector, "project", rb_yajl_projector_project, 1); + cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject); rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1); rb_define_method(cEncoder, "initialize", rb_yajl_encoder_init, -1); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ext/yajl/yajl_ext.h new/ext/yajl/yajl_ext.h --- old/ext/yajl/yajl_ext.h 2017-11-07 07:19:50.000000000 +0100 +++ new/ext/yajl/yajl_ext.h 2018-04-27 20:16:03.000000000 +0200 @@ -53,7 +53,7 @@ #define RARRAY_LEN(s) (RARRAY(s)->len) #endif -static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder; +static VALUE cStandardError, cParseError, cEncodeError, mYajl, cParser, cProjector, cEncoder; static ID intern_io_read, intern_call, intern_keys, intern_to_s, intern_to_json, intern_has_key, intern_to_sym, intern_as_json; static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_symbolize_names, sym_html_safe; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ext/yajl/yajl_lex.c new/ext/yajl/yajl_lex.c --- old/ext/yajl/yajl_lex.c 2017-11-07 07:19:50.000000000 +0100 +++ new/ext/yajl/yajl_lex.c 2018-04-27 20:16:03.000000000 +0200 @@ -38,29 +38,25 @@ #include <assert.h> #include <string.h> -#ifdef YAJL_LEXER_DEBUG -static const char * -tokToStr(yajl_tok tok) -{ +const char *yajl_tok_name(yajl_tok tok) { switch (tok) { case yajl_tok_bool: return "bool"; case yajl_tok_colon: return "colon"; case yajl_tok_comma: return "comma"; case yajl_tok_eof: return "eof"; case yajl_tok_error: return "error"; - case yajl_tok_left_brace: return "brace"; - case yajl_tok_left_bracket: return "bracket"; + case yajl_tok_left_brace: return "open_array"; + case yajl_tok_left_bracket: return "open_object"; case yajl_tok_null: return "null"; case yajl_tok_integer: return "integer"; case yajl_tok_double: return "double"; - case yajl_tok_right_brace: return "brace"; - case yajl_tok_right_bracket: return "bracket"; + case yajl_tok_right_brace: return "close_array"; + case yajl_tok_right_bracket: return "close_object"; case yajl_tok_string: return "string"; case yajl_tok_string_with_escapes: return "string_with_escapes"; } return "unknown"; } -#endif /* Impact of the stream parsing feature on the lexer: * @@ -740,6 +736,10 @@ tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset, &outBuf, &outLen); + if (tok == yajl_tok_eof) { + return tok; + } + lexer->bufOff = bufOff; lexer->bufInUse = bufInUse; yajl_buf_truncate(lexer->buf, bufLen); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ext/yajl/yajl_lex.h new/ext/yajl/yajl_lex.h --- old/ext/yajl/yajl_lex.h 2017-11-07 07:19:50.000000000 +0100 +++ new/ext/yajl/yajl_lex.h 2018-04-27 20:16:03.000000000 +0200 @@ -36,33 +36,34 @@ #include "api/yajl_common.h" typedef enum { - yajl_tok_bool, - yajl_tok_colon, - yajl_tok_comma, - yajl_tok_eof, - yajl_tok_error, - yajl_tok_left_brace, - yajl_tok_left_bracket, - yajl_tok_null, - yajl_tok_right_brace, - yajl_tok_right_bracket, + yajl_tok_bool, // 0 + yajl_tok_colon, // 1 + yajl_tok_comma, // 2 + yajl_tok_eof, // 3 + yajl_tok_error, // 4 + yajl_tok_left_brace, // 5 + yajl_tok_left_bracket, // 6 + yajl_tok_null, // 7 + yajl_tok_right_brace, // 8 + yajl_tok_right_bracket, // 9 /* we differentiate between integers and doubles to allow the * parser to interpret the number without re-scanning */ - yajl_tok_integer, - yajl_tok_double, + yajl_tok_integer, // 10 + yajl_tok_double, // 11 /* we differentiate between strings which require further processing, * and strings that do not */ - yajl_tok_string, - yajl_tok_string_with_escapes, + yajl_tok_string, // 12 + yajl_tok_string_with_escapes, // 13 /* comment tokens are not currently returned to the parser, ever */ - yajl_tok_comment + yajl_tok_comment // 14 } yajl_tok; typedef struct yajl_lexer_t * yajl_lexer; +const char *yajl_tok_name(yajl_tok tok); YAJL_API yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lib/yajl/version.rb new/lib/yajl/version.rb --- old/lib/yajl/version.rb 2017-11-07 07:19:50.000000000 +0100 +++ new/lib/yajl/version.rb 2018-04-27 20:16:03.000000000 +0200 @@ -1,3 +1,3 @@ module Yajl - VERSION = '1.3.1' + VERSION = '1.4.0' end diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/lib/yajl.rb new/lib/yajl.rb --- old/lib/yajl.rb 2017-11-07 07:19:50.000000000 +0100 +++ new/lib/yajl.rb 2018-04-27 20:16:03.000000000 +0200 @@ -23,6 +23,13 @@ Encoder.encode(obj, args, &block) end + class Projector + def initialize(stream, read_bufsize=4096) + @stream = stream + @buffer_size = read_bufsize + end + end + class Parser # A helper method for parse-and-forget use-cases # diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/metadata new/metadata --- old/metadata 2017-11-07 07:19:50.000000000 +0100 +++ new/metadata 2018-04-27 20:16:03.000000000 +0200 @@ -1,7 +1,7 @@ --- !ruby/object:Gem::Specification name: yajl-ruby version: !ruby/object:Gem::Version - version: 1.3.1 + version: 1.4.0 platform: ruby authors: - Brian Lopez @@ -9,7 +9,7 @@ autorequire: bindir: bin cert_chain: [] -date: 2017-11-07 00:00:00.000000000 Z +date: 2018-04-27 00:00:00.000000000 Z dependencies: - !ruby/object:Gem::Dependency name: rake-compiler @@ -67,6 +67,20 @@ - - ">=" - !ruby/object:Gem::Version version: '0' +- !ruby/object:Gem::Dependency + name: benchmark-memory + requirement: !ruby/object:Gem::Requirement + requirements: + - - "~>" + - !ruby/object:Gem::Version + version: '0.1' + type: :development + prerelease: false + version_requirements: !ruby/object:Gem::Requirement + requirements: + - - "~>" + - !ruby/object:Gem::Version + version: '0.1' description: email: [email protected] executables: [] @@ -221,6 +235,8 @@ - spec/parsing/fixtures_spec.rb - spec/parsing/large_number_spec.rb - spec/parsing/one_off_spec.rb +- spec/projection/project_file.rb +- spec/projection/projection.rb - spec/rcov.opts - spec/spec_helper.rb - tasks/compile.rake @@ -337,5 +353,7 @@ - spec/parsing/fixtures_spec.rb - spec/parsing/large_number_spec.rb - spec/parsing/one_off_spec.rb +- spec/projection/project_file.rb +- spec/projection/projection.rb - spec/rcov.opts - spec/spec_helper.rb diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/spec/projection/project_file.rb new/spec/projection/project_file.rb --- old/spec/projection/project_file.rb 1970-01-01 01:00:00.000000000 +0100 +++ new/spec/projection/project_file.rb 2018-04-27 20:16:03.000000000 +0200 @@ -0,0 +1,41 @@ +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') + +require 'benchmark' +require 'benchmark/memory' + +describe "file projection" do + it "projects file streams" do + schema = { + "forced" => nil, + "created" => nil, + "pusher" => { + "name" => nil, + }, + "repository" => { + "name" => nil, + "full_name" => nil, + }, + "ref" => nil, + "compare" => nil, + "commits" => { + "distinct" => nil, + "message" => nil, + "url" => nil, + "id" => nil, + "author" => { + "username" => nil, + } + } + } + + file_path = ENV['JSON_FILE'] + if file_path.nil? || file_path.empty? + return + end + + Benchmark.memory { |x| + x.report("project (yajl)") { Yajl::Projector.new(File.open(file_path, 'r')).project(schema) } + x.compare! + } + end +end diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/spec/projection/projection.rb new/spec/projection/projection.rb --- old/spec/projection/projection.rb 1970-01-01 01:00:00.000000000 +0100 +++ new/spec/projection/projection.rb 2018-04-27 20:16:03.000000000 +0200 @@ -0,0 +1,498 @@ +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') + +require 'stringio' +require 'json' + +describe "projection" do + it "should work" do + stream = StringIO.new('{"name": "keith", "age": 27}') + projector = Yajl::Projector.new(stream) + projection = projector.project({"name" => nil}) + expect(projection['name']).to eql("keith") + end + + it "should filter" do + stream = StringIO.new('{"name": "keith", "age": 27}') + projector = Yajl::Projector.new(stream) + projection = projector.project({"name" => nil}) + expect(projection['age']).to eql(nil) + end + + it "should raise an exception and not leak memory" do + stream = StringIO.new('foo') + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(Yajl::ParseError) + end + + it "should raise an exception and not segv" do + stream = StringIO.new('[,,,,]') + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(Yajl::ParseError) + end + + it "should raise an exception and not segv on colons" do + stream = StringIO.new('[::::]') + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(Yajl::ParseError) + end + + it "should behave the same way as the regular parser on bad tokens like comma" do + bad_json = '{"name": "keith", "age":, 27}' + stream = StringIO.new(bad_json) + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(capture_exception_for(bad_json).class) + end + + it "should behave the same way as the regular parser on bad tokens like colon" do + bad_json = '{"name": "keith", "age":: 27}' + stream = StringIO.new(bad_json) + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(capture_exception_for(bad_json).class) + end + + it "should behave the same way as the regular parser on not enough json" do + bad_json = '{"name": "keith", "age":' + stream = StringIO.new(bad_json) + projector = Yajl::Projector.new(stream) + expect { + projector.project({"name" => nil}) + }.to raise_error(capture_exception_for(bad_json).class) + end + + def capture_exception_for(bad_json) + Yajl::Parser.new.parse(bad_json) + rescue Exception => e + e + end + + def project(schema, over: "", json: nil, stream: nil) + if stream.nil? + if json.nil? + json = over.to_json + end + + stream = StringIO.new(json) + end + + Yajl::Projector.new(stream).project(schema) + end + + it "filters arrays" do + json = { + "users" => [ + { + "name" => "keith", + "company" => "internet plumbing inc", + "department" => "janitorial", + }, + { + "name" => "justin", + "company" => "big blue", + "department" => "programming?", + }, + { + "name" => "alan", + "company" => "different colour of blue", + "department" => "drop bear containment", + } + ] + }.to_json + + puts json + + schema = { + # /users is an array of objects, each having many keys we only want name + "users" => { + "name" => nil, + } + } + + expect(project(schema, json: json)).to eql({ + "users" => [ + { "name" => "keith" }, + { "name" => "justin" }, + { "name" => "alan" } + ] + }) + end + + it "filters top level arrays" do + json = [ + { + "name" => "keith", + "personal detail" => "thing", + }, + { + "name" => "cory", + "phone number" => "unknown", + } + ] + + schema = { + "name" => nil, + } + + expect(project(schema, over: json)).to eql([ + { "name" => "keith" }, + { "name" => "cory" }, + ]) + end + + it "filters nested schemas" do + json = { + "foo" => 42, + + "bar" => { + "name" => "keith", + "occupation" => "professional computering", + "age" => 26, + "hobbies" => [ + "not computering", + ] + }, + + "qux" => { + "quux" => [ + { + "name" => "Reactive X", + "members" => "many", + }, + { + "name" => "lstoll", + "members" => "such", + }, + { + "name" => "github", + "members" => "very", + }, + { + "name" => "theleague", + "members" => "numerous", + } + ], + + "corge" => { + "name" => "Brighton", + "address" =>"Buckingham Road", + }, + }, + + "grault" => nil, + + "waldo" => true, + } + + schema = { + # include the /foo subtree (is a single number) + "foo" => nil, + + # ignore the bar subtree (is an object) + # "bar" => ??? + + # include some of the /qux subtree (is an object) + "qux" => { + # include the whole /qux/quux subtree (is an array of objects) + "quux" => nil, + + # include some of the /qux/corge subtree (is another object) + "corge" => { + # include name (is a string) + "name" => nil, + # include age (is missing from source doc) + "age" => nil, + # ignore address + # "address" => ??? + }, + }, + + # include the /grault subtree (is a null literal) + "grault" => nil, + + # include the /waldo subtree (is a boolean literal) + "waldo" => nil, + } + + expect(project(schema, over: json)).to eql({ + "foo" => 42, + + "qux" => { + "quux" => [ + { + "name" => "Reactive X", + "members" => "many", + }, + { + "name" => "lstoll", + "members" => "such", + }, + { + "name" => "github", + "members" => "very", + }, + { + "name" => "theleague", + "members" => "numerous", + } + ], + + "corge" => { + "name" => "Brighton", + }, + }, + + "grault" => nil, + + "waldo" => true, + }) + end + + it "supports incompatible schemas" do + json = { + # surprise! the json doesn't include an object under the foo key + "foo" => 42, + } + + schema = { + # include some of the /foo subtree + "foo" => { + # include the whole /foo/baz subtree + "baz" => nil, + } + } + + # expect the 42 to be pulled out + expect(project(schema, over: json)).to eql({ + "foo" => 42 + }) + end + + it "supports nil schema" do + json = { + "foo" => "bar", + } + + expect(project(nil, over: json)).to eql({ + "foo" => "bar" + }) + end + + it "supports empty schema" do + json = { + "foo" => "bar", + } + expect(project({}, over: json)).to eql({}) + end + + it "supports object projection" do + json = { + "foo" => "bar", + "qux" => "quux", + } + + schema = { + "foo" => nil, + } + + expect(project(schema, over: json)).to eql({ + "foo" => "bar" + }) + end + + it "projects the readme example" do + json = <<-EOJ + [ + { + "user": { + "name": "keith", + "age": 26, + "jobs": [ + { + "title": "director of overworking", + "company": "south coast software", + "department": "most" + }, + { + "title": "some kind of computering", + "company": "github the website dot com", + "department": true + } + ] + }, + "another key": { + + }, + "woah this document is huge": { + + }, + "many megabytes": { + + }, + "etc": { + + } + } + ] +EOJ + + schema = { + "user" => { + "name" => nil, + "jobs" => { + "title" => nil, + }, + }, + } + + expect(project(schema, json: json)).to eql([{ + "user" => { + "name" => "keith", + "jobs" => [ + { "title" => "director of overworking" }, + { "title" => "some kind of computering" }, + ] + } + }]) + end + + it "errors with invalid json" do + expect { + project({"b" => nil}, json: '{"a":, "b": 2}') + }.to raise_error(StandardError) + end + + it "errors with ignored unbalanced object syntax" do + expect { + project({"b" => nil}, json: '{"a": {{, "b": 2}') + }.to raise_error(StandardError) + end + + it "errors with accepted unbalanced object tokens" do + expect { + project({"a" => nil}, json: '{"a": {"b": 2}') + }.to raise_error(Yajl::ParseError) + end + + it "errors when projecting if an object comma is missing" do + expect { + project({"a" => nil}, json: '{"a": 1 "b": 2}') + }.to raise_error(Yajl::ParseError) + end + + it "errors when building if an object comma is missing" do + expect { + project(nil, json: '{"a": {"b": 2 "c": 3}}') + }.to raise_error(Yajl::ParseError) + end + + it "errors when eof instead of simple value" do + expect { + project(nil, json: '[') + }.to raise_error(Yajl::ParseError) + end + + it "errors when arrays don't have a comma between elements" do + expect { + project(nil, json: '[1 2]') + }.to raise_error(Yajl::ParseError) + end + + it "supports parsing empty array" do + expect(project(nil, json: '[]')).to eql([]) + end + + it "supports parsing empty object" do + expect(project(nil, json: '{}')).to eql({}) + end + + it "reads a full buffer" do + json = "[" + "1,"*2046 + "1 ]" + expect(json.size).to eql(4096) + expect(project(nil, json: json)).to eql(Array.new(2047, 1)) + end + + it "reads into a second buffer" do + json = "[" + "1,"*2047 + "1 ]" + expect(json.size).to eql(4098) + expect(JSON.parse(json)).to eql(Array.new(2048, 1)) + expect(project(nil, json: json)).to eql(Array.new(2048, 1)) + end + + it "supports parsing big strings" do + json = [ + "a", + "b"*10_000, + "c", + ] + expect(project(nil, over: json)).to eql(json) + end + + it "supports bigger read buffers" do + json = { + "a"*10_000 => "b"*10_000 + }.to_json + stream = StringIO.new(json) + expect(Yajl::Projector.new(stream, 8192).project(nil)).to have_key("a"*10_000) + end + + it "errors if starting with closing object" do + expect { + project(nil, json: '}') + }.to raise_error(Yajl::ParseError) + end + + it "handles objects with utf16 escape sequences as keys" do + projection = project(nil, json: '{"\ud83d\ude00": "grinning face"}') + literal = {"😀" => "grinning face"} + expect(projection).to eql(literal) + end + + it "handles objects with non-ascii utf8 bytes as keys" do + expect(project(nil, json: '{"😀": "grinning face"}')).to eql({"😀" => "grinning face"}) + end + + it "handles strings with utf16 escape sequences as object values" do + expect(project(nil, json: '{"grinning face": "\ud83d\ude00"}')).to eql({"grinning face" => "😀"}) + end + + it "handles strings with utf16 escape sequences as array values" do + projection = project(nil, json: '["\ud83d\ude00"]') + puts projection.first.inspect + puts projection.first.bytes + + literal = ["😀"] + puts literal.first.inspect + puts literal.first.bytes + + expect(projection).to eql(literal) + end + + it "handles strings with non-ascii utf8 bytes as array values" do + projection = project(nil, json: '["😀"]') + puts projection.first.inspect + puts projection.first.bytes + + literal = ["😀"] + puts literal.first.inspect + puts literal.first.bytes + + expect(projection).to eql(literal) + end + + it "ignores strings with utf16 escape sequences" do + expect(project({"grinning face with open mouth" => nil}, json: '{"grinning face": "\ud83d\ude00", "grinning face with open mouth": "\ud83d\ude03"}')).to eql({"grinning face with open mouth" => "😃"}) + end + + it "handles objects whose second key has escape sequences" do + expect(project(nil, json: '{"foo": "bar", "\ud83d\ude00": "grinning face"}')).to eql({"foo" => "bar", "😀" => "grinning face"}) + end +end diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/yajl-ruby.gemspec new/yajl-ruby.gemspec --- old/yajl-ruby.gemspec 2017-11-07 07:19:50.000000000 +0100 +++ new/yajl-ruby.gemspec 2018-04-27 20:16:03.000000000 +0200 @@ -22,5 +22,6 @@ # benchmarks s.add_development_dependency 'activesupport', '~> 3.1.2' s.add_development_dependency 'json' + s.add_development_dependency "benchmark-memory", "~> 0.1" end
