This is an automated email from the ASF dual-hosted git repository.
tjwp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 389d351 AVRO-3036: add schema matching for decimal logical type in
Ruby (#1082)
389d351 is described below
commit 389d35131af037bbf3bd5da739528a13191ade43
Author: Tim Perkins <[email protected]>
AuthorDate: Sat May 8 07:46:13 2021 -0400
AVRO-3036: add schema matching for decimal logical type in Ruby (#1082)
Includes schema support for fixed decimal logical types, but not encoding
and decoding.
---
lang/ruby/lib/avro/schema.rb | 48 +++++++++++--
lang/ruby/lib/avro/schema_compatibility.rb | 19 +++--
lang/ruby/test/test_schema.rb | 70 +++++++++++++++++++
lang/ruby/test/test_schema_compatibility.rb | 105 ++++++++++++++++++++++++++++
4 files changed, 227 insertions(+), 15 deletions(-)
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 8105b0a..5febf12 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -39,6 +39,8 @@ module Avro
DEFAULT_VALIDATE_OPTIONS = { recursive: true, encoded: false }.freeze
+ DECIMAL_LOGICAL_TYPE = 'decimal'.freeze
+
def self.parse(json_string)
real_parse(MultiJson.load(json_string), {})
end
@@ -76,7 +78,9 @@ module Avro
case type_sym
when :fixed
size = json_obj['size']
- return FixedSchema.new(name, namespace, size, names, logical_type,
aliases)
+ precision = json_obj['precision']
+ scale = json_obj['scale']
+ return FixedSchema.new(name, namespace, size, names, logical_type,
aliases, precision, scale)
when :enum
symbols = json_obj['symbols']
doc = json_obj['doc']
@@ -284,6 +288,10 @@ module Avro
def match_fullname?(name)
name == fullname || fullname_aliases.include?(name)
end
+
+ def match_schema?(schema)
+ type_sym == schema.type_sym && match_fullname?(schema.fullname)
+ end
end
class RecordSchema < NamedSchema
@@ -468,6 +476,11 @@ module Avro
hsh = super
hsh.size == 1 ? type : hsh
end
+
+ def match_schema?(schema)
+ return type_sym == schema.type_sym
+ # TODO: eventually this could handle schema promotion for primitive
schemas too
+ end
end
class BytesSchema < PrimitiveSchema
@@ -486,22 +499,49 @@ module Avro
avro['scale'] = scale if scale
avro
end
+
+ def match_schema?(schema)
+ return true if super
+
+ if logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type ==
DECIMAL_LOGICAL_TYPE
+ return precision == schema.precision && (scale || 0) ==
(schema.scale || 0)
+ end
+
+ false
+ end
end
class FixedSchema < NamedSchema
- attr_reader :size
- def initialize(name, space, size, names=nil, logical_type=nil,
aliases=nil)
+ attr_reader :size, :precision, :scale
+ def initialize(name, space, size, names=nil, logical_type=nil,
aliases=nil, precision=nil, scale=nil)
# Ensure valid cto args
unless size.is_a?(Integer)
raise AvroError, 'Fixed Schema requires a valid integer for size
property.'
end
super(:fixed, name, space, names, nil, logical_type, aliases)
@size = size
+ @precision = precision
+ @scale = scale
end
def to_avro(names=Set.new)
avro = super
- avro.is_a?(Hash) ? avro.merge('size' => size) : avro
+ return avro if avro.is_a?(String)
+
+ avro['size'] = size
+ avro['precision'] = precision if precision
+ avro['scale'] = scale if scale
+ avro
+ end
+
+ def match_schema?(schema)
+ return true if super && size == schema.size
+
+ if logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type ==
DECIMAL_LOGICAL_TYPE
+ return precision == schema.precision && (scale || 0) ==
(schema.scale || 0)
+ end
+
+ false
end
end
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb
b/lang/ruby/lib/avro/schema_compatibility.rb
index 9f21067..910e1a5 100644
--- a/lang/ruby/lib/avro/schema_compatibility.rb
+++ b/lang/ruby/lib/avro/schema_compatibility.rb
@@ -47,24 +47,17 @@ module Avro
end
if w_type == r_type
- return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
+ return readers_schema.match_schema?(writers_schema) if
Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
case r_type
- when :record
- return readers_schema.match_fullname?(writers_schema.fullname)
- when :error
- return readers_schema.match_fullname?(writers_schema.fullname)
when :request
return true
- when :fixed
- return readers_schema.match_fullname?(writers_schema.fullname) &&
- writers_schema.size == readers_schema.size
- when :enum
- return readers_schema.match_fullname?(writers_schema.fullname)
when :map
return match_schemas(writers_schema.values, readers_schema.values)
when :array
return match_schemas(writers_schema.items, readers_schema.items)
+ else
+ return readers_schema.match_schema?(writers_schema)
end
end
@@ -81,7 +74,11 @@ module Avro
return true
end
- return false
+ if readers_schema.respond_to?(:match_schema?)
+ readers_schema.match_schema?(writers_schema)
+ else
+ false
+ end
end
class Checker
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index ff1cd3c..3b66f28 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -542,6 +542,76 @@ class TestSchema < Test::Unit::TestCase
exception.to_s)
end
+ def test_fixed_decimal_to_include_precision_scale
+ schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "fixed",
+ "name": "aFixed",
+ "logicalType": "decimal",
+ "size": 4,
+ "precision": 9,
+ "scale": 2
+ }
+ SCHEMA
+
+ schema_hash =
+ {
+ 'type' => 'fixed',
+ 'name' => 'aFixed',
+ 'logicalType' => 'decimal',
+ 'size' => 4,
+ 'precision' => 9,
+ 'scale' => 2
+ }
+
+ assert_equal schema_hash, schema.to_avro
+ end
+
+ def test_fixed_decimal_to_include_precision_no_scale
+ schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "fixed",
+ "name": "aFixed",
+ "logicalType": "decimal",
+ "size": 4,
+ "precision": 9
+ }
+ SCHEMA
+
+ schema_hash =
+ {
+ 'type' => 'fixed',
+ 'name' => 'aFixed',
+ 'logicalType' => 'decimal',
+ 'size' => 4,
+ 'precision' => 9
+ }
+
+ assert_equal schema_hash, schema.to_avro
+ end
+
+ # Note: a decimal schema without precision is not valid, but validation is not yet implemented
+ def test_fixed_decimal_to_without_precision_scale
+ schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "fixed",
+ "size": 4,
+ "name": "aFixed",
+ "logicalType": "decimal"
+ }
+ SCHEMA
+
+ schema_hash =
+ {
+ 'type' => 'fixed',
+ 'name' => 'aFixed',
+ 'logicalType' => 'decimal',
+ 'size' => 4
+ }
+
+ assert_equal schema_hash, schema.to_avro
+ end
+
def test_bytes_decimal_to_include_precision_scale
schema = Avro::Schema.parse <<-SCHEMA
{
diff --git a/lang/ruby/test/test_schema_compatibility.rb
b/lang/ruby/test/test_schema_compatibility.rb
index 62a5bd9..3773109 100644
--- a/lang/ruby/test/test_schema_compatibility.rb
+++ b/lang/ruby/test/test_schema_compatibility.rb
@@ -293,6 +293,111 @@ class TestSchemaCompatibility < Test::Unit::TestCase
assert_false(can_read?(writer_schema, reader_schema))
end
+ def test_bytes_decimal
+ bytes_decimal_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4,
"scale":4}')
+ bytes2_decimal_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4,
"scale":4}')
+ bytes_decimal_different_precision_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":5,
"scale":4}')
+ bytes_decimal_no_scale_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+ bytes2_decimal_no_scale_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+ bytes_decimal_zero_scale_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4,
"scale":0}')
+ bytes_unknown_logical_type_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"unknown"}')
+
+ # decimal bytes and non-decimal bytes can be mixed
+ assert_true(can_read?(bytes_schema, bytes_decimal_schema))
+ assert_true(can_read?(bytes_decimal_schema, bytes_schema))
+ assert_true(can_read?(bytes_decimal_schema,
bytes_unknown_logical_type_schema))
+
+ # decimal bytes match even if precision and scale differ
+ assert_true(can_read?(bytes_decimal_schema,
bytes_decimal_different_precision_schema))
+ assert_true(can_read?(bytes_decimal_schema, bytes_decimal_no_scale_schema))
+ assert_true(can_read?(bytes_decimal_schema,
bytes_decimal_zero_scale_schema))
+ # - zero and no scale are equivalent
+ assert_true(can_read?(bytes_decimal_zero_scale_schema,
bytes_decimal_no_scale_schema))
+ # - different schemas with the same attributes match
+ assert_true(can_read?(bytes_decimal_schema, bytes2_decimal_schema))
+ # - different schemas with the same no scale match
+ assert_true(can_read?(bytes2_decimal_no_scale_schema,
bytes_decimal_no_scale_schema))
+ end
+
+ def test_fixed_decimal
+ fixed_decimal_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4, "scale":2}')
+ fixed2_decimal_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed2",
"logicalType":"decimal", "precision":4, "scale":2}')
+ fixed_decimal_different_precision_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":3, "scale":2}')
+ fixed_decimal_size3_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":3, "name":"FixedS3",
"logicalType":"decimal", "precision":4, "scale":2}')
+ fixed_unknown_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"unknown"}')
+ fixed_decimal_zero_scale_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4, "scale":0}')
+ fixed_decimal_no_scale_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4}')
+
+ # decimal fixed and non-decimal can be mixed if fixed name matches
+ assert_true(can_read?(fixed_decimal_schema, fixed1_schema))
+ assert_true(can_read?(fixed1_schema, fixed_decimal_schema))
+ assert_false(can_read?(fixed2_schema, fixed_decimal_schema))
+
+ # decimal logical types match even if fixed name differs
+ assert_true(can_read?(fixed_decimal_schema, fixed2_decimal_schema))
+
+ # fixed with the same name & size match even if decimal precision and
scale differ
+ assert_true(can_read?(fixed_decimal_schema,
fixed_decimal_different_precision_schema))
+ assert_true(can_read?(fixed_decimal_schema, fixed_decimal_size3_schema))
+ assert_true(can_read?(fixed_decimal_schema, fixed_unknown_schema))
+ # - zero and no scale are equivalent but these match anyway due to same
name & size
+ assert_true(can_read?(fixed_decimal_no_scale_schema,
fixed_decimal_zero_scale_schema))
+ # - scale does not match, but these still match due to same name & size
+ assert_true(can_read?(fixed_decimal_schema, fixed_decimal_no_scale_schema))
+ assert_true(can_read?(fixed_decimal_schema,
fixed_decimal_zero_scale_schema))
+ end
+
+ def test_decimal_different_types
+ fixed_decimal_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4, "scale":2}')
+ fixed_decimal_scale4_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4, "scale":4}')
+ bytes_decimal_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4,
"scale":2}')
+ fixed_decimal_zero_scale_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4, "scale":0}')
+ fixed_decimal_no_scale_schema = Avro::Schema.
+ parse('{"type":"fixed", "size":2, "name":"Fixed1",
"logicalType":"decimal", "precision":4}')
+ bytes_decimal_zero_scale_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4,
"scale":0}')
+ bytes_decimal_no_scale_schema = Avro::Schema.
+ parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+
+ # decimal logical types can be read
+ assert_true(can_read?(fixed_decimal_schema, bytes_decimal_schema))
+ assert_true(can_read?(bytes_decimal_schema, fixed_decimal_schema))
+
+ # non-decimal bytes and fixed cannot be mixed
+ assert_false(can_read?(fixed_decimal_schema, bytes_schema))
+ assert_false(can_read?(bytes_schema, fixed_decimal_schema))
+ assert_false(can_read?(fixed1_schema, bytes_decimal_schema))
+ assert_false(can_read?(bytes_decimal_schema, fixed1_schema))
+
+ # decimal precision and scale must match
+ assert_false(can_read?(fixed_decimal_scale4_schema, bytes_decimal_schema))
+ assert_false(can_read?(bytes_decimal_schema, fixed_decimal_scale4_schema))
+
+ # zero scale and no scale are equivalent
+ assert_true(can_read?(bytes_decimal_no_scale_schema,
fixed_decimal_zero_scale_schema))
+ assert_true(can_read?(fixed_decimal_zero_scale_schema,
bytes_decimal_no_scale_schema))
+ assert_true(can_read?(bytes_decimal_zero_scale_schema,
fixed_decimal_no_scale_schema))
+ assert_true(can_read?(fixed_decimal_no_scale_schema,
bytes_decimal_zero_scale_schema))
+ end
+
# Tests from
lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java
def point_2d_schema