This is an automated email from the ASF dual-hosted git repository.

tjwp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 389d351  AVRO-3036: add schema matching for decimal logical type in Ruby (#1082)
389d351 is described below

commit 389d35131af037bbf3bd5da739528a13191ade43
Author: Tim Perkins <[email protected]>
AuthorDate: Sat May 8 07:46:13 2021 -0400

    AVRO-3036: add schema matching for decimal logical type in Ruby (#1082)
    
    Includes schema support for fixed decimal logical types, but not encoding and decoding.
---
 lang/ruby/lib/avro/schema.rb                |  48 +++++++++++--
 lang/ruby/lib/avro/schema_compatibility.rb  |  19 +++--
 lang/ruby/test/test_schema.rb               |  70 +++++++++++++++++++
 lang/ruby/test/test_schema_compatibility.rb | 105 ++++++++++++++++++++++++++++
 4 files changed, 227 insertions(+), 15 deletions(-)

diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 8105b0a..5febf12 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -39,6 +39,8 @@ module Avro
 
     DEFAULT_VALIDATE_OPTIONS = { recursive: true, encoded: false }.freeze
 
+    DECIMAL_LOGICAL_TYPE = 'decimal'.freeze
+
     def self.parse(json_string)
       real_parse(MultiJson.load(json_string), {})
     end
@@ -76,7 +78,9 @@ module Avro
           case type_sym
           when :fixed
             size = json_obj['size']
-            return FixedSchema.new(name, namespace, size, names, logical_type, 
aliases)
+            precision = json_obj['precision']
+            scale = json_obj['scale']
+            return FixedSchema.new(name, namespace, size, names, logical_type, 
aliases, precision, scale)
           when :enum
             symbols = json_obj['symbols']
             doc     = json_obj['doc']
@@ -284,6 +288,10 @@ module Avro
       def match_fullname?(name)
         name == fullname || fullname_aliases.include?(name)
       end
+
+      def match_schema?(schema)
+        type_sym == schema.type_sym && match_fullname?(schema.fullname)
+      end
     end
 
     class RecordSchema < NamedSchema
@@ -468,6 +476,11 @@ module Avro
         hsh = super
         hsh.size == 1 ? type : hsh
       end
+
+      def match_schema?(schema)
+        return type_sym == schema.type_sym
+        # TODO: eventually this could handle schema promotion for primitive schemas too
+      end
     end
 
     class BytesSchema < PrimitiveSchema
@@ -486,22 +499,49 @@ module Avro
         avro['scale'] = scale if scale
         avro
       end
+
+      def match_schema?(schema)
+        return true if super
+
+        if logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type == 
DECIMAL_LOGICAL_TYPE
+          return precision == schema.precision && (scale || 0) == 
(schema.scale || 0)
+        end
+
+        false
+      end
     end
 
     class FixedSchema < NamedSchema
-      attr_reader :size
-      def initialize(name, space, size, names=nil, logical_type=nil, 
aliases=nil)
+      attr_reader :size, :precision, :scale
+      def initialize(name, space, size, names=nil, logical_type=nil, 
aliases=nil, precision=nil, scale=nil)
         # Ensure valid cto args
         unless size.is_a?(Integer)
           raise AvroError, 'Fixed Schema requires a valid integer for size 
property.'
         end
         super(:fixed, name, space, names, nil, logical_type, aliases)
         @size = size
+        @precision = precision
+        @scale = scale
       end
 
       def to_avro(names=Set.new)
         avro = super
-        avro.is_a?(Hash) ? avro.merge('size' => size) : avro
+        return avro if avro.is_a?(String)
+
+        avro['size'] = size
+        avro['precision'] = precision if precision
+        avro['scale'] = scale if scale
+        avro
+      end
+
+      def match_schema?(schema)
+        return true if super && size == schema.size
+
+        if logical_type == DECIMAL_LOGICAL_TYPE && schema.logical_type == 
DECIMAL_LOGICAL_TYPE
+          return precision == schema.precision && (scale || 0) == 
(schema.scale || 0)
+        end
+
+        false
       end
     end
 
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb b/lang/ruby/lib/avro/schema_compatibility.rb
index 9f21067..910e1a5 100644
--- a/lang/ruby/lib/avro/schema_compatibility.rb
+++ b/lang/ruby/lib/avro/schema_compatibility.rb
@@ -47,24 +47,17 @@ module Avro
       end
 
       if w_type == r_type
-        return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
+        return readers_schema.match_schema?(writers_schema) if 
Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
 
         case r_type
-        when :record
-          return readers_schema.match_fullname?(writers_schema.fullname)
-        when :error
-          return readers_schema.match_fullname?(writers_schema.fullname)
         when :request
           return true
-        when :fixed
-          return readers_schema.match_fullname?(writers_schema.fullname) &&
-            writers_schema.size == readers_schema.size
-        when :enum
-          return readers_schema.match_fullname?(writers_schema.fullname)
         when :map
           return match_schemas(writers_schema.values, readers_schema.values)
         when :array
           return match_schemas(writers_schema.items, readers_schema.items)
+        else
+          return readers_schema.match_schema?(writers_schema)
         end
       end
 
@@ -81,7 +74,11 @@ module Avro
         return true
       end
 
-      return false
+      if readers_schema.respond_to?(:match_schema?)
+        readers_schema.match_schema?(writers_schema)
+      else
+        false
+      end
     end
 
     class Checker
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index ff1cd3c..3b66f28 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -542,6 +542,76 @@ class TestSchema < Test::Unit::TestCase
                  exception.to_s)
   end
 
+  def test_fixed_decimal_to_include_precision_scale
+    schema = Avro::Schema.parse <<-SCHEMA
+      {
+        "type": "fixed",
+        "name": "aFixed",
+        "logicalType": "decimal",
+        "size": 4,
+        "precision": 9,
+        "scale": 2
+      }
+    SCHEMA
+
+    schema_hash =
+      {
+        'type' => 'fixed',
+        'name' => 'aFixed',
+        'logicalType' => 'decimal',
+        'size' => 4,
+        'precision' => 9,
+        'scale' => 2
+      }
+
+    assert_equal schema_hash, schema.to_avro
+  end
+
+  def test_fixed_decimal_to_include_precision_no_scale
+    schema = Avro::Schema.parse <<-SCHEMA
+      {
+        "type": "fixed",
+        "name": "aFixed",
+        "logicalType": "decimal",
+        "size": 4,
+        "precision": 9
+      }
+    SCHEMA
+
+    schema_hash =
+      {
+        'type' => 'fixed',
+        'name' => 'aFixed',
+        'logicalType' => 'decimal',
+        'size' => 4,
+        'precision' => 9
+      }
+
+    assert_equal schema_hash, schema.to_avro
+  end
+
+  # Note: this is not valid but validation is not yet implemented
+  def test_fixed_decimal_to_without_precision_scale
+    schema = Avro::Schema.parse <<-SCHEMA
+      {
+        "type": "fixed",
+        "size": 4,
+        "name": "aFixed",
+        "logicalType": "decimal"
+      }
+    SCHEMA
+
+    schema_hash =
+      {
+        'type' => 'fixed',
+        'name' => 'aFixed',
+        'logicalType' => 'decimal',
+        'size' => 4
+      }
+
+    assert_equal schema_hash, schema.to_avro
+  end
+
   def test_bytes_decimal_to_include_precision_scale
     schema = Avro::Schema.parse <<-SCHEMA
       {
diff --git a/lang/ruby/test/test_schema_compatibility.rb b/lang/ruby/test/test_schema_compatibility.rb
index 62a5bd9..3773109 100644
--- a/lang/ruby/test/test_schema_compatibility.rb
+++ b/lang/ruby/test/test_schema_compatibility.rb
@@ -293,6 +293,111 @@ class TestSchemaCompatibility < Test::Unit::TestCase
     assert_false(can_read?(writer_schema, reader_schema))
   end
 
+  def test_bytes_decimal
+    bytes_decimal_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4, 
"scale":4}')
+    bytes2_decimal_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4, 
"scale":4}')
+    bytes_decimal_different_precision_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":5, 
"scale":4}')
+    bytes_decimal_no_scale_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+    bytes2_decimal_no_scale_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+    bytes_decimal_zero_scale_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4, 
"scale":0}')
+    bytes_unknown_logical_type_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"unknown"}')
+
+    # decimal bytes and non-decimal bytes can be mixed
+    assert_true(can_read?(bytes_schema, bytes_decimal_schema))
+    assert_true(can_read?(bytes_decimal_schema, bytes_schema))
+    assert_true(can_read?(bytes_decimal_schema, 
bytes_unknown_logical_type_schema))
+
+    # decimal bytes match even if precision and scale differ
+    assert_true(can_read?(bytes_decimal_schema, 
bytes_decimal_different_precision_schema))
+    assert_true(can_read?(bytes_decimal_schema, bytes_decimal_no_scale_schema))
+    assert_true(can_read?(bytes_decimal_schema, 
bytes_decimal_zero_scale_schema))
+    # - zero and no scale are equivalent
+    assert_true(can_read?(bytes_decimal_zero_scale_schema, 
bytes_decimal_no_scale_schema))
+    # - different schemas with the same attributes match
+    assert_true(can_read?(bytes_decimal_schema, bytes2_decimal_schema))
+    # - different schemas with the same no scale match
+    assert_true(can_read?(bytes2_decimal_no_scale_schema, 
bytes_decimal_no_scale_schema))
+  end
+
+  def test_fixed_decimal
+    fixed_decimal_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4, "scale":2}')
+    fixed2_decimal_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed2", 
"logicalType":"decimal", "precision":4, "scale":2}')
+    fixed_decimal_different_precision_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":3, "scale":2}')
+    fixed_decimal_size3_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":3, "name":"FixedS3", 
"logicalType":"decimal", "precision":4, "scale":2}')
+    fixed_unknown_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"unknown"}')
+    fixed_decimal_zero_scale_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4, "scale":0}')
+    fixed_decimal_no_scale_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4}')
+
+    # decimal fixed and non-decimal can be mixed if fixed name matches
+    assert_true(can_read?(fixed_decimal_schema, fixed1_schema))
+    assert_true(can_read?(fixed1_schema, fixed_decimal_schema))
+    assert_false(can_read?(fixed2_schema, fixed_decimal_schema))
+
+    # decimal logical types match even if fixed name differs
+    assert_true(can_read?(fixed_decimal_schema, fixed2_decimal_schema))
+
+    # fixed with the same name & size match even if decimal precision and scale differ
+    assert_true(can_read?(fixed_decimal_schema, 
fixed_decimal_different_precision_schema))
+    assert_true(can_read?(fixed_decimal_schema, fixed_decimal_size3_schema))
+    assert_true(can_read?(fixed_decimal_schema, fixed_unknown_schema))
+    # - zero and no scale are equivalent but these match anyway due to same name & size
+    assert_true(can_read?(fixed_decimal_no_scale_schema, 
fixed_decimal_zero_scale_schema))
+    # - scale does not match
+    assert_true(can_read?(fixed_decimal_schema, fixed_decimal_no_scale_schema))
+    assert_true(can_read?(fixed_decimal_schema, 
fixed_decimal_zero_scale_schema))
+  end
+
+  def test_decimal_different_types
+    fixed_decimal_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4, "scale":2}')
+    fixed_decimal_scale4_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4, "scale":4}')
+    bytes_decimal_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4, 
"scale":2}')
+    fixed_decimal_zero_scale_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4, "scale":0}')
+    fixed_decimal_no_scale_schema = Avro::Schema.
+      parse('{"type":"fixed", "size":2, "name":"Fixed1", 
"logicalType":"decimal", "precision":4}')
+    bytes_decimal_zero_scale_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4, 
"scale":0}')
+    bytes_decimal_no_scale_schema = Avro::Schema.
+      parse('{"type":"bytes", "logicalType":"decimal", "precision":4}')
+
+    # decimal logical types can be read
+    assert_true(can_read?(fixed_decimal_schema, bytes_decimal_schema))
+    assert_true(can_read?(bytes_decimal_schema, fixed_decimal_schema))
+
+    # non-decimal bytes and fixed cannot be mixed
+    assert_false(can_read?(fixed_decimal_schema, bytes_schema))
+    assert_false(can_read?(bytes_schema, fixed_decimal_schema))
+    assert_false(can_read?(fixed1_schema, bytes_decimal_schema))
+    assert_false(can_read?(bytes_decimal_schema, fixed1_schema))
+
+    # decimal precision and scale must match
+    assert_false(can_read?(fixed_decimal_scale4_schema, bytes_decimal_schema))
+    assert_false(can_read?(bytes_decimal_schema, fixed_decimal_scale4_schema))
+
+    # zero scale and no scale are equivalent
+    assert_true(can_read?(bytes_decimal_no_scale_schema, 
fixed_decimal_zero_scale_schema))
+    assert_true(can_read?(fixed_decimal_zero_scale_schema, 
bytes_decimal_no_scale_schema))
+    assert_true(can_read?(bytes_decimal_zero_scale_schema, 
fixed_decimal_no_scale_schema))
+    assert_true(can_read?(fixed_decimal_no_scale_schema, 
bytes_decimal_zero_scale_schema))
+  end
+
   # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java
 
   def point_2d_schema

Reply via email to