TGooch44 commented on a change in pull request #407: [python] Parquet read path
URL: https://github.com/apache/incubator-iceberg/pull/407#discussion_r331028923
 
 

 ##########
 File path: python/iceberg/core/avro/avro_to_iceberg.py
 ##########
 @@ -228,37 +232,41 @@ def is_option_schema(field_type):
         return False
 
     @staticmethod
-    def read_avro_row(iceberg_schema, avro_reader):
-        try:
-            avro_row = avro_reader.__next__()
+    def read_avro_file(iceberg_schema, data_file):
+        fo = data_file.new_fo()
+        avro_reader = fastavro.reader(fo)
+        for avro_row in avro_reader:
             iceberg_row = dict()
             for field in iceberg_schema.as_struct().fields:
                 iceberg_row[field.name] = 
AvroToIceberg.get_field_from_avro(avro_row, field)
             yield iceberg_row
+        fo.close()
+
+    @staticmethod
+    def read_avro_row(iceberg_schema, avro_reader):
+        try:
+            for avro_row in avro_reader:
+                iceberg_row = dict()
+                for field in iceberg_schema.as_struct().fields:
+                    iceberg_row[field.name] = 
AvroToIceberg.get_field_from_avro(avro_row, field)
+                yield iceberg_row
         except StopIteration:
             return
 
     @staticmethod
     def get_field_from_avro(avro_row, field):
-        process_funcs = {TypeID.STRUCT: lambda avro_row, field: 
AvroToIceberg.get_field_from_struct(avro_row, field),
-                         TypeID.LIST: lambda avro_row, field: 
AvroToIceberg.get_field_from_list(avro_row, field),
-                         TypeID.MAP: lambda avro_row, field: 
AvroToIceberg.get_field_from_map(avro_row, field)}
-        if field.type.is_primitive_type():
-            processing_func = AvroToIceberg.get_field_from_primitive
-        else:
-            processing_func = process_funcs.get(field.type.type_id)
-
-        if processing_func is None:
+        try:
 
 Review comment:
   I added the is_required check to the raise and also tweaked the get map 
function to follow the pattern of the rest of the functions, e.g., use bracket 
access inside a try block instead of get.
   
   I'm going to write up some tests for the avro module to explicitly test some 
of these cases. Not sure whether you want to hold the merge for this or not.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to