[
https://issues.apache.org/jira/browse/BEAM-13016?focusedWorklogId=679230&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-679230
]
ASF GitHub Bot logged work on BEAM-13016:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 09/Nov/21 18:16
Start Date: 09/Nov/21 18:16
Worklog Time Spent: 10m
Work Description: tvalentyn commented on a change in pull request #15900:
URL: https://github.com/apache/beam/pull/15900#discussion_r745882326
##########
File path: sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
##########
@@ -36,156 +34,50 @@ def test_convert_bigquery_schema_to_avro_schema(self):
]
fields = [
- bigquery.TableFieldSchema(
- name="number", type="INTEGER", mode="REQUIRED"),
- bigquery.TableFieldSchema(
- name="species", type="STRING", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="quality", type="FLOAT"), # default to NULLABLE
- bigquery.TableFieldSchema(
- name="grade", type="FLOAT64"), # default to NULLABLE
- bigquery.TableFieldSchema(
- name="quantity", type="INTEGER"), # default to NULLABLE
- bigquery.TableFieldSchema(
- name="dependents", type="INT64"), # default to NULLABLE
- bigquery.TableFieldSchema(
- name="birthday", type="TIMESTAMP", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="flighted", type="BOOL", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="flighted2", type="BOOLEAN", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="sound", type="BYTES", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="anniversaryDate", type="DATE", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="anniversaryDatetime", type="DATETIME", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="anniversaryTime", type="TIME", mode="NULLABLE"),
- bigquery.TableFieldSchema(
- name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
- bigquery.TableFieldSchema(
- name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
- bigquery.TableFieldSchema(
- name="associates", type="RECORD", mode="REPEATED", fields=subfields),
- bigquery.TableFieldSchema(
- name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="number", type="INTEGER", mode="REQUIRED"),
+ bigquery.TableFieldSchema(
+ name="species", type="STRING", mode="NULLABLE"),
+ bigquery.TableFieldSchema(name="quality",
+ type="FLOAT"), # default to NULLABLE
+ bigquery.TableFieldSchema(name="grade",
+ type="FLOAT64"), # default to NULLABLE
+ bigquery.TableFieldSchema(name="quantity",
+ type="INTEGER"), # default to NULLABLE
+ bigquery.TableFieldSchema(name="dependents",
+ type="INT64"), # default to NULLABLE
+ bigquery.TableFieldSchema(
+ name="birthday", type="TIMESTAMP", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="flighted", type="BOOL", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="flighted2", type="BOOLEAN", mode="NULLABLE"),
+ bigquery.TableFieldSchema(name="sound", type="BYTES", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="anniversaryDate", type="DATE", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="anniversaryDatetime", type="DATETIME", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="anniversaryTime", type="TIME", mode="NULLABLE"),
+ bigquery.TableFieldSchema(
+ name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
+ bigquery.TableFieldSchema(
+ name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
+ bigquery.TableFieldSchema(
+ name="associates", type="RECORD", mode="REPEATED",
+ fields=subfields),
+ bigquery.TableFieldSchema(
+ name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
]
table_schema = bigquery.TableSchema(fields=fields)
avro_schema = bigquery_avro_tools.get_record_schema_from_dict_table_schema(
"root", bigquery_tools.get_dict_table_schema(table_schema))
- # Test that schema can be parsed correctly by fastavro
- fastavro.parse_schema(avro_schema)
-
- # Test that schema can be parsed correctly by avro
- parsed_schema = Parse(json.dumps(avro_schema))
-
- self.assertEqual(
- parsed_schema.field_map["number"].type, Parse(json.dumps("long")))
- self.assertEqual(
- parsed_schema.field_map["species"].type,
- Parse(json.dumps(["null", "string"])))
- self.assertEqual(
- parsed_schema.field_map["quality"].type,
- Parse(json.dumps(["null", "double"])))
- self.assertEqual(
- parsed_schema.field_map["grade"].type,
- Parse(json.dumps(["null", "double"])))
- self.assertEqual(
- parsed_schema.field_map["quantity"].type,
- Parse(json.dumps(["null", "long"])))
- self.assertEqual(
- parsed_schema.field_map["dependents"].type,
- Parse(json.dumps(["null", "long"])))
- self.assertEqual(
- parsed_schema.field_map["birthday"].type,
- Parse(
- json.dumps(
- ["null", {
- "type": "long", "logicalType": "timestamp-micros"
- }])))
- self.assertEqual(
- parsed_schema.field_map["birthdayMoney"].type,
- Parse(
- json.dumps([
- "null",
- {
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 38,
- "scale": 9
- }
- ])))
- self.assertEqual(
- parsed_schema.field_map["flighted"].type,
- Parse(json.dumps(["null", "boolean"])))
- self.assertEqual(
- parsed_schema.field_map["flighted2"].type,
- Parse(json.dumps(["null", "boolean"])))
- self.assertEqual(
- parsed_schema.field_map["sound"].type,
- Parse(json.dumps(["null", "bytes"])))
- self.assertEqual(
- parsed_schema.field_map["anniversaryDate"].type,
- Parse(json.dumps(["null", {
- "type": "int", "logicalType": "date"
- }])))
- self.assertEqual(
- parsed_schema.field_map["anniversaryDatetime"].type,
- Parse(json.dumps(["null", "string"])))
- self.assertEqual(
- parsed_schema.field_map["anniversaryTime"].type,
- Parse(
- json.dumps(["null", {
- "type": "long", "logicalType": "time-micros"
- }])))
- self.assertEqual(
- parsed_schema.field_map["geoPositions"].type,
- Parse(json.dumps(["null", "string"])))
-
- for field in ("scion", "family"):
- self.assertEqual(
- parsed_schema.field_map[field].type,
- Parse(
- json.dumps([
- "null",
- {
- "type": "record",
- "name": field,
- "fields": [
- {
- "type": ["null", "string"],
- "name": "species",
- },
- ],
- "doc": "Translated Avro Schema for {}".format(field),
- "namespace":
- "apache_beam.io.gcp.bigquery.root.{}".format(
- field),
- }
- ])))
-
- self.assertEqual(
- parsed_schema.field_map["associates"].type,
- Parse(
- json.dumps({
- "type": "array",
- "items": {
- "type": "record",
- "name": "associates",
- "fields": [
- {
- "type": ["null", "string"],
- "name": "species",
- },
- ],
- "doc": "Translated Avro Schema for associates",
- "namespace": "apache_beam.io.gcp.bigquery.root.associates",
- }
- })))
+ parsed_schema = parse_schema(avro_schema)
+ self.assertEqual(type(parsed_schema), dict)
Review comment:
placeholder comment to update this test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 679230)
Time Spent: 19.5h (was: 19h 20m)
> Remove avro-python3 dependency from Beam
> ----------------------------------------
>
> Key: BEAM-13016
> URL: https://issues.apache.org/jira/browse/BEAM-13016
> Project: Beam
> Issue Type: Bug
> Components: io-py-avro
> Reporter: Valentyn Tymofieiev
> Assignee: Ananda Prasad Inguva
> Priority: P2
> Time Spent: 19.5h
> Remaining Estimate: 0h
>
> Beam has been using fastavro as the default dependency for Avro IO on Python3, and
> no issues have been reported. Fastavro has a more frequent release cycle and is
> faster.
> Let's remove the dependency on avro-python3 to avoid branching in our
> codebase, so that we don't replicate that branching in other parts of Beam,
> as recently happened in:
> https://github.com/apache/beam/pull/15185#pullrequestreview-729352200
> https://github.com/apache/beam/pull/15185#pullrequestreview-731157837
--
This message was sent by Atlassian Jira
(v8.20.1#820001)