jecsand838 commented on code in PR #8348: URL: https://github.com/apache/arrow-rs/pull/8348#discussion_r2360507347
##########
arrow-avro/src/schema.rs:
##########
@@ -970,13 +973,59 @@ fn merge_extras(schema: Value, mut extras: JsonMap<String, Value>) -> Value {
     }
 }
 
+#[inline]
+fn is_avro_json_null(v: &Value) -> bool {
+    matches!(v, Value::String(s) if s == "null")
+}
+
 fn wrap_nullable(inner: Value, null_order: Nullability) -> Value {
     let null = Value::String("null".into());
-    let elements = match null_order {
-        Nullability::NullFirst => vec![null, inner],
-        Nullability::NullSecond => vec![inner, null],
-    };
-    Value::Array(elements)
+    match inner {
+        Value::Array(mut union) => {
+            union.retain(|v| !is_avro_json_null(v));
+            match null_order {
+                Nullability::NullFirst => {
+                    let mut out = Vec::with_capacity(union.len() + 1);
+                    out.push(null);
+                    out.extend(union);
+                    Value::Array(out)
+                }
+                Nullability::NullSecond => {
+                    union.push(null);
+                    Value::Array(union)
+                }
+            }
+        }
+        other => match null_order {
+            Nullability::NullFirst => Value::Array(vec![null, other]),
+            Nullability::NullSecond => Value::Array(vec![other, null]),
+        },
+    }
+}
+
+fn union_branch_signature(branch: &Value) -> Result<String, ArrowError> {
+    match branch {
+        Value::String(t) => Ok(format!("P:{t}")),
+        Value::Object(map) => {
+            let t = map.get("type").and_then(|v| v.as_str()).ok_or_else(|| {
+                ArrowError::SchemaError("Union branch object missing string 'type'".into())
+            })?;
+            match t {
+                "record" | "enum" | "fixed" => {
+                    let name = map.get("name").and_then(|v| v.as_str()).unwrap_or_default();

Review Comment:
   You know what, I should be throwing an error here. This is out of spec, since `"record" | "enum" | "fixed"` are named types. Ty for pointing this out. I'll include it in the follow-up.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org