etseidl commented on code in PR #7402: URL: https://github.com/apache/arrow-rs/pull/7402#discussion_r2037847313
########## arrow-array/src/record_batch.rs: ########## @@ -1637,4 +1653,57 @@ mod tests { "bar" ); } + + #[test] + fn test_batch_with_force_schema() { Review Comment: Could you please also add a check where the forced schema succeeds? ########## arrow-array/src/record_batch.rs: ########## @@ -359,6 +359,20 @@ impl RecordBatch { }) } + /// Forcibly overrides the schema of this [`RecordBatch`] + /// without additional schema checks however bringing all the schema compatibility responsibilities + /// to the caller site. + /// + /// If provided schema is not compatible with this [`RecordBatch`] columns the runtime behavior + /// is undefined + pub fn with_schema_force(self, schema: SchemaRef) -> Result<Self, ArrowError> { Review Comment: Maybe `with_schema_unchecked` would be better? ########## arrow-array/src/record_batch.rs: ########## @@ -359,6 +359,20 @@ impl RecordBatch { }) } + /// Forcibly overrides the schema of this [`RecordBatch`] + /// without additional schema checks however bringing all the schema compatibility responsibilities + /// to the caller site. Review Comment: ```suggestion /// Overrides the schema of this [`RecordBatch`] /// without additional schema checks. Note, however, that this pushes all the schema compatibility responsibilities /// to the caller site. In particular, the caller guarantees that `schema` is a superset /// of the current schema as determined by [`Schema::contains`]. ``` ########## arrow-array/src/record_batch.rs: ########## @@ -1637,4 +1653,57 @@ mod tests { "bar" ); } + + #[test] + fn test_batch_with_force_schema() { + fn force_schema_and_get_err_from_batch( + record_batch: &RecordBatch, + schema_ref: SchemaRef, + idx: usize, + ) -> Option<ArrowError> { + record_batch + .clone() + .with_schema_force(schema_ref) + .unwrap() + .project(&[idx]) + .err() + } + + let c: ArrayRef = Arc::new(StringArray::from(vec!["d", "e", "f"])); + + let record_batch = + RecordBatch::try_from_iter(vec![("c", c.clone())]).expect("valid conversion"); + + // Test empty schema for non-empty schema batch + let invalid_schema_empty = Schema::empty(); + assert_eq!( + force_schema_and_get_err_from_batch(&record_batch, invalid_schema_empty.into(), 0) + .unwrap() + .to_string(), + "Schema error: project index 0 out of bounds, max field 0" + ); + + // Wrong number of columns + let invalid_schema_more_cols = Schema::new(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("a", DataType::Int32, false), Review Comment: ```suggestion Field::new("b", DataType::Int32, false), ``` ?. This triggers my OCD 😅 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org