ryanrussell opened a new issue, #8118: URL: https://github.com/apache/arrow-rs/issues/8118
# ARROW-009: IPC Dictionary Tracking API Removals ## Problem Arrow 56.0 removed multiple IPC dictionary-related APIs that were deprecated in 54.0.0. Our linter doesn't detect usage of these removed functions, causing compilation failures. ## API Changes Details **All removed in commit 82821e574:** ```rust // Dictionary Tracker methods: -pub fn arrow_ipc::writer::DictionaryTracker::new_with_preserve_dict_id(error_on_replacement: bool, preserve_dict_id: bool) -> Self -pub fn arrow_ipc::writer::DictionaryTracker::set_dict_id(&mut self, field: &arrow_schema::field::Field) -> i64 // IPC Write Options: -pub fn arrow_ipc::writer::IpcWriteOptions::preserve_dict_id(&self) -> bool -pub fn arrow_ipc::writer::IpcWriteOptions::with_preserve_dict_id(self, preserve_dict_id: bool) -> Self // Schema conversion functions: -pub fn arrow_ipc::convert::schema_to_fb(schema: &arrow_schema::schema::Schema) -> flatbuffers::builder::FlatBufferBuilder<'_> -pub fn arrow_ipc::writer::IpcDataGenerator::schema_to_bytes(&self, schema: &arrow_schema::schema::Schema, write_options: &arrow_ipc::writer::IpcWriteOptions) -> arrow_ipc::writer::EncodedData ``` **Commit:** `82821e574` - "arrow-ipc: Remove all abilities to preserve dict IDs" ## Implementation Task Create `src/rules/arrow_009_ipc_dict_api_removed.rs` with these specifications: ### Rule Implementation ```rust use regex::Regex; use std::path::Path; use crate::output::{Issue, Severity}; use crate::rules::Rule; /// ARROW-009: Detect removed IPC dictionary tracking APIs /// /// Arrow 56.0 removed dictionary preservation APIs deprecated in 54.0.0. /// These were removed as part of making dict_id purely an IPC concern. pub struct Arrow009Rule; impl Rule for Arrow009Rule { fn rule_id(&self) -> &'static str { "ARROW-009" } fn check_rust_source(&self, file_path: &Path, content: &str) -> Result<Vec<Issue>, Box<dyn std::error::Error>> { let mut issues = Vec::new(); let removed_methods = vec\![ (r"DictionaryTracker::new_with_preserve_dict_id\s*\(", "Use DictionaryTracker::new(error_on_replacement) instead. Dict ID preservation is no longer supported."), (r"\.set_dict_id\s*\(", "Use DictionaryTracker::next_dict_id() to get next available ID instead."), (r"\.preserve_dict_id\s*\(\s*\)", "Remove this call - dict ID preservation is no longer supported in IPC options."), (r"\.with_preserve_dict_id\s*\(", "Remove this call from IpcWriteOptions builder chain."), (r"schema_to_fb\s*\(", "This function was removed. Use IpcDataGenerator methods for schema serialization."), (r"\.schema_to_bytes\s*\(", "This method was removed from IpcDataGenerator. Use the new schema encoding methods."), ]; for (line_num, line) in content.lines().enumerate() { for (pattern, suggestion) in &removed_methods { let regex = Regex::new(pattern)?; if let Some(mat) = regex.find(line) { issues.push(Issue { rule_id: self.rule_id().to_string(), severity: Severity::Error, message: format\!("IPC dictionary API removed in Arrow 56.0: {}", line[mat.start()..mat.end()].trim_end_matches(['(', ' '])), file_path: file_path.to_path_buf(), line: line_num + 1, column: mat.start() + 1, suggestion: Some(suggestion.to_string()), auto_fixable: false, // Requires architectural changes deprecated_since: Some("54.0.0 (removed in 56.0.0)".to_string()), changelog_url: Some("https://github.com/apache/arrow-rs/pull/7940".to_string()), migration_guide_url: Some("https://arrow.apache.org/docs/rust/migration_guide.html#ipc-dict-id-removal".to_string()), }); break; // Only report first match per line } } } Ok(issues) } fn check_cargo_toml(&self, _file_path: &Path, _content: &str) -> Result<Vec<Issue>, Box<dyn std::error::Error>> { Ok(Vec::new()) } } ``` ### Tests Required ```rust #[cfg(test)] mod tests { use super::*; use std::path::PathBuf; #[test] fn test_detects_dictionary_tracker_removed_methods() { let rule = Arrow009Rule; let content = r#" use arrow_ipc::writer::DictionaryTracker; fn create_tracker() -> DictionaryTracker { let tracker = DictionaryTracker::new_with_preserve_dict_id(false, true); tracker.set_dict_id(&field); tracker } "#; let issues = rule.check_rust_source(&PathBuf::from("test.rs"), content).unwrap(); assert_eq\!(issues.len(), 2); // new_with_preserve_dict_id + set_dict_id assert\!(issues.iter().all(|i| i.rule_id == "ARROW-009")); assert\!(issues.iter().all(|i| matches\!(i.severity, Severity::Error))); assert\!(issues.iter().all(|i| \!i.auto_fixable)); } #[test] fn test_detects_ipc_write_options_removed_methods() { let rule = Arrow009Rule; let content = r#" use arrow_ipc::writer::IpcWriteOptions; fn create_options() -> IpcWriteOptions { let options = IpcWriteOptions::default() .with_preserve_dict_id(true); if options.preserve_dict_id() { println\!("Dict ID preserved"); } options } "#; let issues = rule.check_rust_source(&PathBuf::from("test.rs"), content).unwrap(); assert_eq\!(issues.len(), 2); // with_preserve_dict_id + preserve_dict_id assert\!(issues.iter().all(|i| i.message.contains("removed in Arrow 56.0"))); } #[test] fn test_detects_schema_conversion_functions() { let rule = Arrow009Rule; let content = r#" use arrow_ipc::convert::schema_to_fb; use arrow_ipc::writer::IpcDataGenerator; fn convert_schema(schema: &Schema, generator: &IpcDataGenerator) { let fb = schema_to_fb(schema); let bytes = generator.schema_to_bytes(schema, &options); } "#; let issues = rule.check_rust_source(&PathBuf::from("test.rs"), content).unwrap(); assert_eq\!(issues.len(), 2); // schema_to_fb + schema_to_bytes } #[test] fn test_no_issues_for_replacement_apis() { let rule = Arrow009Rule; let content = r#" use arrow_ipc::writer::{DictionaryTracker, IpcWriteOptions}; fn create_tracker() -> DictionaryTracker { let mut tracker = DictionaryTracker::new(false); let dict_id = tracker.next_dict_id(); tracker } fn create_options() -> IpcWriteOptions { IpcWriteOptions::default() } "#; let issues = rule.check_rust_source(&PathBuf::from("test.rs"), content).unwrap(); assert_eq\!(issues.len(), 0); } } ``` ### Integration Steps 1. Add to `src/rules/mod.rs`: ```rust pub mod arrow_009_ipc_dict_api_removed; // In RuleEngine::new(): rules.push(Box::new(arrow_009_ipc_dict_api_removed::Arrow009Rule)); ``` 2. Test with these patterns: ```rust DictionaryTracker::new_with_preserve_dict_id(false, true); // Error tracker.set_dict_id(&field); // Error options.with_preserve_dict_id(true); // Error options.preserve_dict_id(); // Error schema_to_fb(&schema); // Error generator.schema_to_bytes(&schema, &opts); // Error // Correct replacements: DictionaryTracker::new(false); // OK tracker.next_dict_id(); // OK IpcWriteOptions::default(); // OK ``` ## Acceptance Criteria - ✅ Detects all 6 removed IPC dictionary APIs - ✅ Provides specific replacement guidance for each - ✅ Error severity (compilation failures) - ✅ Non-auto-fixable (requires architectural changes) - ✅ References correct PR (#7940) and commit ## Migration Context - **Background**: Dict ID preservation removed to make dict_id purely IPC concern - **Impact**: Code using dictionary preservation must be refactored - **Alternative**: Use standard dictionary tracking without preservation - **Timeline**: Deprecated 54.0.0 → Removed 56.0.0 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org