yjshen commented on code in PR #7454:
URL: https://github.com/apache/arrow-datafusion/pull/7454#discussion_r1311075351
##########
datafusion-cli/src/main.rs:
##########
@@ -252,26 +253,128 @@ fn is_valid_memory_pool_size(size: &str) -> Result<(),
String> {
}
}
+#[derive(Debug, Clone, Copy)]
+enum ByteUnit {
+ Byte,
+ KiB,
+ MiB,
+ GiB,
+ TiB,
+}
+
+impl ByteUnit {
+ fn multiplier(&self) -> usize {
+ match self {
+ ByteUnit::Byte => 1,
+ ByteUnit::KiB => 1 << 10,
+ ByteUnit::MiB => 1 << 20,
+ ByteUnit::GiB => 1 << 30,
+ ByteUnit::TiB => 1 << 40,
+ }
+ }
+}
+
fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
- let mut size = size;
- let factor = if let Some(last_char) = size.chars().last() {
- match last_char {
- 'm' | 'M' => {
- size = &size[..size.len() - 1];
- 1024 * 1024
- }
- 'g' | 'G' => {
- size = &size[..size.len() - 1];
- 1024 * 1024 * 1024
- }
- _ => 1,
+ fn byte_suffixes() -> &'static HashMap<&'static str, ByteUnit> {
+ static BYTE_SUFFIXES: OnceLock<HashMap<&'static str, ByteUnit>> =
OnceLock::new();
+ BYTE_SUFFIXES.get_or_init(|| {
+ let mut m = HashMap::new();
+ m.insert("b", ByteUnit::Byte);
+ m.insert("k", ByteUnit::KiB);
+ m.insert("kb", ByteUnit::KiB);
+ m.insert("m", ByteUnit::MiB);
+ m.insert("mb", ByteUnit::MiB);
+ m.insert("g", ByteUnit::GiB);
+ m.insert("gb", ByteUnit::GiB);
+ m.insert("t", ByteUnit::TiB);
+ m.insert("tb", ByteUnit::TiB);
+ m
+ })
+ }
+
+ fn suffix_re() -> &'static regex::Regex {
+ static SUFFIX_REGEX: OnceLock<regex::Regex> = OnceLock::new();
+ SUFFIX_REGEX.get_or_init(||
regex::Regex::new(r"(-?[0-9]+)([a-z]+)?").unwrap())
+ }
+
+ let lower = size.to_lowercase();
+ if let Some(caps) = suffix_re().captures(&lower) {
+ let num_str = caps.get(1).unwrap().as_str();
+ if num_str.starts_with('-') {
+ return Err(format!(
+ "Negative memory pool size value is not allowed '{}'",
+ size
+ ));
}
+
+ let num = num_str.parse::<usize>().map_err(|_| {
+ format!("Invalid numeric value in memory pool size '{}'", size)
+ })?;
+
+ let suffix = caps.get(2).map(|m| m.as_str()).unwrap_or("b");
+ let unit = byte_suffixes()
+ .get(suffix)
+ .ok_or_else(|| format!("Invalid memory pool size '{}'", size))?;
+
+ Ok(num * unit.multiplier())
} else {
- return Err(format!("Invalid memory pool size '{}'", size));
- };
+ Err(format!("Invalid memory pool size '{}'", size))
+ }
+}
- match size.parse::<usize>() {
- Ok(size) if size > 0 => Ok(factor * size),
- _ => Err(format!("Invalid memory pool size '{}'", size)),
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn assert_conversion(input: &str, expected: Result<usize, String>) {
+ let result = extract_memory_pool_size(input);
+ match expected {
+ Ok(v) => assert_eq!(result.unwrap(), v),
+ Err(e) => assert_eq!(result.unwrap_err(), e),
+ }
+ }
+
+ #[test]
+ fn memory_pool_size() -> Result<(), String> {
+ // Test basic sizes without suffix, assumed to be bytes
+ assert_conversion("5", Ok(5));
+ assert_conversion("100", Ok(100));
+
+ // Test various units
+ assert_conversion("5b", Ok(5));
+ assert_conversion("4k", Ok(4 * 1024));
+ assert_conversion("4kb", Ok(4 * 1024));
+ assert_conversion("20m", Ok(20 * 1024 * 1024));
+ assert_conversion("20mb", Ok(20 * 1024 * 1024));
+ assert_conversion("2g", Ok(2 * 1024 * 1024 * 1024));
+ assert_conversion("2gb", Ok(2 * 1024 * 1024 * 1024));
+ assert_conversion("3t", Ok(3 * 1024 * 1024 * 1024 * 1024));
+ assert_conversion("4tb", Ok(4 * 1024 * 1024 * 1024 * 1024));
+
+ // Test case insensitivity
+ assert_conversion("4K", Ok(4 * 1024));
+ assert_conversion("4KB", Ok(4 * 1024));
+ assert_conversion("20M", Ok(20 * 1024 * 1024));
+ assert_conversion("20MB", Ok(20 * 1024 * 1024));
+ assert_conversion("2G", Ok(2 * 1024 * 1024 * 1024));
+ assert_conversion("2GB", Ok(2 * 1024 * 1024 * 1024));
+ assert_conversion("2T", Ok(2 * 1024 * 1024 * 1024 * 1024));
+
+ // Test invalid input
+ assert_conversion(
+ "invalid",
+ Err("Invalid memory pool size 'invalid'".to_string()),
+ );
+ assert_conversion("4kbx", Err("Invalid memory pool size
'4kbx'".to_string()));
+ assert_conversion(
+ "-20mb",
+ Err("Negative memory pool size value is not allowed
'-20mb'".to_string()),
+ );
+ assert_conversion(
+ "-100",
+ Err("Negative memory pool size value is not allowed
'-100'".to_string()),
+ );
+
Review Comment:
I'm not sure if I understand you correctly. Do you mean the case "-100"
shouldn't fail with the Err `"Negative memory pool size value is not allowed
'-100'"`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]