yjshen commented on code in PR #7454:
URL: https://github.com/apache/arrow-datafusion/pull/7454#discussion_r1311091117
##########
datafusion-cli/src/main.rs:
##########
@@ -252,26 +253,128 @@ fn is_valid_memory_pool_size(size: &str) -> Result<(), String> {
}
}
+#[derive(Debug, Clone, Copy)]
+enum ByteUnit {
+ Byte,
+ KiB,
+ MiB,
+ GiB,
+ TiB,
+}
+
+impl ByteUnit {
+ fn multiplier(&self) -> usize {
+ match self {
+ ByteUnit::Byte => 1,
+ ByteUnit::KiB => 1 << 10,
+ ByteUnit::MiB => 1 << 20,
+ ByteUnit::GiB => 1 << 30,
+ ByteUnit::TiB => 1 << 40,
+ }
+ }
+}
+
fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
- let mut size = size;
- let factor = if let Some(last_char) = size.chars().last() {
- match last_char {
- 'm' | 'M' => {
- size = &size[..size.len() - 1];
- 1024 * 1024
- }
- 'g' | 'G' => {
- size = &size[..size.len() - 1];
- 1024 * 1024 * 1024
- }
- _ => 1,
+ fn byte_suffixes() -> &'static HashMap<&'static str, ByteUnit> {
+ static BYTE_SUFFIXES: OnceLock<HashMap<&'static str, ByteUnit>> = OnceLock::new();
+ BYTE_SUFFIXES.get_or_init(|| {
+ let mut m = HashMap::new();
+ m.insert("b", ByteUnit::Byte);
+ m.insert("k", ByteUnit::KiB);
+ m.insert("kb", ByteUnit::KiB);
+ m.insert("m", ByteUnit::MiB);
+ m.insert("mb", ByteUnit::MiB);
+ m.insert("g", ByteUnit::GiB);
+ m.insert("gb", ByteUnit::GiB);
+ m.insert("t", ByteUnit::TiB);
+ m.insert("tb", ByteUnit::TiB);
+ m
+ })
+ }
+
+ fn suffix_re() -> &'static regex::Regex {
+ static SUFFIX_REGEX: OnceLock<regex::Regex> = OnceLock::new();
+ SUFFIX_REGEX.get_or_init(|| regex::Regex::new(r"(-?[0-9]+)([a-z]+)?").unwrap())
+ }
+
+ let lower = size.to_lowercase();
+ if let Some(caps) = suffix_re().captures(&lower) {
+ let num_str = caps.get(1).unwrap().as_str();
+ if num_str.starts_with('-') {
+ return Err(format!(
+ "Negative memory pool size value is not allowed '{}'",
+ size
+ ));
}
+
+ let num = num_str.parse::<usize>().map_err(|_| {
+ format!("Invalid numeric value in memory pool size '{}'", size)
+ })?;
+
Review Comment:
I have no strong opinion on this.
I changed the code above to parse the numeric value directly into `usize` for
simplicity, as suggested. The caveat is that the new error message is less
informative, which I suppose is fine in this case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]