This is an automated email from the ASF dual-hosted git repository.
yjshen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new edeb88533e chore(deps): update regex and regex-syntax requirement from
0.6.28 to 0.7.1 (#6095)
edeb88533e is described below
commit edeb88533e2f79049a7f34cb68c176ece5da90e4
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Apr 23 10:07:56 2023 -0400
chore(deps): update regex and regex-syntax requirement from 0.6.28 to 0.7.1
(#6095)
* Update regex and regex-syntax dependencies
* chore(deps): update regex-syntax requirement from 0.6.28 to 0.7.1
* fix up
* clippy
---
datafusion-cli/Cargo.lock | 6 ++---
datafusion/optimizer/Cargo.toml | 2 +-
.../optimizer/src/simplify_expressions/regex.rs | 27 ++++++++++++++++------
datafusion/physical-expr/Cargo.toml | 2 +-
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 7b1190a91a..556a296274 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -389,9 +389,9 @@ dependencies = [
[[package]]
name = "bumpalo"
-version = "3.12.0"
+version = "3.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
+checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8"
[[package]]
name = "byteorder"
@@ -811,7 +811,7 @@ dependencies = [
"hashbrown 0.13.2",
"itertools",
"log",
- "regex-syntax 0.6.29",
+ "regex-syntax 0.7.1",
]
[[package]]
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
index d84dbc5c2d..4959291c42 100644
--- a/datafusion/optimizer/Cargo.toml
+++ b/datafusion/optimizer/Cargo.toml
@@ -49,7 +49,7 @@ datafusion-physical-expr = { path = "../physical-expr",
version = "23.0.0", defa
hashbrown = { version = "0.13", features = ["raw"] }
itertools = "0.10"
log = "^0.4"
-regex-syntax = "0.6.28"
+regex-syntax = "0.7.1"
[dev-dependencies]
ctor = "0.2.0"
diff --git a/datafusion/optimizer/src/simplify_expressions/regex.rs
b/datafusion/optimizer/src/simplify_expressions/regex.rs
index 13d170fd88..a7ae14542d 100644
--- a/datafusion/optimizer/src/simplify_expressions/regex.rs
+++ b/datafusion/optimizer/src/simplify_expressions/regex.rs
@@ -58,6 +58,7 @@ pub fn simplify_regex_expr(
Ok(Expr::BinaryExpr(BinaryExpr { left, op, right }))
}
+#[derive(Debug)]
struct OperatorMode {
not: bool,
i: bool,
@@ -101,11 +102,8 @@ fn collect_concat_to_like_string(parts: &[Hir]) ->
Option<String> {
s.push('%');
for sub in parts {
- if let HirKind::Literal(Literal::Unicode(c)) = sub.kind() {
- if !is_safe_for_like(*c) {
- return None;
- }
- s.push(*c);
+ if let HirKind::Literal(l) = sub.kind() {
+ s.push_str(str_from_literal(l)?);
} else {
return None;
}
@@ -115,17 +113,32 @@ fn collect_concat_to_like_string(parts: &[Hir]) ->
Option<String> {
Some(s)
}
+/// returns a str represented by `Literal` if it contains a valid utf8
+/// sequence and is safe for like (has no '%' and '_')
+fn str_from_literal(l: &Literal) -> Option<&str> {
+ // if not utf8, no good
+ let s = std::str::from_utf8(&l.0).ok()?;
+
+ if s.chars().all(is_safe_for_like) {
+ Some(s)
+ } else {
+ None
+ }
+}
+
fn is_safe_for_like(c: char) -> bool {
(c != '%') && (c != '_')
}
fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
+ println!("Considering hir kind: mode {mode:?} hir: {hir:?}");
match hir.kind() {
HirKind::Empty => {
return Some(mode.expr(Box::new(left.clone()), "%".to_owned()));
}
- HirKind::Literal(Literal::Unicode(c)) if is_safe_for_like(*c) => {
- return Some(mode.expr(Box::new(left.clone()), format!("%{c}%")));
+ HirKind::Literal(l) => {
+ let s = str_from_literal(l)?;
+ return Some(mode.expr(Box::new(left.clone()), format!("%{s}%")));
}
HirKind::Concat(inner) => {
if let Some(pattern) = collect_concat_to_like_string(inner) {
diff --git a/datafusion/physical-expr/Cargo.toml
b/datafusion/physical-expr/Cargo.toml
index 3cc85071ba..df19299e7e 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -63,7 +63,7 @@ md-5 = { version = "^0.10.0", optional = true }
paste = "^1.0"
petgraph = "0.6.2"
rand = "0.8"
-regex = { version = "^1.4.3", optional = true }
+regex = { version = "1.8", optional = true }
sha2 = { version = "^0.10.1", optional = true }
unicode-segmentation = { version = "^1.7.1", optional = true }
uuid = { version = "^1.2", features = ["v4"] }