melgenek commented on code in PR #5015:
URL: https://github.com/apache/arrow-datafusion/pull/5015#discussion_r1083328977


##########
datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs:
##########
@@ -29,69 +30,157 @@ use postgres_types::Type;
 use rust_decimal::Decimal;
 use tokio_postgres::{Column, Row};
 
-pub mod image;
+// default connect string, can be overridden by environment PG_DSN
+const PG_DSN: &str = "postgresql://[email protected]/test";
+
+/// DataFusion sql-logicaltest error
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Postgres error: {0}")]
+    Postgres(#[from] tokio_postgres::error::Error),
+    #[error("Error handling copy command: {0}")]
+    Copy(String),
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 
 pub struct Postgres {
-    client: Arc<tokio_postgres::Client>,
+    client: tokio_postgres::Client,
     join_handle: JoinHandle<()>,
+    /// Filename, for display purposes
     file_name: String,
 }
 
 impl Postgres {
-    pub async fn connect_with_retry(
-        file_name: String,
-        host: &str,
-        port: u16,
-        db: &str,
-        user: &str,
-        pass: &str,
-    ) -> Result<Self, tokio_postgres::error::Error> {
+    /// Creates a runner for executiong queries against an existing

Review Comment:
   ```suggestion
       /// Creates a runner for executing queries against an existing
   ```



##########
datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs:
##########
@@ -29,69 +30,157 @@ use postgres_types::Type;
 use rust_decimal::Decimal;
 use tokio_postgres::{Column, Row};
 
-pub mod image;
+// default connect string, can be overridden by environment PG_DSN
+const PG_DSN: &str = "postgresql://[email protected]/test";
+
+/// DataFusion sql-logicaltest error
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Postgres error: {0}")]
+    Postgres(#[from] tokio_postgres::error::Error),
+    #[error("Error handling copy command: {0}")]
+    Copy(String),
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 
 pub struct Postgres {
-    client: Arc<tokio_postgres::Client>,
+    client: tokio_postgres::Client,
     join_handle: JoinHandle<()>,
+    /// Filename, for display purposes
     file_name: String,
 }
 
 impl Postgres {
-    pub async fn connect_with_retry(
-        file_name: String,
-        host: &str,
-        port: u16,
-        db: &str,
-        user: &str,
-        pass: &str,
-    ) -> Result<Self, tokio_postgres::error::Error> {
+    /// Creates a runner for executiong queries against an existing
+    /// posgres connection. `file_name` is used for display output
+    ///
+    /// The database connection details can be overridden by the
+    /// `PG_DSN` environment variable.
+    ///
+    /// This defaults to
+    ///
+    /// ```text
+    /// PG_DSN="postgresql://[email protected]/test"
+    /// ```
+    ///
+    /// See 
https://docs.rs/tokio-postgres/latest/tokio_postgres/config/struct.Config.html#url
 for format
+    pub async fn connect(file_name: impl Into<String>) -> Result<Self> {
+        let file_name = file_name.into();
+
+        let dsn = if let Ok(val) = std::env::var("PG_DSN") {
+            val
+        } else {
+            PG_DSN.to_string()
+        };
+
+        debug!("Using posgres dsn: {dsn}");
+
+        let config = tokio_postgres::Config::from_str(&dsn)?;
+
         let mut retry = 0;

Review Comment:
   You can probably get rid of the retry. It was sort of a hack because queries 
were executed immediately after a docker startup. And sometimes Postgres wasn't 
fully ready. Assuming that Postgres gets started in advance, you can assume 
that the connection succeeds.



##########
datafusion/core/tests/sqllogictests/src/engines/postgres/mod.rs:
##########
@@ -29,69 +30,157 @@ use postgres_types::Type;
 use rust_decimal::Decimal;
 use tokio_postgres::{Column, Row};
 
-pub mod image;
+// default connect string, can be overridden by environment PG_DSN
+const PG_DSN: &str = "postgresql://[email protected]/test";
+
+/// DataFusion sql-logicaltest error
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Postgres error: {0}")]
+    Postgres(#[from] tokio_postgres::error::Error),
+    #[error("Error handling copy command: {0}")]
+    Copy(String),
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 
 pub struct Postgres {
-    client: Arc<tokio_postgres::Client>,
+    client: tokio_postgres::Client,
     join_handle: JoinHandle<()>,
+    /// Filename, for display purposes
     file_name: String,
 }
 
 impl Postgres {
-    pub async fn connect_with_retry(
-        file_name: String,
-        host: &str,
-        port: u16,
-        db: &str,
-        user: &str,
-        pass: &str,
-    ) -> Result<Self, tokio_postgres::error::Error> {
+    /// Creates a runner for executiong queries against an existing
+    /// posgres connection. `file_name` is used for display output
+    ///
+    /// The database connection details can be overridden by the
+    /// `PG_DSN` environment variable.
+    ///
+    /// This defaults to
+    ///
+    /// ```text
+    /// PG_DSN="postgresql://[email protected]/test"
+    /// ```
+    ///
+    /// See 
https://docs.rs/tokio-postgres/latest/tokio_postgres/config/struct.Config.html#url
 for format
+    pub async fn connect(file_name: impl Into<String>) -> Result<Self> {
+        let file_name = file_name.into();
+
+        let dsn = if let Ok(val) = std::env::var("PG_DSN") {
+            val
+        } else {
+            PG_DSN.to_string()
+        };

Review Comment:
   ```suggestion
        let dsn =
               std::env::var("PG_DSN").map_or(PG_DSN.to_string(), 
std::convert::identity);
   ```



##########
datafusion/core/tests/sqllogictests/test_files/pg_compat_simple.slt:
##########
@@ -15,6 +15,67 @@
 # specific language governing permissions and limitations
 # under the License.
 
+###
+## Setup test for postgres
+###
+
+onlyif postgres
+statement ok
+DROP TABLE IF EXISTS aggregate_test_100_by_sql;

Review Comment:
   Dropping a table seems like a hack to the fact that Postgres database is 
reused for multiple test files.
   It would be nice to introduce isolation by, for example, having a schema per 
file. The schema could be either random or based on the test file name.
   
   I drafted a very basic implementation of the approach. 
https://github.com/melgenek/arrow-datafusion/commit/3394c87c15810a02f2f1233eb6676d070c405c6b
 



##########
datafusion/core/tests/sqllogictests/README.md:
##########
@@ -46,16 +46,33 @@ cargo test -p datafusion --test sqllogictests -- information
 
 #### Running tests: Postgres compatibility
 
-Test files that start with prefix `pg_compat_` verify compatibility with 
Postgres.
-Datafusion runs these test files during normal sqllogictest runs.
+Test files that start with prefix `pg_compat_` verify compatibility
+with Postgres by running the same script files both with DataFusion and with 
Posgres
 
-In order to run sqllogictests with Postgres execute:
+In order to run the sqllogictests running against a previously running 
Postgres instance, do:
 
 ```shell
-PG_COMPAT=true cargo test -p datafusion --test sqllogictests
+PG_COMPAT=true PG_DSN="postgresql://[email protected]/postgres" cargo test -p 
datafusion --test sqllogictests
 ```
 
-This command requires a docker binary. Check that docker is properly installed 
with `which docker`.
+The environemnt variables:
+
+1. `PG_COMPAT` instructs sqllogictest to run against Postgres (not DataFusion)
+2. `PG_DSN` contains a `libpq` style connection string, whose format is 
described in

Review Comment:
   DSN feels like an alias for a connection string, rather than the connection 
string itself. I would probably call it `PG_URI` or something like that. But 
this is a nitpick, feel free to ignore it.



##########
datafusion/core/tests/sqllogictests/README.md:
##########
@@ -46,16 +46,33 @@ cargo test -p datafusion --test sqllogictests -- information
 
 #### Running tests: Postgres compatibility
 
-Test files that start with prefix `pg_compat_` verify compatibility with 
Postgres.
-Datafusion runs these test files during normal sqllogictest runs.
+Test files that start with prefix `pg_compat_` verify compatibility
+with Postgres by running the same script files both with DataFusion and with 
Posgres
 
-In order to run sqllogictests with Postgres execute:
+In order to run the sqllogictests running against a previously running 
Postgres instance, do:
 
 ```shell
-PG_COMPAT=true cargo test -p datafusion --test sqllogictests
+PG_COMPAT=true PG_DSN="postgresql://[email protected]/postgres" cargo test -p 
datafusion --test sqllogictests
 ```
 
-This command requires a docker binary. Check that docker is properly installed 
with `which docker`.
+The environemnt variables:
+
+1. `PG_COMPAT` instructs sqllogictest to run against Postgres (not DataFusion)
+2. `PG_DSN` contains a `libpq` style connection string, whose format is 
described in
+   [the 
docs](https://docs.rs/tokio-postgres/latest/tokio_postgres/config/struct.Config.html#url)
+
+One way to create a suitable a posgres container in docker is to use
+the [Official Image](https://hub.docker.com/_/postgres) with a command
+such as the following. Note the collation **must** be set to `C` otherwise
+`ORDER BY` will not match DataFusion and the tests will diff.
+
+```shell
+docker run \

Review Comment:
   ```suggestion
   docker run --rm \
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to