Copilot commented on code in PR #37307:
URL: https://github.com/apache/superset/pull/37307#discussion_r2713963413


##########
superset/db_engine_specs/mongodb.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""MongoDB engine spec for Superset.
+
+Uses PyMongoSQL (https://github.com/passren/PyMongoSQL) as the SQLAlchemy 
dialect
+to enable SQL queries on MongoDB collections.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Optional
+
+from sqlalchemy import types
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MongoDBEngineSpec(BaseEngineSpec):
+    """Engine spec for MongoDB using PyMongoSQL dialect."""
+
+    engine = "mongodb"
+    engine_name = "MongoDB"
+    force_column_alias_quotes = False
+
+    _time_grain_expressions = {
+        None: "{col}",
+        TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+        TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+        TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+        TimeGrain.DAY: "DATETIME({col}, 'start of day')",
+        TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
+            -strftime('%w', {col}) || ' days')",

Review Comment:
   The backslash continuation on this line creates a string with embedded 
whitespace that differs from the SQLite implementation. In Python, using a 
backslash at the end of a line continues the string, but the leading spaces on 
the next line become part of the string value. This results in the actual value 
being: "DATETIME({col}, 'start of day',             -strftime('%w', {col}) || ' 
days')" with many spaces before the minus sign.
   
   While this matches your test expectation, it's inconsistent with how other 
engine specs handle multi-line strings. Consider using parentheses for string 
concatenation instead, like:
   TimeGrain.WEEK: (
       "DATETIME({col}, 'start of day', "
       "-strftime('%w', {col}) || ' days')"
   )
   
   This would result in cleaner SQL without the extra whitespace.
   ```suggestion
           TimeGrain.WEEK: (
               "DATETIME({col}, 'start of day', "
               "-strftime('%w', {col}) || ' days')"
           ),
   ```



##########
superset/db_engine_specs/mongodb.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""MongoDB engine spec for Superset.
+
+Uses PyMongoSQL (https://github.com/passren/PyMongoSQL) as the SQLAlchemy 
dialect
+to enable SQL queries on MongoDB collections.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Optional
+
+from sqlalchemy import types
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MongoDBEngineSpec(BaseEngineSpec):
+    """Engine spec for MongoDB using PyMongoSQL dialect."""
+
+    engine = "mongodb"
+    engine_name = "MongoDB"
+    force_column_alias_quotes = False
+
+    _time_grain_expressions = {
+        None: "{col}",
+        TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+        TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+        TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+        TimeGrain.DAY: "DATETIME({col}, 'start of day')",
+        TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
+            -strftime('%w', {col}) || ' days')",
+        TimeGrain.MONTH: "DATETIME({col}, 'start of month')",
+        TimeGrain.QUARTER: (
+            "DATETIME({col}, 'start of month', "
+            "printf('-%d month', (strftime('%m', {col}) - 1) % 3))"
+        ),
+        TimeGrain.YEAR: "DATETIME({col}, 'start of year')",
+        TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 
'weekday 6')",
+        TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 
'weekday 0')",
+        TimeGrain.WEEK_STARTING_SUNDAY: (
+            "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')"
+        ),
+        TimeGrain.WEEK_STARTING_MONDAY: (
+            "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"

Review Comment:
   The implementation of WEEK_STARTING_SUNDAY and WEEK_STARTING_MONDAY uses 
'weekday N' followed by '-7 days', which differs from SQLite's approach. 
   
   SQLite's 'weekday N' modifier moves to the NEXT occurrence of weekday N, or 
stays on the same date if already on that weekday. This means:
   - For a date already on Sunday/Monday, it would stay on that date, then go 
back 7 days (to the previous week)
   - For other dates, it moves forward to the next Sunday/Monday, then back 7 
days
   
   This might not give the expected "start of current week" behavior. Consider 
using SQLite's approach with dynamic offset calculation instead:
   - WEEK_STARTING_SUNDAY: "DATETIME({col}, 'start of day', -strftime('%w', 
{col}) || ' days')"
   - WEEK_STARTING_MONDAY: "DATETIME({col}, 'start of day', '-' || 
((strftime('%w', {col}) + 6) % 7) || ' days')"
   
   Please verify with test data that the current implementation produces 
correct week boundaries.
   ```suggestion
               "DATETIME({col}, 'start of day', -strftime('%w', {col}) || ' 
days')"
           ),
           TimeGrain.WEEK_STARTING_MONDAY: (
               "DATETIME({col}, 'start of day', '-' || ((strftime('%w', {col}) 
+ 6) % 7) || ' days')"
   ```



##########
tests/unit_tests/db_engine_specs/test_mongodb.py:
##########
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from datetime import datetime
+from typing import Optional
+
+import pytest
+
+from superset.constants import TimeGrain
+from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
+from tests.unit_tests.fixtures.common import dttm  # noqa: F401
+
+
[email protected](
+    "target_type,expected_result",
+    [
+        ("text", "'2019-01-02 03:04:05'"),
+        ("TEXT", "'2019-01-02 03:04:05'"),
+        ("dateTime", "'2019-01-02 03:04:05'"),
+        ("DateTime", "'2019-01-02 03:04:05'"),
+        ("DATETIME", "'2019-01-02 03:04:05'"),
+        ("string", "'2019-01-02 03:04:05'"),
+        ("String", "'2019-01-02 03:04:05'"),
+        ("STRING", "'2019-01-02 03:04:05'"),
+        ("integer", None),
+        ("number", None),
+        ("unknowntype", None),
+    ],
+)
+def test_convert_dttm(
+    target_type: str,
+    expected_result: Optional[str],
+    dttm: datetime,  # noqa: F811
+) -> None:
+    """Test datetime conversion for various MongoDB column types.
+    """
+    from superset.db_engine_specs.mongodb import (
+        MongoDBEngineSpec as spec,  # noqa: N813
+    )
+
+    assert_convert_dttm(spec, target_type, expected_result, dttm)
+
+
+def test_epoch_to_dttm() -> None:
+    """Test epoch to datetime conversion."""
+    from superset.db_engine_specs.mongodb import (
+        MongoDBEngineSpec as spec,  # noqa: N813
+    )
+
+    # MongoDB engine just passes through the column expression
+    assert spec.epoch_to_dttm() == "{col}"
+
+
[email protected](
+    "grain,expected_expression",
+    [
+        (None, "{col}"),
+        (TimeGrain.SECOND, "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))"),
+        (TimeGrain.MINUTE, "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))"),
+        (TimeGrain.HOUR, "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))"),
+        (TimeGrain.DAY, "DATETIME({col}, 'start of day')"),
+        (
+            TimeGrain.WEEK,
+            "DATETIME({col}, 'start of day',             "

Review Comment:
   The test expects the time grain expression to have extra whitespace (line 
77: "DATETIME({col}, 'start of day',             "), which is created by the 
backslash continuation in the implementation. This whitespace is not 
semantically meaningful and makes the test fragile. If the implementation is 
fixed to use proper string concatenation with parentheses (as is done for other 
time grains), this test will need to be updated to remove the extra spaces.
   ```suggestion
               "DATETIME({col}, 'start of day', "
   ```



##########
superset/db_engine_specs/mongodb.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""MongoDB engine spec for Superset.
+
+Uses PyMongoSQL (https://github.com/passren/PyMongoSQL) as the SQLAlchemy 
dialect
+to enable SQL queries on MongoDB collections.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Optional
+
+from sqlalchemy import types
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MongoDBEngineSpec(BaseEngineSpec):
+    """Engine spec for MongoDB using PyMongoSQL dialect."""
+
+    engine = "mongodb"
+    engine_name = "MongoDB"
+    force_column_alias_quotes = False
+
+    _time_grain_expressions = {
+        None: "{col}",
+        TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+        TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+        TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+        TimeGrain.DAY: "DATETIME({col}, 'start of day')",
+        TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
+            -strftime('%w', {col}) || ' days')",
+        TimeGrain.MONTH: "DATETIME({col}, 'start of month')",
+        TimeGrain.QUARTER: (
+            "DATETIME({col}, 'start of month', "
+            "printf('-%d month', (strftime('%m', {col}) - 1) % 3))"
+        ),
+        TimeGrain.YEAR: "DATETIME({col}, 'start of year')",
+        TimeGrain.WEEK_ENDING_SATURDAY: "DATETIME({col}, 'start of day', 
'weekday 6')",
+        TimeGrain.WEEK_ENDING_SUNDAY: "DATETIME({col}, 'start of day', 
'weekday 0')",
+        TimeGrain.WEEK_STARTING_SUNDAY: (
+            "DATETIME({col}, 'start of day', 'weekday 0', '-7 days')"
+        ),
+        TimeGrain.WEEK_STARTING_MONDAY: (
+            "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
+        ),
+    }
+
+    @classmethod
+    def epoch_to_dttm(cls) -> str:
+        """Return SQL to convert epoch time to datetime."""
+        return "{col}"

Review Comment:
   The epoch_to_dttm method returns "{col}" without any conversion function, 
which differs from other database engines (e.g., SQLite uses "datetime({col}, 
'unixepoch')", PostgreSQL uses a proper timestamp conversion). 
   
   While the test indicates this is intentional ("MongoDB engine just passes 
through the column expression"), please verify this behavior is correct for 
PyMongoSQL. If epoch timestamps require conversion in actual usage, this could 
lead to incorrect datetime values in queries.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to