gabotorresruiz commented on code in PR #35832:
URL: https://github.com/apache/superset/pull/35832#discussion_r2543881187


##########
superset/utils/slack.py:
##########
@@ -59,80 +62,277 @@ def get_slack_client() -> WebClient:
     return client
 
 
-@cache_util.memoized_func(
-    key="slack_conversations_list",
-    cache=cache_manager.cache,
-)
-def get_channels() -> list[SlackChannelSchema]:
-    """
-    Retrieves a list of all conversations accessible by the bot
-    from the Slack API, and caches results (to avoid rate limits).
-
-    The Slack API does not provide search so to apply a search use
-    get_channels_with_search instead.
-    """
-    client = get_slack_client()
-    channel_schema = SlackChannelSchema()
+def _fetch_channels_without_search(
+    client: WebClient,
+    channel_schema: SlackChannelSchema,
+    types_param: str,
+    cursor: Optional[str],
+    limit: int,
+) -> dict[str, Any]:
+    """Fetch channels without search filtering, paginating for large limits."""
     channels: list[SlackChannelSchema] = []
-    extra_params = {"types": ",".join(SlackChannelTypes)}
-    cursor = None
-    page_count = 0
+    slack_cursor = cursor
+    page_size = min(limit, 1000)
+
+    while True:
+        response = client.conversations_list(
+            limit=page_size,
+            cursor=slack_cursor,
+            exclude_archived=True,
+            types=types_param,
+        )
+
+        page_channels = [
+            channel_schema.load(channel) for channel in response.data["channels"]
+        ]
+        channels.extend(page_channels)
+
+        slack_cursor = response.data.get("response_metadata", {}).get("next_cursor")
+
+        if not slack_cursor or len(page_channels) < page_size or len(channels) >= limit:
+            break
+
+    return {
+        "result": channels[:limit],
+        "next_cursor": slack_cursor,
+        "has_more": bool(slack_cursor),
+    }
+
+
+def _fetch_channels_with_search(
+    client: WebClient,
+    channel_schema: SlackChannelSchema,
+    types_param: str,
+    search_string: str,
+    exact_match: bool,
+    cursor: Optional[str],
+    limit: int,
+) -> dict[str, Any]:
+    """Fetch channels with search filtering, streaming through pages."""
+    matches: list[SlackChannelSchema] = []
+    slack_cursor = cursor
+    search_terms = [
+        term.strip().lower() for term in search_string.split(",") if term.strip()
+    ]
+
+    while len(matches) < limit:
+        response = client.conversations_list(
+            limit=1000,

Review Comment:
   Good catch! I've updated all instances to use `999` as the maximum limit.



##########
superset/tasks/slack.py:
##########
@@ -15,29 +15,128 @@
 # specific language governing permissions and limitations
 # under the License.
 import logging
+from typing import Optional
 
 from flask import current_app
 
-from superset.extensions import celery_app
-from superset.utils.slack import get_channels
+from superset.extensions import cache_manager, celery_app
+from superset.utils.slack import (
+    get_channels_with_search,
+    SLACK_CHANNELS_CACHE_KEY,
+    SLACK_CHANNELS_CONTINUATION_CURSOR_KEY,
+    SlackChannelTypes,
+)
 
 logger = logging.getLogger(__name__)
 
 
-@celery_app.task(name="slack.cache_channels")
+@celery_app.task(
+    name="slack.cache_channels",
+    time_limit=300,  # 5 minute hard timeout (via SLACK_CACHE_WARMUP_TIMEOUT)

Review Comment:
   Good question! Unfortunately, Celery task decorators are evaluated at import
time, before the Flask app context is available, so we cannot use
`SLACK_CACHE_WARMUP_TIMEOUT` here. Since the config variable cannot be used in
the decorator, I've removed `SLACK_CACHE_WARMUP_TIMEOUT` entirely to avoid
confusion. The task now uses hardcoded values, which are reasonable defaults
for most workspaces.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to