This is an automated email from the ASF dual-hosted git repository.

yjc pushed a commit to branch perf-recent-activity
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit b881d197c515c8da97591cd0b99fccdfd5a6da30
Author: Jesse Yang <[email protected]>
AuthorDate: Wed Nov 25 19:06:00 2020 -0800

    perf: improve loading speed of recent activities
---
 superset/extensions.py                             |  3 +-
 .../versions/debf575ce1a1_add_index_to_logs.py     | 48 +++++++++++++
 superset/views/core.py                             | 82 ++++++++--------------
 3 files changed, 80 insertions(+), 53 deletions(-)

diff --git a/superset/extensions.py b/superset/extensions.py
index 7011bb2..ff41370 100644
--- a/superset/extensions.py
+++ b/superset/extensions.py
@@ -29,6 +29,7 @@ from werkzeug.local import LocalProxy
 
 from superset.utils.cache_manager import CacheManager
 from superset.utils.feature_flag_manager import FeatureFlagManager
+from superset.utils.log import AbstractEventLogger
 from superset.utils.machine_auth import MachineAuthProviderFactory
 
 
@@ -101,7 +102,7 @@ cache_manager = CacheManager()
 celery_app = celery.Celery()
 csrf = CSRFProtect()
 db = SQLA()
-_event_logger: Dict[str, Any] = {}
+_event_logger: Dict[str, AbstractEventLogger] = {}
 event_logger = LocalProxy(lambda: _event_logger.get("event_logger"))
 feature_flag_manager = FeatureFlagManager()
 machine_auth_provider_factory = MachineAuthProviderFactory()
diff --git a/superset/migrations/versions/debf575ce1a1_add_index_to_logs.py 
b/superset/migrations/versions/debf575ce1a1_add_index_to_logs.py
new file mode 100644
index 0000000..33b923a
--- /dev/null
+++ b/superset/migrations/versions/debf575ce1a1_add_index_to_logs.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""add_index_to_logs
+
+Revision ID: debf575ce1a1
+Revises: a8173232b786
+Create Date: 2020-11-25 17:28:39.383290
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "debf575ce1a1"
+down_revision = "a8173232b786"
+
+
+IDX_LOGS_ACTION = "ix_logs_action"
+IDX_LOGS_USER_ID_DTTM = "ix_logs_user_id_dttm"
+
+
+def upgrade():
+    with op.batch_alter_table("logs") as batch_op:
+        batch_op.create_index(
+            op.f(IDX_LOGS_USER_ID_DTTM), ["user_id", "dttm"], unique=False,
+        )
+        batch_op.create_index(
+            op.f(IDX_LOGS_ACTION), ["action"], unique=False,
+        )
+
+
+def downgrade():
+    with op.batch_alter_table("logs") as batch_op:
+        batch_op.drop_index(op.f(IDX_LOGS_USER_ID_DTTM))
+        batch_op.drop_index(op.f(IDX_LOGS_ACTION))
diff --git a/superset/views/core.py b/superset/views/core.py
index 72cf12c..06d141f 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -17,8 +17,9 @@
 # pylint: disable=comparison-with-callable
 import logging
 import re
+import time
 from contextlib import closing
-from datetime import datetime, timedelta
+from datetime import datetime
 from typing import Any, Callable, cast, Dict, List, Optional, Union
 from urllib import parse
 
@@ -1216,7 +1217,7 @@ class Superset(BaseSupersetView):  # pylint: 
disable=too-many-public-methods
     ) -> FlaskResponse:
         """Recent activity (actions) for a given user"""
         limit = request.args.get("limit")
-        limit = int(limit) if limit and limit.isdigit() else 100
+        limit = int(limit) if limit and limit.isdigit() else 50
         actions = request.args.get("actions", "explore,dashboard").split(",")
         # whether to get distinct subjects
         distinct = request.args.get("distinct") != "false"
@@ -1228,60 +1229,37 @@ class Superset(BaseSupersetView):  # pylint: 
disable=too-many-public-methods
             and_(Slice.slice_name is not None, Slice.slice_name != ""),
         )
 
-        if distinct:
-            one_year_ago = datetime.today() - timedelta(days=365)
-            subqry = (
-                db.session.query(
-                    Log.dashboard_id,
-                    Log.slice_id,
-                    Log.action,
-                    func.max(Log.dttm).label("dttm"),
-                )
-                .group_by(Log.dashboard_id, Log.slice_id, Log.action)
-                .filter(
-                    and_(
-                        Log.action.in_(actions),
-                        Log.user_id == user_id,
-                        # limit to one year of data to improve performance
-                        Log.dttm > one_year_ago,
-                        or_(Log.dashboard_id.isnot(None), 
Log.slice_id.isnot(None)),
-                    )
-                )
-                .subquery()
-            )
-            qry = (
-                db.session.query(
-                    subqry,
-                    Dashboard.slug.label("dashboard_slug"),
-                    Dashboard.dashboard_title,
-                    Slice.slice_name,
-                )
-                .outerjoin(Dashboard, Dashboard.id == subqry.c.dashboard_id)
-                .outerjoin(Slice, Slice.id == subqry.c.slice_id,)
-                .filter(has_subject_title)
-                .order_by(subqry.c.dttm.desc())
-                .limit(limit)
+        qry = (
+            db.session.query(
+                Log.dttm,
+                Log.action,
+                Log.dashboard_id,
+                Log.slice_id,
+                Dashboard.slug.label("dashboard_slug"),
+                Dashboard.dashboard_title,
+                Slice.slice_name,
+            )
+            .outerjoin(Dashboard, Dashboard.id == Log.dashboard_id)
+            .outerjoin(Slice, Slice.id == Log.slice_id)
+            .filter(
+                and_(Log.action.in_(actions), Log.user_id == user_id, 
has_subject_title)
             )
+            .order_by(Log.dttm.desc())
+        )
+        if distinct:
+            # stream the list and try to find enough distinct results
+            items = []
+            seen = set()
+            for item in qry.limit(100 * limit).yield_per(5 * limit):
+                key = (item.dashboard_id, item.slice_id)
+                if key not in seen:
+                    seen.add(key)
+                    items.append(item)
         else:
-            qry = (
-                db.session.query(
-                    Log.dttm,
-                    Log.action,
-                    Log.dashboard_id,
-                    Log.slice_id,
-                    Dashboard.slug.label("dashboard_slug"),
-                    Dashboard.dashboard_title,
-                    Slice.slice_name,
-                )
-                .outerjoin(Dashboard, Dashboard.id == Log.dashboard_id)
-                .outerjoin(Slice, Slice.id == Log.slice_id)
-                .filter(has_subject_title)
-                .order_by(Log.dttm.desc())
-                .limit(limit)
-            )
+            items = qry.limit(limit).all()
 
         payload = []
-        for log in qry.all():
+        for log in items:
             item_url = None
             item_title = None
             item_type = None

Reply via email to