This is an automated email from the ASF dual-hosted git repository.

warren pushed a commit to branch fix/q-dev-dedup-user-tables
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git

commit 3f1143690c19ea6349cdb04e6ab29fe94a5e6511
Author: warren <[email protected]>
AuthorDate: Sat Feb 28 14:04:01 2026 +0800

    fix(q_dev): prevent data duplication in user_report and user_data tables
    
    Replace auto-increment ID with composite primary keys so that
    CreateOrUpdate can properly deduplicate rows on re-extraction.
    
    - user_report PK: (connection_id, scope_id, user_id, date, client_type)
    - user_data PK: (connection_id, scope_id, user_id, date)
    - Switch db.Create() to db.CreateOrUpdate() in s3_data_extractor
    - Migration drops old tables, rebuilds with new PKs, resets s3_file_meta
      processed flag to trigger re-extraction
---
 .../20260228_fix_dedup_user_tables.go              | 60 ++++++++++++++++
 .../20260228_reset_s3_file_meta_processed.go       | 51 ++++++++++++++
 .../migrationscripts/archived/user_data_v2.go      | 81 ++++++++++++++++++++++
 .../archived/user_report_v2.go}                    | 18 ++---
 .../q_dev/models/migrationscripts/register.go      |  2 +
 backend/plugins/q_dev/models/user_data.go          | 10 +--
 backend/plugins/q_dev/models/user_report.go        | 10 +--
 backend/plugins/q_dev/tasks/s3_data_extractor.go   |  5 +-
 8 files changed, 215 insertions(+), 22 deletions(-)

diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
new file mode 100644
index 000000000..94b7666a9
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
@@ -0,0 +1,60 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+       "github.com/apache/incubator-devlake/core/context"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/helpers/migrationhelper"
+       
"github.com/apache/incubator-devlake/plugins/q_dev/models/migrationscripts/archived"
+)
+
+type fixDedupUserTables struct{}
+
+func (*fixDedupUserTables) Up(basicRes context.BasicRes) errors.Error {
+       db := basicRes.GetDal()
+
+       // Drop old tables that used auto-increment ID (which caused data duplication)
+       err := db.DropTables(
+               "_tool_q_dev_user_report",
+               "_tool_q_dev_user_data",
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to drop old user 
tables")
+       }
+
+       // Recreate tables with composite primary keys for proper deduplication
+       err = migrationhelper.AutoMigrateTables(
+               basicRes,
+               &archived.QDevUserReportV2{},
+               &archived.QDevUserDataV2{},
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to recreate user 
tables")
+       }
+
+       return nil
+}
+
+func (*fixDedupUserTables) Version() uint64 {
+       return 20260228000001
+}
+
+func (*fixDedupUserTables) Name() string {
+       return "Rebuild user_report and user_data tables with composite primary 
keys to fix data duplication"
+}
diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
new file mode 100644
index 000000000..7d0b8ba1f
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
@@ -0,0 +1,51 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+       "github.com/apache/incubator-devlake/core/context"
+       "github.com/apache/incubator-devlake/core/dal"
+       "github.com/apache/incubator-devlake/core/errors"
+)
+
+type resetS3FileMetaProcessed struct{}
+
+func (*resetS3FileMetaProcessed) Up(basicRes context.BasicRes) errors.Error {
+       db := basicRes.GetDal()
+
+       // Reset processed flag so data will be re-extracted with the new
+       // dedup-safe composite-PK schema on next pipeline run
+       err := db.UpdateColumn(
+               "_tool_q_dev_s3_file_meta",
+               "processed", false,
+               dal.Where("1 = 1"),
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to reset s3_file_meta 
processed flag")
+       }
+
+       return nil
+}
+
+func (*resetS3FileMetaProcessed) Version() uint64 {
+       return 20260228000002
+}
+
+func (*resetS3FileMetaProcessed) Name() string {
+       return "Reset s3_file_meta processed flag to re-extract data with 
dedup-safe schema"
+}
diff --git a/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
new file mode 100644
index 000000000..a5daf1bbf
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
@@ -0,0 +1,81 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package archived
+
+import (
+       "time"
+
+       
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
+)
+
+type QDevUserDataV2 struct {
+       archived.NoPKModel
+       ConnectionId                      uint64    `gorm:"primaryKey"`
+       ScopeId                           string    
`gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+       UserId                            string    
`gorm:"primaryKey;type:varchar(255)" json:"userId"`
+       Date                              time.Time 
`gorm:"primaryKey;type:date" json:"date"`
+       DisplayName                       string    `gorm:"type:varchar(255)" 
json:"displayName"`
+       CodeReview_FindingsCount          int
+       CodeReview_SucceededEventCount    int
+       InlineChat_AcceptanceEventCount   int
+       InlineChat_AcceptedLineAdditions  int
+       InlineChat_AcceptedLineDeletions  int
+       InlineChat_DismissalEventCount    int
+       InlineChat_DismissedLineAdditions int
+       InlineChat_DismissedLineDeletions int
+       InlineChat_RejectedLineAdditions  int
+       InlineChat_RejectedLineDeletions  int
+       InlineChat_RejectionEventCount    int
+       InlineChat_TotalEventCount        int
+       Inline_AICodeLines                int
+       Inline_AcceptanceCount            int
+       Inline_SuggestionsCount           int
+       Chat_AICodeLines                  int
+       Chat_MessagesInteracted           int
+       Chat_MessagesSent                 int
+       CodeFix_AcceptanceEventCount      int
+       CodeFix_AcceptedLines             int
+       CodeFix_GeneratedLines            int
+       CodeFix_GenerationEventCount      int
+       CodeReview_FailedEventCount       int
+       Dev_AcceptanceEventCount          int
+       Dev_AcceptedLines                 int
+       Dev_GeneratedLines                int
+       Dev_GenerationEventCount          int
+       DocGeneration_AcceptedFileUpdates    int
+       DocGeneration_AcceptedFilesCreations int
+       DocGeneration_AcceptedLineAdditions  int
+       DocGeneration_AcceptedLineUpdates    int
+       DocGeneration_EventCount             int
+       DocGeneration_RejectedFileCreations  int
+       DocGeneration_RejectedFileUpdates    int
+       DocGeneration_RejectedLineAdditions  int
+       DocGeneration_RejectedLineUpdates    int
+       TestGeneration_AcceptedLines         int
+       TestGeneration_AcceptedTests         int
+       TestGeneration_EventCount            int
+       TestGeneration_GeneratedLines        int
+       TestGeneration_GeneratedTests        int
+       Transformation_EventCount            int
+       Transformation_LinesGenerated        int
+       Transformation_LinesIngested         int
+}
+
+func (QDevUserDataV2) TableName() string {
+       return "_tool_q_dev_user_data"
+}
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
similarity index 73%
copy from backend/plugins/q_dev/models/user_report.go
copy to backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
index f64090e89..704587485 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
@@ -15,22 +15,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package models
+package archived
 
 import (
        "time"
 
-       "github.com/apache/incubator-devlake/core/models/common"
+       
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
 )
 
-type QDevUserReport struct {
-       common.Model
+type QDevUserReportV2 struct {
+       archived.NoPKModel
        ConnectionId       uint64    `gorm:"primaryKey"`
-       UserId             string    `gorm:"index" json:"userId"`
-       Date               time.Time `gorm:"index" json:"date"`
+       ScopeId            string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId             string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date               time.Time `gorm:"primaryKey;type:date" json:"date"`
+       ClientType         string    `gorm:"primaryKey;type:varchar(50)" 
json:"clientType"`
        DisplayName        string    `gorm:"type:varchar(255)" 
json:"displayName"`
-       ScopeId            string    `gorm:"index;type:varchar(255)" 
json:"scopeId"`
-       ClientType         string    `gorm:"type:varchar(50)" json:"clientType"`
        SubscriptionTier   string    `gorm:"type:varchar(50)" 
json:"subscriptionTier"`
        ProfileId          string    `gorm:"type:varchar(512)" json:"profileId"`
        ChatConversations  int       `json:"chatConversations"`
@@ -41,6 +41,6 @@ type QDevUserReport struct {
        TotalMessages      int       `json:"totalMessages"`
 }
 
-func (QDevUserReport) TableName() string {
+func (QDevUserReportV2) TableName() string {
        return "_tool_q_dev_user_report"
 }
diff --git a/backend/plugins/q_dev/models/migrationscripts/register.go b/backend/plugins/q_dev/models/migrationscripts/register.go
index 825ab5658..9c68ae8f8 100644
--- a/backend/plugins/q_dev/models/migrationscripts/register.go
+++ b/backend/plugins/q_dev/models/migrationscripts/register.go
@@ -33,5 +33,7 @@ func All() []plugin.MigrationScript {
                new(addScopeIdFields),
                new(addUserReportTable),
                new(addAccountIdToS3Slice),
+               new(fixDedupUserTables),
+               new(resetS3FileMetaProcessed),
        }
 }
diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/user_data.go
index a668db78b..3d59f965a 100644
--- a/backend/plugins/q_dev/models/user_data.go
+++ b/backend/plugins/q_dev/models/user_data.go
@@ -25,12 +25,12 @@ import (
 
 // QDevUserData 存储从CSV中提取的原始数据
 type QDevUserData struct {
-       common.Model
+       common.NoPKModel
        ConnectionId uint64    `gorm:"primaryKey"`
-       UserId       string    `gorm:"index" json:"userId"`
-       Date         time.Time `gorm:"index" json:"date"`
-       DisplayName  string    `gorm:"type:varchar(255)" json:"displayName"` // 
New field for user display name
-       ScopeId      string    `gorm:"index;type:varchar(255)" json:"scopeId"`
+       ScopeId      string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId       string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date         time.Time `gorm:"primaryKey;type:date" json:"date"`
+       DisplayName  string    `gorm:"type:varchar(255)" json:"displayName"`
 
        CodeReview_FindingsCount             int
        CodeReview_SucceededEventCount       int
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/user_report.go
index f64090e89..17c4ac07d 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/user_report.go
@@ -24,13 +24,13 @@ import (
 )
 
 type QDevUserReport struct {
-       common.Model
+       common.NoPKModel
        ConnectionId       uint64    `gorm:"primaryKey"`
-       UserId             string    `gorm:"index" json:"userId"`
-       Date               time.Time `gorm:"index" json:"date"`
+       ScopeId            string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId             string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date               time.Time `gorm:"primaryKey;type:date" json:"date"`
+       ClientType         string    `gorm:"primaryKey;type:varchar(50)" 
json:"clientType"`
        DisplayName        string    `gorm:"type:varchar(255)" 
json:"displayName"`
-       ScopeId            string    `gorm:"index;type:varchar(255)" 
json:"scopeId"`
-       ClientType         string    `gorm:"type:varchar(50)" json:"clientType"`
        SubscriptionTier   string    `gorm:"type:varchar(50)" 
json:"subscriptionTier"`
        ProfileId          string    `gorm:"type:varchar(512)" json:"profileId"`
        ChatConversations  int       `json:"chatConversations"`
diff --git a/backend/plugins/q_dev/tasks/s3_data_extractor.go b/backend/plugins/q_dev/tasks/s3_data_extractor.go
index 919c4dbe6..1cf2a9f2e 100644
--- a/backend/plugins/q_dev/tasks/s3_data_extractor.go
+++ b/backend/plugins/q_dev/tasks/s3_data_extractor.go
@@ -147,7 +147,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to 
create user report data")
                        }
-                       err = db.Create(reportData)
+                       err = db.CreateOrUpdate(reportData)
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to save 
user report data")
                        }
@@ -158,8 +158,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
                                return errors.Default.Wrap(err, "failed to 
create user data")
                        }
 
-                       // Save to database - no need to check for duplicates since we're processing each file only once
-                       err = db.Create(userData)
+                       err = db.CreateOrUpdate(userData)
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to save 
user data")
                        }

Reply via email to