This is an automated email from the ASF dual-hosted git repository.

abeizn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 19b853a4d fix(q_dev): prevent data duplication in user_report and user_data tables (#8737)
19b853a4d is described below

commit 19b853a4dc8d62ae0adaf2888a265665b6dd0121
Author: Warren Chen <[email protected]>
AuthorDate: Sat Feb 28 19:32:43 2026 +0800

    fix(q_dev): prevent data duplication in user_report and user_data tables (#8737)
    
    * fix(q_dev): prevent data duplication in user_report and user_data tables
    
    Replace auto-increment ID with composite primary keys so that
    CreateOrUpdate can properly deduplicate rows on re-extraction.
    
    - user_report PK: (connection_id, scope_id, user_id, date, client_type)
    - user_data PK: (connection_id, scope_id, user_id, date)
    - Switch db.Create() to db.CreateOrUpdate() in s3_data_extractor
    - Migration drops old tables, rebuilds with new PKs, resets s3_file_meta
      processed flag to trigger re-extraction
    
    * fix(q_dev): gofmt archived user_data_v2 model
---
 .../20260228_fix_dedup_user_tables.go              | 60 ++++++++++++++++++++++
 .../20260228_reset_s3_file_meta_processed.go       | 51 ++++++++++++++++++
 .../archived/user_data_v2.go}                      | 22 ++++----
 .../archived/user_report_v2.go}                    | 18 +++----
 .../q_dev/models/migrationscripts/register.go      |  2 +
 backend/plugins/q_dev/models/user_data.go          | 10 ++--
 backend/plugins/q_dev/models/user_report.go        | 10 ++--
 backend/plugins/q_dev/tasks/s3_data_extractor.go   |  5 +-
 8 files changed, 144 insertions(+), 34 deletions(-)

diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
new file mode 100644
index 000000000..94b7666a9
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
@@ -0,0 +1,60 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+       "github.com/apache/incubator-devlake/core/context"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/helpers/migrationhelper"
+       "github.com/apache/incubator-devlake/plugins/q_dev/models/migrationscripts/archived"
+)
+
+type fixDedupUserTables struct{}
+
+func (*fixDedupUserTables) Up(basicRes context.BasicRes) errors.Error {
+       db := basicRes.GetDal()
+
+       // Drop old tables that used auto-increment ID (which caused data duplication)
+       err := db.DropTables(
+               "_tool_q_dev_user_report",
+               "_tool_q_dev_user_data",
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to drop old user tables")
+       }
+
+       // Recreate tables with composite primary keys for proper deduplication
+       err = migrationhelper.AutoMigrateTables(
+               basicRes,
+               &archived.QDevUserReportV2{},
+               &archived.QDevUserDataV2{},
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to recreate user tables")
+       }
+
+       return nil
+}
+
+func (*fixDedupUserTables) Version() uint64 {
+       return 20260228000001
+}
+
+func (*fixDedupUserTables) Name() string {
+       return "Rebuild user_report and user_data tables with composite primary keys to fix data duplication"
+}
diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
new file mode 100644
index 000000000..7d0b8ba1f
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
@@ -0,0 +1,51 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+       "github.com/apache/incubator-devlake/core/context"
+       "github.com/apache/incubator-devlake/core/dal"
+       "github.com/apache/incubator-devlake/core/errors"
+)
+
+type resetS3FileMetaProcessed struct{}
+
+func (*resetS3FileMetaProcessed) Up(basicRes context.BasicRes) errors.Error {
+       db := basicRes.GetDal()
+
+       // Reset processed flag so data will be re-extracted with the new
+       // dedup-safe composite-PK schema on next pipeline run
+       err := db.UpdateColumn(
+               "_tool_q_dev_s3_file_meta",
+               "processed", false,
+               dal.Where("1 = 1"),
+       )
+       if err != nil {
+               return errors.Default.Wrap(err, "failed to reset s3_file_meta processed flag")
+       }
+
+       return nil
+}
+
+func (*resetS3FileMetaProcessed) Version() uint64 {
+       return 20260228000002
+}
+
+func (*resetS3FileMetaProcessed) Name() string {
+       return "Reset s3_file_meta processed flag to re-extract data with dedup-safe schema"
+}
diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
similarity index 80%
copy from backend/plugins/q_dev/models/user_data.go
copy to backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
index a668db78b..8d5db8496 100644
--- a/backend/plugins/q_dev/models/user_data.go
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
@@ -15,23 +15,21 @@ See the License for the specific language governing 
permissions and
 limitations under the License.
 */
 
-package models
+package archived
 
 import (
        "time"
 
-       "github.com/apache/incubator-devlake/core/models/common"
+       
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
 )
 
-// QDevUserData 存储从CSV中提取的原始数据
-type QDevUserData struct {
-       common.Model
-       ConnectionId uint64    `gorm:"primaryKey"`
-       UserId       string    `gorm:"index" json:"userId"`
-       Date         time.Time `gorm:"index" json:"date"`
-       DisplayName  string    `gorm:"type:varchar(255)" json:"displayName"` // 
New field for user display name
-       ScopeId      string    `gorm:"index;type:varchar(255)" json:"scopeId"`
-
+type QDevUserDataV2 struct {
+       archived.NoPKModel
+       ConnectionId                         uint64    `gorm:"primaryKey"`
+       ScopeId                              string    
`gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+       UserId                               string    
`gorm:"primaryKey;type:varchar(255)" json:"userId"`
+       Date                                 time.Time 
`gorm:"primaryKey;type:date" json:"date"`
+       DisplayName                          string    
`gorm:"type:varchar(255)" json:"displayName"`
        CodeReview_FindingsCount             int
        CodeReview_SucceededEventCount       int
        InlineChat_AcceptanceEventCount      int
@@ -78,6 +76,6 @@ type QDevUserData struct {
        Transformation_LinesIngested         int
 }
 
-func (QDevUserData) TableName() string {
+func (QDevUserDataV2) TableName() string {
        return "_tool_q_dev_user_data"
 }
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
similarity index 73%
copy from backend/plugins/q_dev/models/user_report.go
copy to backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
index f64090e89..704587485 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
@@ -15,22 +15,22 @@ See the License for the specific language governing 
permissions and
 limitations under the License.
 */
 
-package models
+package archived
 
 import (
        "time"
 
-       "github.com/apache/incubator-devlake/core/models/common"
+       
"github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
 )
 
-type QDevUserReport struct {
-       common.Model
+type QDevUserReportV2 struct {
+       archived.NoPKModel
        ConnectionId       uint64    `gorm:"primaryKey"`
-       UserId             string    `gorm:"index" json:"userId"`
-       Date               time.Time `gorm:"index" json:"date"`
+       ScopeId            string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId             string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date               time.Time `gorm:"primaryKey;type:date" json:"date"`
+       ClientType         string    `gorm:"primaryKey;type:varchar(50)" 
json:"clientType"`
        DisplayName        string    `gorm:"type:varchar(255)" 
json:"displayName"`
-       ScopeId            string    `gorm:"index;type:varchar(255)" 
json:"scopeId"`
-       ClientType         string    `gorm:"type:varchar(50)" json:"clientType"`
        SubscriptionTier   string    `gorm:"type:varchar(50)" 
json:"subscriptionTier"`
        ProfileId          string    `gorm:"type:varchar(512)" json:"profileId"`
        ChatConversations  int       `json:"chatConversations"`
@@ -41,6 +41,6 @@ type QDevUserReport struct {
        TotalMessages      int       `json:"totalMessages"`
 }
 
-func (QDevUserReport) TableName() string {
+func (QDevUserReportV2) TableName() string {
        return "_tool_q_dev_user_report"
 }
diff --git a/backend/plugins/q_dev/models/migrationscripts/register.go b/backend/plugins/q_dev/models/migrationscripts/register.go
index 825ab5658..9c68ae8f8 100644
--- a/backend/plugins/q_dev/models/migrationscripts/register.go
+++ b/backend/plugins/q_dev/models/migrationscripts/register.go
@@ -33,5 +33,7 @@ func All() []plugin.MigrationScript {
                new(addScopeIdFields),
                new(addUserReportTable),
                new(addAccountIdToS3Slice),
+               new(fixDedupUserTables),
+               new(resetS3FileMetaProcessed),
        }
 }
diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/user_data.go
index a668db78b..3d59f965a 100644
--- a/backend/plugins/q_dev/models/user_data.go
+++ b/backend/plugins/q_dev/models/user_data.go
@@ -25,12 +25,12 @@ import (
 
 // QDevUserData 存储从CSV中提取的原始数据
 type QDevUserData struct {
-       common.Model
+       common.NoPKModel
        ConnectionId uint64    `gorm:"primaryKey"`
-       UserId       string    `gorm:"index" json:"userId"`
-       Date         time.Time `gorm:"index" json:"date"`
-       DisplayName  string    `gorm:"type:varchar(255)" json:"displayName"` // 
New field for user display name
-       ScopeId      string    `gorm:"index;type:varchar(255)" json:"scopeId"`
+       ScopeId      string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId       string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date         time.Time `gorm:"primaryKey;type:date" json:"date"`
+       DisplayName  string    `gorm:"type:varchar(255)" json:"displayName"`
 
        CodeReview_FindingsCount             int
        CodeReview_SucceededEventCount       int
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/user_report.go
index f64090e89..17c4ac07d 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/user_report.go
@@ -24,13 +24,13 @@ import (
 )
 
 type QDevUserReport struct {
-       common.Model
+       common.NoPKModel
        ConnectionId       uint64    `gorm:"primaryKey"`
-       UserId             string    `gorm:"index" json:"userId"`
-       Date               time.Time `gorm:"index" json:"date"`
+       ScopeId            string    `gorm:"primaryKey;type:varchar(255)" 
json:"scopeId"`
+       UserId             string    `gorm:"primaryKey;type:varchar(255)" 
json:"userId"`
+       Date               time.Time `gorm:"primaryKey;type:date" json:"date"`
+       ClientType         string    `gorm:"primaryKey;type:varchar(50)" 
json:"clientType"`
        DisplayName        string    `gorm:"type:varchar(255)" 
json:"displayName"`
-       ScopeId            string    `gorm:"index;type:varchar(255)" 
json:"scopeId"`
-       ClientType         string    `gorm:"type:varchar(50)" json:"clientType"`
        SubscriptionTier   string    `gorm:"type:varchar(50)" 
json:"subscriptionTier"`
        ProfileId          string    `gorm:"type:varchar(512)" json:"profileId"`
        ChatConversations  int       `json:"chatConversations"`
diff --git a/backend/plugins/q_dev/tasks/s3_data_extractor.go b/backend/plugins/q_dev/tasks/s3_data_extractor.go
index 919c4dbe6..1cf2a9f2e 100644
--- a/backend/plugins/q_dev/tasks/s3_data_extractor.go
+++ b/backend/plugins/q_dev/tasks/s3_data_extractor.go
@@ -147,7 +147,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db 
dal.Dal, reader io.ReadClo
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to 
create user report data")
                        }
-                       err = db.Create(reportData)
+                       err = db.CreateOrUpdate(reportData)
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to save 
user report data")
                        }
@@ -158,8 +158,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db 
dal.Dal, reader io.ReadClo
                                return errors.Default.Wrap(err, "failed to 
create user data")
                        }
 
-                       // Save to database - no need to check for duplicates 
since we're processing each file only once
-                       err = db.Create(userData)
+                       err = db.CreateOrUpdate(userData)
                        if err != nil {
                                return errors.Default.Wrap(err, "failed to save 
user data")
                        }

Reply via email to