This is an automated email from the ASF dual-hosted git repository.
abeizn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 19b853a4d fix(q_dev): prevent data duplication in user_report and
user_data tables (#8737)
19b853a4d is described below
commit 19b853a4dc8d62ae0adaf2888a265665b6dd0121
Author: Warren Chen <[email protected]>
AuthorDate: Sat Feb 28 19:32:43 2026 +0800
fix(q_dev): prevent data duplication in user_report and user_data tables
(#8737)
* fix(q_dev): prevent data duplication in user_report and user_data tables
Replace auto-increment ID with composite primary keys so that
CreateOrUpdate can properly deduplicate rows on re-extraction.
- user_report PK: (connection_id, scope_id, user_id, date, client_type)
- user_data PK: (connection_id, scope_id, user_id, date)
- Switch db.Create() to db.CreateOrUpdate() in s3_data_extractor
- Migration drops old tables, rebuilds with new PKs, resets s3_file_meta
processed flag to trigger re-extraction
* fix(q_dev): gofmt archived user_data_v2 model
---
.../20260228_fix_dedup_user_tables.go | 60 ++++++++++++++++++++++
.../20260228_reset_s3_file_meta_processed.go | 51 ++++++++++++++++++
.../archived/user_data_v2.go} | 22 ++++----
.../archived/user_report_v2.go} | 18 +++----
.../q_dev/models/migrationscripts/register.go | 2 +
backend/plugins/q_dev/models/user_data.go | 10 ++--
backend/plugins/q_dev/models/user_report.go | 10 ++--
backend/plugins/q_dev/tasks/s3_data_extractor.go | 5 +-
8 files changed, 144 insertions(+), 34 deletions(-)
diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
new file mode 100644
index 000000000..94b7666a9
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go
@@ -0,0 +1,60 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+ "github.com/apache/incubator-devlake/core/context"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/helpers/migrationhelper"
+ "github.com/apache/incubator-devlake/plugins/q_dev/models/migrationscripts/archived"
+)
+
+type fixDedupUserTables struct{}
+
+func (*fixDedupUserTables) Up(basicRes context.BasicRes) errors.Error {
+ db := basicRes.GetDal()
+
+ // Drop old tables that used auto-increment ID (which caused data duplication)
+ err := db.DropTables(
+ "_tool_q_dev_user_report",
+ "_tool_q_dev_user_data",
+ )
+ if err != nil {
+ return errors.Default.Wrap(err, "failed to drop old user tables")
+ }
+
+ // Recreate tables with composite primary keys for proper deduplication
+ err = migrationhelper.AutoMigrateTables(
+ basicRes,
+ &archived.QDevUserReportV2{},
+ &archived.QDevUserDataV2{},
+ )
+ if err != nil {
+ return errors.Default.Wrap(err, "failed to recreate user tables")
+ }
+
+ return nil
+}
+
+func (*fixDedupUserTables) Version() uint64 {
+ return 20260228000001
+}
+
+func (*fixDedupUserTables) Name() string {
+ return "Rebuild user_report and user_data tables with composite primary keys to fix data duplication"
+}
diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
new file mode 100644
index 000000000..7d0b8ba1f
--- /dev/null
+++ b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go
@@ -0,0 +1,51 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+ "github.com/apache/incubator-devlake/core/context"
+ "github.com/apache/incubator-devlake/core/dal"
+ "github.com/apache/incubator-devlake/core/errors"
+)
+
+type resetS3FileMetaProcessed struct{}
+
+func (*resetS3FileMetaProcessed) Up(basicRes context.BasicRes) errors.Error {
+ db := basicRes.GetDal()
+
+ // Reset processed flag so data will be re-extracted with the new
+ // dedup-safe composite-PK schema on next pipeline run
+ err := db.UpdateColumn(
+ "_tool_q_dev_s3_file_meta",
+ "processed", false,
+ dal.Where("1 = 1"),
+ )
+ if err != nil {
+ return errors.Default.Wrap(err, "failed to reset s3_file_meta processed flag")
+ }
+
+ return nil
+}
+
+func (*resetS3FileMetaProcessed) Version() uint64 {
+ return 20260228000002
+}
+
+func (*resetS3FileMetaProcessed) Name() string {
+ return "Reset s3_file_meta processed flag to re-extract data with dedup-safe schema"
+}
diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
similarity index 80%
copy from backend/plugins/q_dev/models/user_data.go
copy to backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
index a668db78b..8d5db8496 100644
--- a/backend/plugins/q_dev/models/user_data.go
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go
@@ -15,23 +15,21 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-package models
+package archived
import (
"time"
- "github.com/apache/incubator-devlake/core/models/common"
+ "github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
)
-// QDevUserData 存储从CSV中提取的原始数据
-type QDevUserData struct {
- common.Model
- ConnectionId uint64 `gorm:"primaryKey"`
- UserId string `gorm:"index" json:"userId"`
- Date time.Time `gorm:"index" json:"date"`
- DisplayName string `gorm:"type:varchar(255)" json:"displayName"` // New field for user display name
- ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
-
+type QDevUserDataV2 struct {
+ archived.NoPKModel
+ ConnectionId uint64 `gorm:"primaryKey"`
+ ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+ UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
+ Date time.Time `gorm:"primaryKey;type:date" json:"date"`
+ DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
CodeReview_FindingsCount int
CodeReview_SucceededEventCount int
InlineChat_AcceptanceEventCount int
@@ -78,6 +76,6 @@ type QDevUserData struct {
Transformation_LinesIngested int
}
-func (QDevUserData) TableName() string {
+func (QDevUserDataV2) TableName() string {
return "_tool_q_dev_user_data"
}
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
similarity index 73%
copy from backend/plugins/q_dev/models/user_report.go
copy to backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
index f64090e89..704587485 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go
@@ -15,22 +15,22 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-package models
+package archived
import (
"time"
- "github.com/apache/incubator-devlake/core/models/common"
+ "github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
)
-type QDevUserReport struct {
- common.Model
+type QDevUserReportV2 struct {
+ archived.NoPKModel
ConnectionId uint64 `gorm:"primaryKey"`
- UserId string `gorm:"index" json:"userId"`
- Date time.Time `gorm:"index" json:"date"`
+ ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+ UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
+ Date time.Time `gorm:"primaryKey;type:date" json:"date"`
+ ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"`
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
- ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
- ClientType string `gorm:"type:varchar(50)" json:"clientType"`
SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"`
ProfileId string `gorm:"type:varchar(512)" json:"profileId"`
ChatConversations int `json:"chatConversations"`
@@ -41,6 +41,6 @@ type QDevUserReport struct {
TotalMessages int `json:"totalMessages"`
}
-func (QDevUserReport) TableName() string {
+func (QDevUserReportV2) TableName() string {
return "_tool_q_dev_user_report"
}
diff --git a/backend/plugins/q_dev/models/migrationscripts/register.go b/backend/plugins/q_dev/models/migrationscripts/register.go
index 825ab5658..9c68ae8f8 100644
--- a/backend/plugins/q_dev/models/migrationscripts/register.go
+++ b/backend/plugins/q_dev/models/migrationscripts/register.go
@@ -33,5 +33,7 @@ func All() []plugin.MigrationScript {
new(addScopeIdFields),
new(addUserReportTable),
new(addAccountIdToS3Slice),
+ new(fixDedupUserTables),
+ new(resetS3FileMetaProcessed),
}
}
diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/user_data.go
index a668db78b..3d59f965a 100644
--- a/backend/plugins/q_dev/models/user_data.go
+++ b/backend/plugins/q_dev/models/user_data.go
@@ -25,12 +25,12 @@ import (
// QDevUserData 存储从CSV中提取的原始数据
type QDevUserData struct {
- common.Model
+ common.NoPKModel
ConnectionId uint64 `gorm:"primaryKey"`
- UserId string `gorm:"index" json:"userId"`
- Date time.Time `gorm:"index" json:"date"`
- DisplayName string `gorm:"type:varchar(255)" json:"displayName"` // New field for user display name
- ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
+ ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+ UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
+ Date time.Time `gorm:"primaryKey;type:date" json:"date"`
+ DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
CodeReview_FindingsCount int
CodeReview_SucceededEventCount int
diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/user_report.go
index f64090e89..17c4ac07d 100644
--- a/backend/plugins/q_dev/models/user_report.go
+++ b/backend/plugins/q_dev/models/user_report.go
@@ -24,13 +24,13 @@ import (
)
type QDevUserReport struct {
- common.Model
+ common.NoPKModel
ConnectionId uint64 `gorm:"primaryKey"`
- UserId string `gorm:"index" json:"userId"`
- Date time.Time `gorm:"index" json:"date"`
+ ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"`
+ UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"`
+ Date time.Time `gorm:"primaryKey;type:date" json:"date"`
+ ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"`
DisplayName string `gorm:"type:varchar(255)" json:"displayName"`
- ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"`
- ClientType string `gorm:"type:varchar(50)" json:"clientType"`
SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"`
ProfileId string `gorm:"type:varchar(512)" json:"profileId"`
ChatConversations int `json:"chatConversations"`
diff --git a/backend/plugins/q_dev/tasks/s3_data_extractor.go b/backend/plugins/q_dev/tasks/s3_data_extractor.go
index 919c4dbe6..1cf2a9f2e 100644
--- a/backend/plugins/q_dev/tasks/s3_data_extractor.go
+++ b/backend/plugins/q_dev/tasks/s3_data_extractor.go
@@ -147,7 +147,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
if err != nil {
return errors.Default.Wrap(err, "failed to create user report data")
}
- err = db.Create(reportData)
+ err = db.CreateOrUpdate(reportData)
if err != nil {
return errors.Default.Wrap(err, "failed to save user report data")
}
@@ -158,8 +158,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo
return errors.Default.Wrap(err, "failed to create user data")
}
- // Save to database - no need to check for duplicates since we're processing each file only once
- err = db.Create(userData)
+ err = db.CreateOrUpdate(userData)
if err != nil {
return errors.Default.Wrap(err, "failed to save user data")
}