This is an automated email from the ASF dual-hosted git repository. warren pushed a commit to branch fix/q-dev-dedup-user-tables in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
commit 3f1143690c19ea6349cdb04e6ab29fe94a5e6511 Author: warren <[email protected]> AuthorDate: Sat Feb 28 14:04:01 2026 +0800 fix(q_dev): prevent data duplication in user_report and user_data tables Replace auto-increment ID with composite primary keys so that CreateOrUpdate can properly deduplicate rows on re-extraction. - user_report PK: (connection_id, scope_id, user_id, date, client_type) - user_data PK: (connection_id, scope_id, user_id, date) - Switch db.Create() to db.CreateOrUpdate() in s3_data_extractor - Migration drops old tables, rebuilds with new PKs, resets s3_file_meta processed flag to trigger re-extraction --- .../20260228_fix_dedup_user_tables.go | 60 ++++++++++++++++ .../20260228_reset_s3_file_meta_processed.go | 51 ++++++++++++++ .../migrationscripts/archived/user_data_v2.go | 81 ++++++++++++++++++++++ .../archived/user_report_v2.go} | 18 ++--- .../q_dev/models/migrationscripts/register.go | 2 + backend/plugins/q_dev/models/user_data.go | 10 +-- backend/plugins/q_dev/models/user_report.go | 10 +-- backend/plugins/q_dev/tasks/s3_data_extractor.go | 5 +- 8 files changed, 215 insertions(+), 22 deletions(-) diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go new file mode 100644 index 000000000..94b7666a9 --- /dev/null +++ b/backend/plugins/q_dev/models/migrationscripts/20260228_fix_dedup_user_tables.go @@ -0,0 +1,60 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package migrationscripts + +import ( + "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/errors" + "github.com/apache/incubator-devlake/helpers/migrationhelper" + "github.com/apache/incubator-devlake/plugins/q_dev/models/migrationscripts/archived" +) + +type fixDedupUserTables struct{} + +func (*fixDedupUserTables) Up(basicRes context.BasicRes) errors.Error { + db := basicRes.GetDal() + + // Drop old tables that used auto-increment ID (which caused data duplication) + err := db.DropTables( + "_tool_q_dev_user_report", + "_tool_q_dev_user_data", + ) + if err != nil { + return errors.Default.Wrap(err, "failed to drop old user tables") + } + + // Recreate tables with composite primary keys for proper deduplication + err = migrationhelper.AutoMigrateTables( + basicRes, + &archived.QDevUserReportV2{}, + &archived.QDevUserDataV2{}, + ) + if err != nil { + return errors.Default.Wrap(err, "failed to recreate user tables") + } + + return nil +} + +func (*fixDedupUserTables) Version() uint64 { + return 20260228000001 +} + +func (*fixDedupUserTables) Name() string { + return "Rebuild user_report and user_data tables with composite primary keys to fix data duplication" +} diff --git a/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go new file mode 100644 index 000000000..7d0b8ba1f --- /dev/null +++ b/backend/plugins/q_dev/models/migrationscripts/20260228_reset_s3_file_meta_processed.go @@ -0,0 +1,51 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package migrationscripts + +import ( + "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/dal" + "github.com/apache/incubator-devlake/core/errors" +) + +type resetS3FileMetaProcessed struct{} + +func (*resetS3FileMetaProcessed) Up(basicRes context.BasicRes) errors.Error { + db := basicRes.GetDal() + + // Reset processed flag so data will be re-extracted with the new + // dedup-safe composite-PK schema on next pipeline run + err := db.UpdateColumn( + "_tool_q_dev_s3_file_meta", + "processed", false, + dal.Where("1 = 1"), + ) + if err != nil { + return errors.Default.Wrap(err, "failed to reset s3_file_meta processed flag") + } + + return nil +} + +func (*resetS3FileMetaProcessed) Version() uint64 { + return 20260228000002 +} + +func (*resetS3FileMetaProcessed) Name() string { + return "Reset s3_file_meta processed flag to re-extract data with dedup-safe schema" +} diff --git a/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go new file mode 100644 index 000000000..a5daf1bbf --- /dev/null +++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_data_v2.go @@ -0,0 +1,81 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package archived + +import ( + "time" + + "github.com/apache/incubator-devlake/core/models/migrationscripts/archived" +) + +type QDevUserDataV2 struct { + archived.NoPKModel + ConnectionId uint64 `gorm:"primaryKey"` + ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"` + UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"` + Date time.Time `gorm:"primaryKey;type:date" json:"date"` + DisplayName string `gorm:"type:varchar(255)" json:"displayName"` + CodeReview_FindingsCount int + CodeReview_SucceededEventCount int + InlineChat_AcceptanceEventCount int + InlineChat_AcceptedLineAdditions int + InlineChat_AcceptedLineDeletions int + InlineChat_DismissalEventCount int + InlineChat_DismissedLineAdditions int + InlineChat_DismissedLineDeletions int + InlineChat_RejectedLineAdditions int + InlineChat_RejectedLineDeletions int + InlineChat_RejectionEventCount int + InlineChat_TotalEventCount int + Inline_AICodeLines int + Inline_AcceptanceCount int + Inline_SuggestionsCount int + Chat_AICodeLines int + Chat_MessagesInteracted int + Chat_MessagesSent int + CodeFix_AcceptanceEventCount int + CodeFix_AcceptedLines int + CodeFix_GeneratedLines int + CodeFix_GenerationEventCount int + CodeReview_FailedEventCount int + Dev_AcceptanceEventCount int + Dev_AcceptedLines int + Dev_GeneratedLines int + Dev_GenerationEventCount int + DocGeneration_AcceptedFileUpdates int + DocGeneration_AcceptedFilesCreations int + DocGeneration_AcceptedLineAdditions int + DocGeneration_AcceptedLineUpdates int + DocGeneration_EventCount int + DocGeneration_RejectedFileCreations int + DocGeneration_RejectedFileUpdates int + DocGeneration_RejectedLineAdditions int + DocGeneration_RejectedLineUpdates int + TestGeneration_AcceptedLines int + TestGeneration_AcceptedTests int + TestGeneration_EventCount int + TestGeneration_GeneratedLines int + TestGeneration_GeneratedTests int + Transformation_EventCount int + Transformation_LinesGenerated int + Transformation_LinesIngested int +} + +func (QDevUserDataV2) TableName() string { + return "_tool_q_dev_user_data" +} diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go similarity index 73% copy from backend/plugins/q_dev/models/user_report.go copy to backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go index f64090e89..704587485 100644 --- a/backend/plugins/q_dev/models/user_report.go +++ b/backend/plugins/q_dev/models/migrationscripts/archived/user_report_v2.go @@ -15,22 +15,22 @@ See the License for the specific language governing permissions and limitations under the License. */ -package models +package archived import ( "time" - "github.com/apache/incubator-devlake/core/models/common" + "github.com/apache/incubator-devlake/core/models/migrationscripts/archived" ) -type QDevUserReport struct { - common.Model +type QDevUserReportV2 struct { + archived.NoPKModel ConnectionId uint64 `gorm:"primaryKey"` - UserId string `gorm:"index" json:"userId"` - Date time.Time `gorm:"index" json:"date"` + ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"` + UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"` + Date time.Time `gorm:"primaryKey;type:date" json:"date"` + ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"` DisplayName string `gorm:"type:varchar(255)" json:"displayName"` - ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"` - ClientType string `gorm:"type:varchar(50)" json:"clientType"` SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"` ProfileId string `gorm:"type:varchar(512)" json:"profileId"` ChatConversations int `json:"chatConversations"` @@ -41,6 +41,6 @@ type QDevUserReport struct { TotalMessages int `json:"totalMessages"` } -func (QDevUserReport) TableName() string { +func (QDevUserReportV2) TableName() string { return "_tool_q_dev_user_report" } diff --git a/backend/plugins/q_dev/models/migrationscripts/register.go b/backend/plugins/q_dev/models/migrationscripts/register.go index 825ab5658..9c68ae8f8 100644 --- a/backend/plugins/q_dev/models/migrationscripts/register.go +++ b/backend/plugins/q_dev/models/migrationscripts/register.go @@ -33,5 +33,7 @@ func All() []plugin.MigrationScript { new(addScopeIdFields), new(addUserReportTable), new(addAccountIdToS3Slice), + new(fixDedupUserTables), + new(resetS3FileMetaProcessed), } } diff --git a/backend/plugins/q_dev/models/user_data.go b/backend/plugins/q_dev/models/user_data.go index a668db78b..3d59f965a 100644 --- a/backend/plugins/q_dev/models/user_data.go +++ b/backend/plugins/q_dev/models/user_data.go @@ -25,12 +25,12 @@ import ( // QDevUserData 存储从CSV中提取的原始数据 type QDevUserData struct { - common.Model + common.NoPKModel ConnectionId uint64 `gorm:"primaryKey"` - UserId string `gorm:"index" json:"userId"` - Date time.Time `gorm:"index" json:"date"` - DisplayName string `gorm:"type:varchar(255)" json:"displayName"` // New field for user display name - ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"` + ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"` + UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"` + Date time.Time `gorm:"primaryKey;type:date" json:"date"` + DisplayName string `gorm:"type:varchar(255)" json:"displayName"` CodeReview_FindingsCount int CodeReview_SucceededEventCount int diff --git a/backend/plugins/q_dev/models/user_report.go b/backend/plugins/q_dev/models/user_report.go index f64090e89..17c4ac07d 100644 --- a/backend/plugins/q_dev/models/user_report.go +++ b/backend/plugins/q_dev/models/user_report.go @@ -24,13 +24,13 @@ import ( ) type QDevUserReport struct { - common.Model + common.NoPKModel ConnectionId uint64 `gorm:"primaryKey"` - UserId string `gorm:"index" json:"userId"` - Date time.Time `gorm:"index" json:"date"` + ScopeId string `gorm:"primaryKey;type:varchar(255)" json:"scopeId"` + UserId string `gorm:"primaryKey;type:varchar(255)" json:"userId"` + Date time.Time `gorm:"primaryKey;type:date" json:"date"` + ClientType string `gorm:"primaryKey;type:varchar(50)" json:"clientType"` DisplayName string `gorm:"type:varchar(255)" json:"displayName"` - ScopeId string `gorm:"index;type:varchar(255)" json:"scopeId"` - ClientType string `gorm:"type:varchar(50)" json:"clientType"` SubscriptionTier string `gorm:"type:varchar(50)" json:"subscriptionTier"` ProfileId string `gorm:"type:varchar(512)" json:"profileId"` ChatConversations int `json:"chatConversations"` diff --git a/backend/plugins/q_dev/tasks/s3_data_extractor.go b/backend/plugins/q_dev/tasks/s3_data_extractor.go index 919c4dbe6..1cf2a9f2e 100644 --- a/backend/plugins/q_dev/tasks/s3_data_extractor.go +++ b/backend/plugins/q_dev/tasks/s3_data_extractor.go @@ -147,7 +147,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo if err != nil { return errors.Default.Wrap(err, "failed to create user report data") } - err = db.Create(reportData) + err = db.CreateOrUpdate(reportData) if err != nil { return errors.Default.Wrap(err, "failed to save user report data") } @@ -158,8 +158,7 @@ func processCSVData(taskCtx plugin.SubTaskContext, db dal.Dal, reader io.ReadClo return errors.Default.Wrap(err, "failed to create user data") } - // Save to database - no need to check for duplicates since we're processing each file only once - err = db.Create(userData) + err = db.CreateOrUpdate(userData) if err != nil { return errors.Default.Wrap(err, "failed to save user data") }
