From e9b84f50e961ee7c3abfb8192de8f4fc778df041 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 11 Feb 2019 15:48:37 -0200 Subject: [PATCH 01/18] Migrate issuable states to integer patch 1 Patch 1 that migrates issues/merge requests states from integer to string. On this commit we are only adding the state_id column and syncing it with a backgroud migration. On Patch 2 the code to use the new integer column will be deployed and the old column will be removed. --- app/models/concerns/issuable.rb | 1 + app/models/concerns/issuable_states.rb | 37 ++++++++ app/models/merge_request.rb | 2 + ...0190211131150_add_state_id_to_issuables.rb | 37 ++++++++ db/schema.rb | 4 +- .../sync_issuables_state_id.rb | 29 ++++++ lib/gitlab/database/migration_helpers.rb | 2 +- .../add_state_id_to_issuables_spec.rb | 89 +++++++++++++++++++ 8 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 app/models/concerns/issuable_states.rb create mode 100644 db/migrate/20190211131150_add_state_id_to_issuables.rb create mode 100644 lib/gitlab/background_migration/sync_issuables_state_id.rb create mode 100644 spec/migrations/add_state_id_to_issuables_spec.rb diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb index 0a77fbeba08..7bf553a0221 100644 --- a/app/models/concerns/issuable.rb +++ b/app/models/concerns/issuable.rb @@ -23,6 +23,7 @@ module Issuable include Sortable include CreatedAtFilterable include UpdatedAtFilterable + include IssuableStates # This object is used to gather issuable meta data for displaying # upvotes, downvotes, notes and closing merge requests count for issues and merge requests diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb new file mode 100644 index 00000000000..7feaf7e8aac --- /dev/null +++ b/app/models/concerns/issuable_states.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# == IssuableStates concern +# +# Defines statuses shared by issuables which are persisted on state column +# using the state machine. +# +# Used by EE::Epic, Issue and MergeRequest +# +module IssuableStates + extend ActiveSupport::Concern + + # Override this constant on model where different states are needed + # Check MergeRequest::AVAILABLE_STATES + AVAILABLE_STATES = { opened: 1, closed: 2 }.freeze + + included do + before_save :set_state_id + end + + class_methods do + def states + @states ||= OpenStruct.new(self::AVAILABLE_STATES) + end + end + + # The state:string column is being migrated to state_id:integer column + # This is a temporary hook to populate state_id column with new values + # and can be removed after the complete migration is done. + def set_state_id + return if state.nil? || state.empty? + + states_hash = self.class.states.to_h.with_indifferent_access + + self.state_id = states_hash[state] + end +end diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 2035bffd829..67600383cf9 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -22,6 +22,8 @@ class MergeRequest < ActiveRecord::Base self.reactive_cache_lifetime = 10.minutes SORTING_PREFERENCE_FIELD = :merge_requests_sort + MERGE_REQUEST_STATES = + AVAILABLE_STATES = AVAILABLE_STATES.merge(merged: 3, locked: 4).freeze ignore_column :locked_at, :ref_fetched, diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb new file mode 100644 index 00000000000..af02aa84afd --- /dev/null +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -0,0 +1,37 @@ +class AddStateIdToIssuables < ActiveRecord::Migration[5.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + MIGRATION = 'SyncIssuablesStateId'.freeze + + # TODO - find out how many issues and merge requests in production + # to adapt the batch size and delay interval + # Keep in mind that the migration will be scheduled for issues and merge requests. + BATCH_SIZE = 5000 + DELAY_INTERVAL = 5.minutes.to_i + + class Issue < ActiveRecord::Base + include EachBatch + + self.table_name = 'issues' + end + + class MergeRequest < ActiveRecord::Base + include EachBatch + + self.table_name = 'merge_requests' + end + + def up + add_column :issues, :state_id, :integer, limit: 1 + add_column :merge_requests, :state_id, :integer, limit: 1 + + queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end + + def down + remove_column :issues, :state_id + remove_column :merge_requests, :state_id + end +end diff --git a/db/schema.rb b/db/schema.rb index 023eee5f33e..0fb31ce2ef2 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20190204115450) do +ActiveRecord::Schema.define(version: 20190211131150) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -1046,6 +1046,7 @@ ActiveRecord::Schema.define(version: 20190204115450) do t.boolean "discussion_locked" t.datetime_with_timezone "closed_at" t.integer "closed_by_id" + t.integer "state_id", limit: 2 t.index ["author_id"], name: "index_issues_on_author_id", using: :btree t.index ["closed_by_id"], name: "index_issues_on_closed_by_id", using: :btree t.index ["confidential"], name: "index_issues_on_confidential", using: :btree @@ -1283,6 +1284,7 @@ ActiveRecord::Schema.define(version: 20190204115450) do t.string "rebase_commit_sha" t.boolean "squash", default: false, null: false t.boolean "allow_maintainer_to_push" + t.integer "state_id", limit: 2 t.index ["assignee_id"], name: "index_merge_requests_on_assignee_id", using: :btree t.index ["author_id"], name: "index_merge_requests_on_author_id", using: :btree t.index ["created_at"], name: "index_merge_requests_on_created_at", using: :btree diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_issuables_state_id.rb new file mode 100644 index 00000000000..1ac86b8acf2 --- /dev/null +++ b/lib/gitlab/background_migration/sync_issuables_state_id.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class SyncIssuablesStateId + def perform(start_id, end_id, model_class) + populate_new_state_id(start_id, end_id, model_class) + end + + def populate_new_state_id(start_id, end_id, model_class) + Rails.logger.info("#{model_class.model_name.human} - Populating state_id: #{start_id} - #{end_id}") + + ActiveRecord::Base.connection.execute <<~SQL + UPDATE #{model_class.table_name} + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 3abd0600e9d..20cbb9e096b 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1033,7 +1033,7 @@ into similar problems in the future (e.g. when new tables are created). # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, model_class]) end end diff --git a/spec/migrations/add_state_id_to_issuables_spec.rb b/spec/migrations/add_state_id_to_issuables_spec.rb new file mode 100644 index 00000000000..4416f416c18 --- /dev/null +++ b/spec/migrations/add_state_id_to_issuables_spec.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +require 'spec_helper' +require Rails.root.join('db', 'migrate', '20190206144959_change_issuable_states_to_integer.rb') + +describe AddStateIdToIssuables, :migration do + let(:namespaces) { table(:namespaces) } + let(:projects) { table(:projects) } + let(:merge_requests) { table(:merge_requests) } + let(:issues) { table(:issues) } + let(:migration) { described_class.new } + + before do + @group = namespaces.create!(name: 'gitlab', path: 'gitlab') + @project = projects.create!(namespace_id: @group.id) + end + + describe '#up' do + context 'issues' do + it 'migrates state column to integer' do + opened_issue = issues.create!(description: 'first', state: 'opened') + closed_issue = issues.create!(description: 'second', state: 'closed') + nil_state_issue = issues.create!(description: 'third', state: nil) + + migrate! + + issues.reset_column_information + expect(opened_issue.reload.state).to eq(Issue.states.opened) + expect(closed_issue.reload.state).to eq(Issue.states.closed) + expect(nil_state_issue.reload.state).to eq(nil) + end + end + + context 'merge requests' do + it 'migrates state column to integer' do + opened_merge_request = merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') + closed_merge_request = merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') + merged_merge_request = merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') + locked_merge_request = merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') + nil_state_merge_request = merge_requests.create!(state: nil, target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') + + migrate! + + merge_requests.reset_column_information + expect(opened_merge_request.reload.state).to eq(MergeRequest.states.opened) + expect(closed_merge_request.reload.state).to eq(MergeRequest.states.closed) + expect(merged_merge_request.reload.state).to eq(MergeRequest.states.merged) + expect(locked_merge_request.reload.state).to eq(MergeRequest.states.locked) + expect(nil_state_merge_request.reload.state).to eq(nil) + end + end + end + + describe '#down' do + context 'issues' do + it 'migrates state column to string' do + opened_issue = issues.create!(description: 'first', state: 1) + closed_issue = issues.create!(description: 'second', state: 2) + nil_state_issue = issues.create!(description: 'third', state: nil) + + migration.down + + issues.reset_column_information + expect(opened_issue.reload.state).to eq('opened') + expect(closed_issue.reload.state).to eq('closed') + expect(nil_state_issue.reload.state).to eq(nil) + end + end + + context 'merge requests' do + it 'migrates state column to string' do + opened_merge_request = merge_requests.create!(state: 1, target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') + closed_merge_request = merge_requests.create!(state: 2, target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') + merged_merge_request = merge_requests.create!(state: 3, target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') + locked_merge_request = merge_requests.create!(state: 4, target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') + nil_state_merge_request = merge_requests.create!(state: nil, target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') + + migration.down + + merge_requests.reset_column_information + expect(opened_merge_request.reload.state).to eq('opened') + expect(closed_merge_request.reload.state).to eq('closed') + expect(merged_merge_request.reload.state).to eq('merged') + expect(locked_merge_request.reload.state).to eq('locked') + expect(nil_state_merge_request.reload.state).to eq(nil) + end + end + end +end -- GitLab From 362d56e65a0e23fcf4fd5bd4535d258c3659ffd5 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 12 Feb 2019 14:40:37 -0200 Subject: [PATCH 02/18] Schedule background migrations and specs --- ...0190211131150_add_state_id_to_issuables.rb | 10 +++- .../sync_issuables_state_id.rb | 10 ++-- lib/gitlab/database/migration_helpers.rb | 3 +- .../add_state_id_to_issuables_spec.rb | 60 ++++--------------- 4 files changed, 27 insertions(+), 56 deletions(-) diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb index af02aa84afd..b9d52fe63cd 100644 --- a/db/migrate/20190211131150_add_state_id_to_issuables.rb +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -1,5 +1,6 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] include Gitlab::Database::MigrationHelpers + #include AfterCommitQueue DOWNTIME = false MIGRATION = 'SyncIssuablesStateId'.freeze @@ -26,8 +27,13 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] add_column :issues, :state_id, :integer, limit: 1 add_column :merge_requests, :state_id, :integer, limit: 1 - queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) - queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + # Is this safe? + # Added to avoid an warning about jobs running inside transactions. + # Since we only add a column this should be ok + Sidekiq::Worker.skipping_transaction_check do + queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end end def down diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_issuables_state_id.rb index 1ac86b8acf2..95734a7310e 100644 --- a/lib/gitlab/background_migration/sync_issuables_state_id.rb +++ b/lib/gitlab/background_migration/sync_issuables_state_id.rb @@ -4,15 +4,15 @@ module Gitlab module BackgroundMigration class SyncIssuablesStateId - def perform(start_id, end_id, model_class) - populate_new_state_id(start_id, end_id, model_class) + def perform(start_id, end_id, table_name) + populate_new_state_id(start_id, end_id, table_name) end - def populate_new_state_id(start_id, end_id, model_class) - Rails.logger.info("#{model_class.model_name.human} - Populating state_id: #{start_id} - #{end_id}") + def populate_new_state_id(start_id, end_id, table_name) + Rails.logger.info("#{table_name} - Populating state_id: #{start_id} - #{end_id}") ActiveRecord::Base.connection.execute <<~SQL - UPDATE #{model_class.table_name} + UPDATE #{table_name} SET state_id = CASE state WHEN 'opened' THEN 1 diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 20cbb9e096b..46b36d07c20 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1029,11 +1029,12 @@ into similar problems in the future (e.g. when new tables are created). model_class.each_batch(of: batch_size) do |relation, index| start_id, end_id = relation.pluck('MIN(id), MAX(id)').first + table_name = relation.base_class.table_name # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, model_class]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, table_name]) end end diff --git a/spec/migrations/add_state_id_to_issuables_spec.rb b/spec/migrations/add_state_id_to_issuables_spec.rb index 4416f416c18..b0e285db1f3 100644 --- a/spec/migrations/add_state_id_to_issuables_spec.rb +++ b/spec/migrations/add_state_id_to_issuables_spec.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require 'spec_helper' -require Rails.root.join('db', 'migrate', '20190206144959_change_issuable_states_to_integer.rb') +require Rails.root.join('db', 'migrate', '20190211131150_add_state_id_to_issuables.rb') describe AddStateIdToIssuables, :migration do let(:namespaces) { table(:namespaces) } @@ -20,14 +20,15 @@ describe AddStateIdToIssuables, :migration do it 'migrates state column to integer' do opened_issue = issues.create!(description: 'first', state: 'opened') closed_issue = issues.create!(description: 'second', state: 'closed') + invalid_state_issue = issues.create!(description: 'fourth', state: 'not valid') nil_state_issue = issues.create!(description: 'third', state: nil) migrate! - issues.reset_column_information - expect(opened_issue.reload.state).to eq(Issue.states.opened) - expect(closed_issue.reload.state).to eq(Issue.states.closed) - expect(nil_state_issue.reload.state).to eq(nil) + expect(opened_issue.reload.state_id).to eq(Issue.states.opened) + expect(closed_issue.reload.state_id).to eq(Issue.states.closed) + expect(invalid_state_issue.reload.state_id).to be_nil + expect(nil_state_issue.reload.state_id).to be_nil end end @@ -37,52 +38,15 @@ describe AddStateIdToIssuables, :migration do closed_merge_request = merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') merged_merge_request = merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') locked_merge_request = merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') - nil_state_merge_request = merge_requests.create!(state: nil, target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') + invalid_state_merge_request = merge_requests.create!(state: 'not valid', target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') migrate! - merge_requests.reset_column_information - expect(opened_merge_request.reload.state).to eq(MergeRequest.states.opened) - expect(closed_merge_request.reload.state).to eq(MergeRequest.states.closed) - expect(merged_merge_request.reload.state).to eq(MergeRequest.states.merged) - expect(locked_merge_request.reload.state).to eq(MergeRequest.states.locked) - expect(nil_state_merge_request.reload.state).to eq(nil) - end - end - end - - describe '#down' do - context 'issues' do - it 'migrates state column to string' do - opened_issue = issues.create!(description: 'first', state: 1) - closed_issue = issues.create!(description: 'second', state: 2) - nil_state_issue = issues.create!(description: 'third', state: nil) - - migration.down - - issues.reset_column_information - expect(opened_issue.reload.state).to eq('opened') - expect(closed_issue.reload.state).to eq('closed') - expect(nil_state_issue.reload.state).to eq(nil) - end - end - - context 'merge requests' do - it 'migrates state column to string' do - opened_merge_request = merge_requests.create!(state: 1, target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') - closed_merge_request = merge_requests.create!(state: 2, target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') - merged_merge_request = merge_requests.create!(state: 3, target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') - locked_merge_request = merge_requests.create!(state: 4, target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') - nil_state_merge_request = merge_requests.create!(state: nil, target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') - - migration.down - - merge_requests.reset_column_information - expect(opened_merge_request.reload.state).to eq('opened') - expect(closed_merge_request.reload.state).to eq('closed') - expect(merged_merge_request.reload.state).to eq('merged') - expect(locked_merge_request.reload.state).to eq('locked') - expect(nil_state_merge_request.reload.state).to eq(nil) + expect(opened_merge_request.reload.state_id).to eq(MergeRequest.states.opened) + expect(closed_merge_request.reload.state_id).to eq(MergeRequest.states.closed) + expect(merged_merge_request.reload.state_id).to eq(MergeRequest.states.merged) + expect(locked_merge_request.reload.state_id).to eq(MergeRequest.states.locked) + expect(invalid_state_merge_request.reload.state_id).to be_nil end end end -- GitLab From e2aa332504f6cd9eebaa30dfeb71edcea6f9495a Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 12 Feb 2019 16:39:56 -0200 Subject: [PATCH 03/18] Improve batch size --- app/models/concerns/issuable_states.rb | 12 ++++++------ app/models/merge_request.rb | 1 - .../20190211131150_add_state_id_to_issuables.rb | 10 ++++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index 7feaf7e8aac..f9f5797065d 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -2,7 +2,7 @@ # == IssuableStates concern # -# Defines statuses shared by issuables which are persisted on state column +# Defines states shared by issuables which are persisted on state_id column # using the state machine. # # Used by EE::Epic, Issue and MergeRequest @@ -14,10 +14,6 @@ module IssuableStates # Check MergeRequest::AVAILABLE_STATES AVAILABLE_STATES = { opened: 1, closed: 2 }.freeze - included do - before_save :set_state_id - end - class_methods do def states @states ||= OpenStruct.new(self::AVAILABLE_STATES) @@ -26,7 +22,11 @@ module IssuableStates # The state:string column is being migrated to state_id:integer column # This is a temporary hook to populate state_id column with new values - # and can be removed after the complete migration is done. + # and can be removed after the state column is removed. + included do + before_save :set_state_id + end + def set_state_id return if state.nil? || state.empty? diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 67600383cf9..ece31b359d1 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -22,7 +22,6 @@ class MergeRequest < ActiveRecord::Base self.reactive_cache_lifetime = 10.minutes SORTING_PREFERENCE_FIELD = :merge_requests_sort - MERGE_REQUEST_STATES = AVAILABLE_STATES = AVAILABLE_STATES.merge(merged: 3, locked: 4).freeze ignore_column :locked_at, diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb index b9d52fe63cd..d23c946cf88 100644 --- a/db/migrate/20190211131150_add_state_id_to_issuables.rb +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -5,10 +5,12 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] DOWNTIME = false MIGRATION = 'SyncIssuablesStateId'.freeze - # TODO - find out how many issues and merge requests in production - # to adapt the batch size and delay interval - # Keep in mind that the migration will be scheduled for issues and merge requests. - BATCH_SIZE = 5000 + # 2019-02-12 Gitlab.com issuable numbers + # issues count: 13587305 + # merge requests count: 18925274 + # Using this 50000 as batch size should take around 13 hours + # to migrate both issues and merge requests + BATCH_SIZE = 50000 DELAY_INTERVAL = 5.minutes.to_i class Issue < ActiveRecord::Base -- GitLab From 26f40aefb09b96538fa99f74d46542ad39bb1679 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Wed, 13 Feb 2019 17:31:14 -0200 Subject: [PATCH 04/18] Code improvements --- app/models/concerns/issuable.rb | 9 ++++++++ app/models/concerns/issuable_states.rb | 21 +------------------ app/models/merge_request.rb | 5 ++++- ...0190211131150_add_state_id_to_issuables.rb | 8 +++---- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb index 7bf553a0221..83bd8cc6478 100644 --- a/app/models/concerns/issuable.rb +++ b/app/models/concerns/issuable.rb @@ -132,6 +132,15 @@ module Issuable fuzzy_search(query, [:title]) end + # Available state values persisted in state_id column using state machine + # + # Override this on subclasses if different states are needed + # + # Check MergeRequest.available_states for example + def available_states + @available_states ||= { opened: 1, closed: 2 }.with_indifferent_access + end + # Searches for records with a matching title or description. # # This method uses ILIKE on PostgreSQL and LIKE on MySQL. diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index f9f5797065d..d7992f8e6db 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -1,25 +1,6 @@ -# frozen_string_literal: true - -# == IssuableStates concern -# -# Defines states shared by issuables which are persisted on state_id column -# using the state machine. -# -# Used by EE::Epic, Issue and MergeRequest -# module IssuableStates extend ActiveSupport::Concern - # Override this constant on model where different states are needed - # Check MergeRequest::AVAILABLE_STATES - AVAILABLE_STATES = { opened: 1, closed: 2 }.freeze - - class_methods do - def states - @states ||= OpenStruct.new(self::AVAILABLE_STATES) - end - end - # The state:string column is being migrated to state_id:integer column # This is a temporary hook to populate state_id column with new values # and can be removed after the state column is removed. @@ -30,7 +11,7 @@ module IssuableStates def set_state_id return if state.nil? || state.empty? - states_hash = self.class.states.to_h.with_indifferent_access + states_hash = self.class.available_states self.state_id = states_hash[state] end diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index ece31b359d1..6baf479d8d1 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -22,7 +22,6 @@ class MergeRequest < ActiveRecord::Base self.reactive_cache_lifetime = 10.minutes SORTING_PREFERENCE_FIELD = :merge_requests_sort - AVAILABLE_STATES = AVAILABLE_STATES.merge(merged: 3, locked: 4).freeze ignore_column :locked_at, :ref_fetched, @@ -194,6 +193,10 @@ class MergeRequest < ActiveRecord::Base '!' end + def self.available_states + @states ||= super.merge(locked: 3, merged: 4) + end + def rebase_in_progress? strong_memoize(:rebase_in_progress) do # The source project can be deleted diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb index d23c946cf88..440f577e1a3 100644 --- a/db/migrate/20190211131150_add_state_id_to_issuables.rb +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -8,9 +8,9 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] # 2019-02-12 Gitlab.com issuable numbers # issues count: 13587305 # merge requests count: 18925274 - # Using this 50000 as batch size should take around 13 hours + # Using this 25000 as batch size should take around 26 hours # to migrate both issues and merge requests - BATCH_SIZE = 50000 + BATCH_SIZE = 25000 DELAY_INTERVAL = 5.minutes.to_i class Issue < ActiveRecord::Base @@ -26,8 +26,8 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] end def up - add_column :issues, :state_id, :integer, limit: 1 - add_column :merge_requests, :state_id, :integer, limit: 1 + add_column :issues, :state_id, :integer, limit: 2 + add_column :merge_requests, :state_id, :integer, limit: 2 # Is this safe? # Added to avoid an warning about jobs running inside transactions. -- GitLab From 37741c59a4daf1b0d6d9f7a6a51337e9d8effb66 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 14 Feb 2019 11:48:20 -0200 Subject: [PATCH 05/18] Split background migration for issues and merge requests --- app/models/merge_request.rb | 2 +- ...0190211131150_add_state_id_to_issuables.rb | 30 ------------- ...112022_schedule_sync_issuables_state_id.rb | 43 +++++++++++++++++++ db/schema.rb | 2 +- .../sync_issues_state_id.rb | 23 ++++++++++ ..._id.rb => sync_merge_requests_state_id.rb} | 12 ++---- lib/gitlab/database/migration_helpers.rb | 3 +- ... schedule_sync_issuables_state_id_spec.rb} | 16 +++---- 8 files changed, 81 insertions(+), 50 deletions(-) create mode 100644 db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb create mode 100644 lib/gitlab/background_migration/sync_issues_state_id.rb rename lib/gitlab/background_migration/{sync_issuables_state_id.rb => sync_merge_requests_state_id.rb} (59%) rename spec/migrations/{add_state_id_to_issuables_spec.rb => schedule_sync_issuables_state_id_spec.rb} (82%) diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 6baf479d8d1..0ec0789a24f 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -194,7 +194,7 @@ class MergeRequest < ActiveRecord::Base end def self.available_states - @states ||= super.merge(locked: 3, merged: 4) + @states ||= super.merge(merged: 3, locked: 4) end def rebase_in_progress? diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb index 440f577e1a3..cf3f7671a67 100644 --- a/db/migrate/20190211131150_add_state_id_to_issuables.rb +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -1,41 +1,11 @@ class AddStateIdToIssuables < ActiveRecord::Migration[5.0] include Gitlab::Database::MigrationHelpers - #include AfterCommitQueue DOWNTIME = false - MIGRATION = 'SyncIssuablesStateId'.freeze - - # 2019-02-12 Gitlab.com issuable numbers - # issues count: 13587305 - # merge requests count: 18925274 - # Using this 25000 as batch size should take around 26 hours - # to migrate both issues and merge requests - BATCH_SIZE = 25000 - DELAY_INTERVAL = 5.minutes.to_i - - class Issue < ActiveRecord::Base - include EachBatch - - self.table_name = 'issues' - end - - class MergeRequest < ActiveRecord::Base - include EachBatch - - self.table_name = 'merge_requests' - end def up add_column :issues, :state_id, :integer, limit: 2 add_column :merge_requests, :state_id, :integer, limit: 2 - - # Is this safe? - # Added to avoid an warning about jobs running inside transactions. - # Since we only add a column this should be ok - Sidekiq::Worker.skipping_transaction_check do - queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) - queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) - end end def down diff --git a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb new file mode 100644 index 00000000000..d9b77d2f02c --- /dev/null +++ b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + # 2019-02-12 Gitlab.com issuable numbers + # issues count: 13587305 + # merge requests count: 18925274 + # Using this 25000 as batch size should take around 26 hours + # to migrate both issues and merge requests + BATCH_SIZE = 25000 + DELAY_INTERVAL = 5.minutes.to_i + ISSUE_MIGRATION = 'SyncIssuesStateId'.freeze + MERGE_REQUEST_MIGRATION = 'SyncMergeRequestsStateId'.freeze + + class Issue < ActiveRecord::Base + include EachBatch + + self.table_name = 'issues' + end + + class MergeRequest < ActiveRecord::Base + include EachBatch + + self.table_name = 'merge_requests' + end + + def up + Sidekiq::Worker.skipping_transaction_check do + queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), ISSUE_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MERGE_REQUEST_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end + end + + def down + # No op + end +end diff --git a/db/schema.rb b/db/schema.rb index 0fb31ce2ef2..4a347e7289b 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20190211131150) do +ActiveRecord::Schema.define(version: 20190214112022) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb new file mode 100644 index 00000000000..33b997c8533 --- /dev/null +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class SyncIssuesStateId + def perform(start_id, end_id) + Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") + + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + end +end diff --git a/lib/gitlab/background_migration/sync_issuables_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb similarity index 59% rename from lib/gitlab/background_migration/sync_issuables_state_id.rb rename to lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 95734a7310e..923ceaeec54 100644 --- a/lib/gitlab/background_migration/sync_issuables_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -3,16 +3,12 @@ module Gitlab module BackgroundMigration - class SyncIssuablesStateId - def perform(start_id, end_id, table_name) - populate_new_state_id(start_id, end_id, table_name) - end - - def populate_new_state_id(start_id, end_id, table_name) - Rails.logger.info("#{table_name} - Populating state_id: #{start_id} - #{end_id}") + class SyncMergeRequestsStateId + def perform(start_id, end_id) + Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") ActiveRecord::Base.connection.execute <<~SQL - UPDATE #{table_name} + UPDATE merge_requests SET state_id = CASE state WHEN 'opened' THEN 1 diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 46b36d07c20..3abd0600e9d 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -1029,12 +1029,11 @@ into similar problems in the future (e.g. when new tables are created). model_class.each_batch(of: batch_size) do |relation, index| start_id, end_id = relation.pluck('MIN(id), MAX(id)').first - table_name = relation.base_class.table_name # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for # the same time, which is not helpful in most cases where we wish to # spread the work over time. - BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id, table_name]) + BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id]) end end diff --git a/spec/migrations/add_state_id_to_issuables_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb similarity index 82% rename from spec/migrations/add_state_id_to_issuables_spec.rb rename to spec/migrations/schedule_sync_issuables_state_id_spec.rb index b0e285db1f3..a926ee38387 100644 --- a/spec/migrations/add_state_id_to_issuables_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true require 'spec_helper' -require Rails.root.join('db', 'migrate', '20190211131150_add_state_id_to_issuables.rb') +require Rails.root.join('db', 'post_migrate', '20190214112022_schedule_sync_issuables_state_id.rb') -describe AddStateIdToIssuables, :migration do +describe ScheduleSyncIssuablesStateId, :migration do let(:namespaces) { table(:namespaces) } let(:projects) { table(:projects) } let(:merge_requests) { table(:merge_requests) } @@ -25,8 +25,8 @@ describe AddStateIdToIssuables, :migration do migrate! - expect(opened_issue.reload.state_id).to eq(Issue.states.opened) - expect(closed_issue.reload.state_id).to eq(Issue.states.closed) + expect(opened_issue.reload.state_id).to eq(Issue.available_states[:opened]) + expect(closed_issue.reload.state_id).to eq(Issue.available_states[:closed]) expect(invalid_state_issue.reload.state_id).to be_nil expect(nil_state_issue.reload.state_id).to be_nil end @@ -42,10 +42,10 @@ describe AddStateIdToIssuables, :migration do migrate! - expect(opened_merge_request.reload.state_id).to eq(MergeRequest.states.opened) - expect(closed_merge_request.reload.state_id).to eq(MergeRequest.states.closed) - expect(merged_merge_request.reload.state_id).to eq(MergeRequest.states.merged) - expect(locked_merge_request.reload.state_id).to eq(MergeRequest.states.locked) + expect(opened_merge_request.reload.state_id).to eq(MergeRequest.available_states[:opened]) + expect(closed_merge_request.reload.state_id).to eq(MergeRequest.available_states[:closed]) + expect(merged_merge_request.reload.state_id).to eq(MergeRequest.available_states[:merged]) + expect(locked_merge_request.reload.state_id).to eq(MergeRequest.available_states[:locked]) expect(invalid_state_merge_request.reload.state_id).to be_nil end end -- GitLab From d4a5d8d07069acb6f068990633baaf56d20bc18b Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 14 Feb 2019 14:24:23 -0200 Subject: [PATCH 06/18] Add specs for issuable states sync --- app/models/concerns/issuable_states.rb | 1 + spec/models/concerns/issuable_states_spec.rb | 30 ++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 spec/models/concerns/issuable_states_spec.rb diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index d7992f8e6db..a39a0ef77ed 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -4,6 +4,7 @@ module IssuableStates # The state:string column is being migrated to state_id:integer column # This is a temporary hook to populate state_id column with new values # and can be removed after the state column is removed. + # Check https://gitlab.com/gitlab-org/gitlab-ce/issues/51789 for more information included do before_save :set_state_id end diff --git a/spec/models/concerns/issuable_states_spec.rb b/spec/models/concerns/issuable_states_spec.rb new file mode 100644 index 00000000000..70450159cc0 --- /dev/null +++ b/spec/models/concerns/issuable_states_spec.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# This spec checks if state_id column of issues and merge requests +# are being synced on every save. +# It can be removed in the next release. Check https://gitlab.com/gitlab-org/gitlab-ce/issues/51789 for more information. +describe IssuableStates do + [Issue, MergeRequest].each do |klass| + it "syncs state_id column when #{klass.model_name.human} gets created" do + klass.available_states.each do |state, state_id| + issuable = build(klass.model_name.param_key, state: state.to_s) + + issuable.save! + + expect(issuable.state_id).to eq(state_id) + end + end + + it "syncs state_id column when #{klass.model_name.human} gets updated" do + klass.available_states.each do |state, state_id| + issuable = create(klass.model_name.param_key, state: state.to_s) + + issuable.update(state: state) + + expect(issuable.state_id).to eq(state_id) + end + end + end +end -- GitLab From 9671a67a4ce58683ca0188ff9e75b1d5dfcc5dec Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 14 Feb 2019 16:33:26 -0200 Subject: [PATCH 07/18] Fix broken specs --- app/models/concerns/issuable_states.rb | 10 +++++++++- app/models/merge_request.rb | 2 +- db/migrate/20190211131150_add_state_id_to_issuables.rb | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index a39a0ef77ed..12fa97a1469 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -1,9 +1,11 @@ +# frozen_string_literal: true + module IssuableStates extend ActiveSupport::Concern # The state:string column is being migrated to state_id:integer column # This is a temporary hook to populate state_id column with new values - # and can be removed after the state column is removed. + # and should be removed after the state column is removed. # Check https://gitlab.com/gitlab-org/gitlab-ce/issues/51789 for more information included do before_save :set_state_id @@ -12,6 +14,12 @@ module IssuableStates def set_state_id return if state.nil? || state.empty? + # Needed to prevent breaking some migration specs that + # rollback database to a point where state_id does not exist. + # We can use this guard clause for now since this file will should + # be removed in the next release. + return unless self.respond_to?(:state_id) + states_hash = self.class.available_states self.state_id = states_hash[state] diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 0ec0789a24f..063433111cc 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -194,7 +194,7 @@ class MergeRequest < ActiveRecord::Base end def self.available_states - @states ||= super.merge(merged: 3, locked: 4) + @available_states ||= super.merge(merged: 3, locked: 4) end def rebase_in_progress? diff --git a/db/migrate/20190211131150_add_state_id_to_issuables.rb b/db/migrate/20190211131150_add_state_id_to_issuables.rb index cf3f7671a67..c1173eb4249 100644 --- a/db/migrate/20190211131150_add_state_id_to_issuables.rb +++ b/db/migrate/20190211131150_add_state_id_to_issuables.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class AddStateIdToIssuables < ActiveRecord::Migration[5.0] include Gitlab::Database::MigrationHelpers -- GitLab From 7b79f6ab0d342e335609a7f3eb6cb691d1f03111 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 14 Feb 2019 16:37:12 -0200 Subject: [PATCH 08/18] fix typo --- app/models/concerns/issuable_states.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index 12fa97a1469..2ebd5013a4c 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -16,8 +16,8 @@ module IssuableStates # Needed to prevent breaking some migration specs that # rollback database to a point where state_id does not exist. - # We can use this guard clause for now since this file will should - # be removed in the next release. + # We can use this guard clause for now since this file will + # be removed on a future the next release. return unless self.respond_to?(:state_id) states_hash = self.class.available_states -- GitLab From bf99ce7bf8bf4bda75ca57f5db4d216f3585615a Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Fri, 15 Feb 2019 14:37:55 -0200 Subject: [PATCH 09/18] Fix specs --- spec/db/schema_spec.rb | 4 ++-- spec/lib/gitlab/import_export/safe_model_attributes.yml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/db/schema_spec.rb b/spec/db/schema_spec.rb index 897b4411055..40c3a6d90d0 100644 --- a/spec/db/schema_spec.rb +++ b/spec/db/schema_spec.rb @@ -25,12 +25,12 @@ describe 'Database schema' do events: %w[target_id], forked_project_links: %w[forked_from_project_id], identities: %w[user_id], - issues: %w[last_edited_by_id], + issues: %w[last_edited_by_id state_id], keys: %w[user_id], label_links: %w[target_id], lfs_objects_projects: %w[lfs_object_id project_id], members: %w[source_id created_by_id], - merge_requests: %w[last_edited_by_id], + merge_requests: %w[last_edited_by_id state_id], namespaces: %w[owner_id parent_id], notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id discussion_id], notification_settings: %w[source_id], diff --git a/spec/lib/gitlab/import_export/safe_model_attributes.yml b/spec/lib/gitlab/import_export/safe_model_attributes.yml index baca8f6d542..9cfa29ca7d9 100644 --- a/spec/lib/gitlab/import_export/safe_model_attributes.yml +++ b/spec/lib/gitlab/import_export/safe_model_attributes.yml @@ -11,6 +11,7 @@ Issue: - branch_name - description - state +- state_id - iid - updated_by_id - confidential @@ -158,6 +159,7 @@ MergeRequest: - created_at - updated_at - state +- state_id - merge_status - target_project_id - iid -- GitLab From b1346db3a0b49b295a9702921285fdb30029563c Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Fri, 15 Feb 2019 17:11:41 -0200 Subject: [PATCH 10/18] Add Reschedulable module --- lib/gitlab/background_migration.rb | 2 + .../concerns/reschedulable.rb | 50 +++++++++++++++++++ .../background_migration/delete_diff_files.rb | 37 +++----------- .../delete_diff_files_spec.rb | 10 ++-- 4 files changed, 63 insertions(+), 36 deletions(-) create mode 100644 lib/gitlab/background_migration/concerns/reschedulable.rb diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index 5251e0fadf9..b308e94bfa0 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +Dir[Rails.root.join("lib/gitlab/background_migration/concerns/*.rb")].each { |f| require f } + module Gitlab module BackgroundMigration def self.queue diff --git a/lib/gitlab/background_migration/concerns/reschedulable.rb b/lib/gitlab/background_migration/concerns/reschedulable.rb new file mode 100644 index 00000000000..fbf3d799743 --- /dev/null +++ b/lib/gitlab/background_migration/concerns/reschedulable.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module Reschedulable + extend ActiveSupport::Concern + + def reschedule_if_needed(args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + # Override in subclass if you need a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if you need a different vacuum wait time + def vacuum_wait_time + @vacuum_wait_time ||= 5.minutes + end + end + end +end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 664ead1af44..0a7cbd5c30f 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,6 +4,8 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles + include Reschedulable + class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' @@ -15,47 +17,24 @@ module Gitlab self.table_name = 'merge_request_diff_files' end - DEAD_TUPLES_THRESHOLD = 50_000 - VACUUM_WAIT_TIME = 5.minutes - def perform(ids) @ids = ids - # We should reschedule until deadtuples get in a desirable - # state (e.g. < 50_000). That may take more than one reschedule. - # - if should_wait_deadtuple_vacuum? - reschedule - return + reschedule_if_needed([ids]) do + prune_diff_files end - - prune_diff_files - end - - def should_wait_deadtuple_vacuum? - return false unless Gitlab::Database.postgresql? - - diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD end private - def reschedule - BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids]) + def should_reschedule? + wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end def diffs_collection MergeRequestDiff.where(id: @ids) end - def diff_files_dead_tuples_count - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = 'merge_request_diff_files'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - def prune_diff_files removed = 0 updated = 0 @@ -69,10 +48,6 @@ module Gitlab "updated #{updated} merge_request_diffs rows") end - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - def log_info(message) Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb index 27281333348..690881ab59b 100644 --- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -40,14 +40,14 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch end end - it 'reschedules itself when should_wait_deadtuple_vacuum' do + it 'reschedules itself when should wait for dead tuple vacuum' do merge_request = create(:merge_request, :merged) first_diff = merge_request.merge_request_diff second_diff = merge_request.create_merge_request_diff Sidekiq::Testing.fake! do worker = described_class.new - allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true } + allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true } worker.perform([first_diff.id, second_diff.id]) @@ -57,10 +57,10 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch end end - describe '#should_wait_deadtuple_vacuum?' do + describe '#wait_for_deadtuple_vacuum?' do it 'returns true when hitting merge_request_diff_files hits DEAD_TUPLES_THRESHOLD', :postgresql do worker = described_class.new - threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }] + threshold_query_result = [{ "n_dead_tup" => 50_000 }] normal_query_result = [{ "n_dead_tup" => '3' }] allow(worker) @@ -68,7 +68,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch .with(/SELECT n_dead_tup */) .and_return(threshold_query_result, normal_query_result) - expect(worker.should_wait_deadtuple_vacuum?).to be(true) + expect(worker.wait_for_deadtuple_vacuum?('merge_request_diff_files')).to be(true) end end end -- GitLab From a9886c7dd434d6936c455c9877f6c695cafebe0a Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 11:51:44 -0300 Subject: [PATCH 11/18] Fix rubocop --- lib/gitlab/background_migration.rb | 2 +- .../concerns/reschedulable.rb | 50 ---------------- .../background_migration/delete_diff_files.rb | 2 +- .../helpers/reschedulable.rb | 59 +++++++++++++++++++ 4 files changed, 61 insertions(+), 52 deletions(-) delete mode 100644 lib/gitlab/background_migration/concerns/reschedulable.rb create mode 100644 lib/gitlab/background_migration/helpers/reschedulable.rb diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index b308e94bfa0..948488c844c 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -Dir[Rails.root.join("lib/gitlab/background_migration/concerns/*.rb")].each { |f| require f } +Dir[Rails.root.join("lib/gitlab/background_migration/helpers/*.rb")].each { |f| require f } module Gitlab module BackgroundMigration diff --git a/lib/gitlab/background_migration/concerns/reschedulable.rb b/lib/gitlab/background_migration/concerns/reschedulable.rb deleted file mode 100644 index fbf3d799743..00000000000 --- a/lib/gitlab/background_migration/concerns/reschedulable.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - module Reschedulable - extend ActiveSupport::Concern - - def reschedule_if_needed(args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - # Override in subclass if you need a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if you need a different vacuum wait time - def vacuum_wait_time - @vacuum_wait_time ||= 5.minutes - end - end - end -end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 0a7cbd5c30f..11851c23ee3 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles - include Reschedulable + include Helpers::Reschedulable class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb new file mode 100644 index 00000000000..7810f2627c8 --- /dev/null +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module Helpers + # == Reschedulable helper + # + # Allows background migrations to be reschedule if a condition is not met. + # + # Check DeleteDiffFiles migration which reschedules itself if dead tuple count + # on DB is not acceptable. + # + module Reschedulable + extend ActiveSupport::Concern + + def reschedule_if_needed(args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + # Override in subclass if you need a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if you need a different vacuum wait time + def vacuum_wait_time + @vacuum_wait_time ||= 5.minutes + end + end + end + end +end -- GitLab From d88b44caad35136bc42f245162c99e8bae0a5912 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 12:00:14 -0300 Subject: [PATCH 12/18] Fix typo --- lib/gitlab/background_migration/helpers/reschedulable.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 7810f2627c8..3b653059858 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -5,9 +5,9 @@ module Gitlab module Helpers # == Reschedulable helper # - # Allows background migrations to be reschedule if a condition is not met. + # Allows background migrations to be reschedule itself if a condition is not met. # - # Check DeleteDiffFiles migration which reschedules itself if dead tuple count + # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count # on DB is not acceptable. # module Reschedulable -- GitLab From cc7a44c8e130a8f3f753ba0ed32e45b0a6b0e6f7 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 18 Feb 2019 17:43:16 -0300 Subject: [PATCH 13/18] Make migrations reschedulable --- .../background_migration/delete_diff_files.rb | 2 +- .../helpers/reschedulable.rb | 8 ++-- .../sync_issues_state_id.rb | 30 ++++++++----- .../sync_merge_requests_state_id.rb | 34 +++++++++----- .../schedule_sync_issuables_state_id_spec.rb | 45 +++++++++++++++++++ 5 files changed, 92 insertions(+), 27 deletions(-) diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 11851c23ee3..4cb068eb71a 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -27,7 +27,7 @@ module Gitlab private - def should_reschedule? + def need_reschedule? wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 3b653059858..6087181609e 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -14,7 +14,7 @@ module Gitlab extend ActiveSupport::Concern def reschedule_if_needed(args, &block) - if should_reschedule? + if need_reschedule? BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) else yield @@ -22,9 +22,9 @@ module Gitlab end # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end + # def need_reschedule? + # raise NotImplementedError, "#{self.class} does not implement #{__method__}" + # end def wait_for_deadtuple_vacuum?(table_name) return false unless Gitlab::Database.postgresql? diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33b997c8533..f95c433c64c 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,19 +4,29 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId + include Helpers::Reschedulable + def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL - UPDATE issues - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL + reschedule_if_needed([start_id, end_id]) do + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + + private + + def need_reschedule? + wait_for_deadtuple_vacuum?('issues') end end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 923ceaeec54..5d92553f577 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,21 +4,31 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId + include Helpers::Reschedulable + def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL - UPDATE merge_requests - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL + reschedule_if_needed([start_id, end_id]) do + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL + end + end + + private + + def need_reschedule? + wait_for_deadtuple_vacuum?('issues') end end end diff --git a/spec/migrations/schedule_sync_issuables_state_id_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb index a926ee38387..a746fb68c30 100644 --- a/spec/migrations/schedule_sync_issuables_state_id_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -15,7 +15,40 @@ describe ScheduleSyncIssuablesStateId, :migration do @project = projects.create!(namespace_id: @group.id) end + shared_examples 'rescheduling migrations' do + before do + Sidekiq::Worker.clear_all + end + + it 'reschedules migrations when should wait for dead tuple vacuum' do + worker = worker_class.new + + Sidekiq::Testing.fake! do + allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true } + + worker.perform(resource_1.id, resource_2.id) + + expect(worker_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) + expect(BackgroundMigrationWorker.jobs.size).to eq(1) + end + end + end + describe '#up' do + # it 'correctly schedules diff file deletion workers' do + # Sidekiq::Testing.fake! do + # Timecop.freeze do + # described_class.new.perform + + # expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5]) + + # expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6]) + + # expect(BackgroundMigrationWorker.jobs.size).to eq(2) + # end + # end + # end + context 'issues' do it 'migrates state column to integer' do opened_issue = issues.create!(description: 'first', state: 'opened') @@ -30,6 +63,12 @@ describe ScheduleSyncIssuablesStateId, :migration do expect(invalid_state_issue.reload.state_id).to be_nil expect(nil_state_issue.reload.state_id).to be_nil end + + it_behaves_like 'rescheduling migrations' do + let(:worker_class) { Gitlab::BackgroundMigration::SyncIssuesStateId } + let(:resource_1) { issues.create!(description: 'first', state: 'opened') } + let(:resource_2) { issues.create!(description: 'second', state: 'closed') } + end end context 'merge requests' do @@ -48,6 +87,12 @@ describe ScheduleSyncIssuablesStateId, :migration do expect(locked_merge_request.reload.state_id).to eq(MergeRequest.available_states[:locked]) expect(invalid_state_merge_request.reload.state_id).to be_nil end + + it_behaves_like 'rescheduling migrations' do + let(:worker_class) { Gitlab::BackgroundMigration::SyncMergeRequestsStateId } + let(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } + let(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') } + end end end end -- GitLab From 52155d8cf8374e9184c2ae834cab761b7520db93 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 19 Feb 2019 11:33:30 -0300 Subject: [PATCH 14/18] Add more specs and code improvements --- ...112022_schedule_sync_issuables_state_id.rb | 31 +++++++---- .../background_migration/delete_diff_files.rb | 4 +- .../helpers/reschedulable.rb | 45 ++++++++-------- .../sync_issues_state_id.rb | 6 +-- .../sync_merge_requests_state_id.rb | 6 +-- .../schedule_sync_issuables_state_id_spec.rb | 53 +++++++++++++------ 6 files changed, 89 insertions(+), 56 deletions(-) diff --git a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb index d9b77d2f02c..898bad043ac 100644 --- a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb +++ b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb @@ -1,22 +1,22 @@ -# frozen_string_literal: true - -# See http://doc.gitlab.com/ce/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] + # This migration schedules the sync of state_id for issues and merge requests + # which are converting the state column from string to integer. + # For more information check: https://gitlab.com/gitlab-org/gitlab-ce/issues/51789 + include Gitlab::Database::MigrationHelpers DOWNTIME = false - # 2019-02-12 Gitlab.com issuable numbers + # 2019-02-12 gitlab.com issuable numbers # issues count: 13587305 # merge requests count: 18925274 - # Using this 25000 as batch size should take around 26 hours + # + # Using 25000 as batch size should take around 26 hours # to migrate both issues and merge requests BATCH_SIZE = 25000 DELAY_INTERVAL = 5.minutes.to_i - ISSUE_MIGRATION = 'SyncIssuesStateId'.freeze - MERGE_REQUEST_MIGRATION = 'SyncMergeRequestsStateId'.freeze + ISSUES_MIGRATION = 'SyncIssuesStateId'.freeze + MERGE_REQUESTS_MIGRATION = 'SyncMergeRequestsStateId'.freeze class Issue < ActiveRecord::Base include EachBatch @@ -32,8 +32,17 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] def up Sidekiq::Worker.skipping_transaction_check do - queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), ISSUE_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) - queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), MERGE_REQUEST_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), + ISSUES_MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE + ) + + queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), + MERGE_REQUESTS_MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE + ) end end diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 4cb068eb71a..0066faa23dd 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -20,14 +20,14 @@ module Gitlab def perform(ids) @ids = ids - reschedule_if_needed([ids]) do + reschedule_if_needed(ids) do prune_diff_files end end private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb index 6087181609e..60ab62cd973 100644 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ b/lib/gitlab/background_migration/helpers/reschedulable.rb @@ -5,7 +5,8 @@ module Gitlab module Helpers # == Reschedulable helper # - # Allows background migrations to be reschedule itself if a condition is not met. + # Allows background migrations to be rescheduled if a condition is met, + # the condition should be overridden in classes in #should_reschedule? method. # # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count # on DB is not acceptable. @@ -13,18 +14,34 @@ module Gitlab module Reschedulable extend ActiveSupport::Concern - def reschedule_if_needed(args, &block) - if need_reschedule? - BackgroundMigrationWorker.perform_in(vacuum_wait_time, self.class.name.demodulize, args) + # Use this method to perform the background migration and it will be rescheduled + # if #should_reschedule? returns true. + def reschedule_if_needed(*args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) else yield end end # Override this on base class if you need a different reschedule condition - # def need_reschedule? - # raise NotImplementedError, "#{self.class} does not implement #{__method__}" - # end + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + # Override in subclass if a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if a different wait time + def wait_time + @wait_time ||= 5.minutes + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end def wait_for_deadtuple_vacuum?(table_name) return false unless Gitlab::Database.postgresql? @@ -39,20 +56,6 @@ module Gitlab dead_tuple&.fetch('n_dead_tup', 0).to_i end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - # Override in subclass if you need a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if you need a different vacuum wait time - def vacuum_wait_time - @vacuum_wait_time ||= 5.minutes - end end end end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index f95c433c64c..50841c3fe4d 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -9,8 +9,8 @@ module Gitlab def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed([start_id, end_id]) do - ActiveRecord::Base.connection.execute <<~SQL + reschedule_if_needed(start_id, end_id) do + execute_statement <<~SQL UPDATE issues SET state_id = CASE state @@ -25,7 +25,7 @@ module Gitlab private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?('issues') end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 5d92553f577..4cf8e993c52 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -9,8 +9,8 @@ module Gitlab def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed([start_id, end_id]) do - ActiveRecord::Base.connection.execute <<~SQL + reschedule_if_needed(start_id, end_id) do + execute_statement <<~SQL UPDATE merge_requests SET state_id = CASE state @@ -27,7 +27,7 @@ module Gitlab private - def need_reschedule? + def should_reschedule? wait_for_deadtuple_vacuum?('issues') end end diff --git a/spec/migrations/schedule_sync_issuables_state_id_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb index a746fb68c30..6c4c31ae554 100644 --- a/spec/migrations/schedule_sync_issuables_state_id_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -15,6 +15,25 @@ describe ScheduleSyncIssuablesStateId, :migration do @project = projects.create!(namespace_id: @group.id) end + shared_examples 'scheduling migrations' do + before do + Sidekiq::Worker.clear_all + stub_const("#{described_class.name}::BATCH_SIZE", 2) + end + + it 'correctly schedules issuable sync background migration' do + Sidekiq::Testing.fake! do + Timecop.freeze do + migrate! + + expect(migration).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) + expect(migration).to be_scheduled_delayed_migration(10.minutes, resource_3.id, resource_4.id) + expect(BackgroundMigrationWorker.jobs.size).to eq(2) + end + end + end + end + shared_examples 'rescheduling migrations' do before do Sidekiq::Worker.clear_all @@ -35,20 +54,6 @@ describe ScheduleSyncIssuablesStateId, :migration do end describe '#up' do - # it 'correctly schedules diff file deletion workers' do - # Sidekiq::Testing.fake! do - # Timecop.freeze do - # described_class.new.perform - - # expect(described_class::MIGRATION).to be_scheduled_delayed_migration(5.minutes, [1, 4, 5]) - - # expect(described_class::MIGRATION).to be_scheduled_delayed_migration(10.minutes, [6]) - - # expect(BackgroundMigrationWorker.jobs.size).to eq(2) - # end - # end - # end - context 'issues' do it 'migrates state column to integer' do opened_issue = issues.create!(description: 'first', state: 'opened') @@ -64,10 +69,18 @@ describe ScheduleSyncIssuablesStateId, :migration do expect(nil_state_issue.reload.state_id).to be_nil end + it_behaves_like 'scheduling migrations' do + let(:migration) { described_class::ISSUES_MIGRATION } + let!(:resource_1) { issues.create!(description: 'first', state: 'opened') } + let!(:resource_2) { issues.create!(description: 'second', state: 'closed') } + let!(:resource_3) { issues.create!(description: 'third', state: 'closed') } + let!(:resource_4) { issues.create!(description: 'fourth', state: 'closed') } + end + it_behaves_like 'rescheduling migrations' do let(:worker_class) { Gitlab::BackgroundMigration::SyncIssuesStateId } - let(:resource_1) { issues.create!(description: 'first', state: 'opened') } - let(:resource_2) { issues.create!(description: 'second', state: 'closed') } + let!(:resource_1) { issues.create!(description: 'first', state: 'opened') } + let!(:resource_2) { issues.create!(description: 'second', state: 'closed') } end end @@ -88,6 +101,14 @@ describe ScheduleSyncIssuablesStateId, :migration do expect(invalid_state_merge_request.reload.state_id).to be_nil end + it_behaves_like 'scheduling migrations' do + let(:migration) { described_class::MERGE_REQUESTS_MIGRATION } + let!(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } + let!(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') } + let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') } + let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') } + end + it_behaves_like 'rescheduling migrations' do let(:worker_class) { Gitlab::BackgroundMigration::SyncMergeRequestsStateId } let(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } -- GitLab From 7bd066a1fa51018211e26ca0c5624aecbc364a66 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Tue, 19 Feb 2019 14:00:53 -0300 Subject: [PATCH 15/18] Address review comments --- ...112022_schedule_sync_issuables_state_id.rb | 28 +++++---- lib/gitlab/background_migration.rb | 2 - .../background_migration/delete_diff_files.rb | 37 +++++++++-- .../helpers/reschedulable.rb | 62 ------------------- .../background_migration/reschedulable.rb | 60 ++++++++++++++++++ .../sync_issues_state_id.rb | 2 +- .../sync_merge_requests_state_id.rb | 2 +- .../delete_diff_files_spec.rb | 10 +-- 8 files changed, 113 insertions(+), 90 deletions(-) delete mode 100644 lib/gitlab/background_migration/helpers/reschedulable.rb create mode 100644 lib/gitlab/background_migration/reschedulable.rb diff --git a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb index 898bad043ac..2167f19e022 100644 --- a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb +++ b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb @@ -18,6 +18,8 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] ISSUES_MIGRATION = 'SyncIssuesStateId'.freeze MERGE_REQUESTS_MIGRATION = 'SyncMergeRequestsStateId'.freeze + disable_ddl_transaction! + class Issue < ActiveRecord::Base include EachBatch @@ -31,19 +33,19 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] end def up - Sidekiq::Worker.skipping_transaction_check do - queue_background_migration_jobs_by_range_at_intervals(Issue.where(state_id: nil), - ISSUES_MIGRATION, - DELAY_INTERVAL, - batch_size: BATCH_SIZE - ) - - queue_background_migration_jobs_by_range_at_intervals(MergeRequest.where(state_id: nil), - MERGE_REQUESTS_MIGRATION, - DELAY_INTERVAL, - batch_size: BATCH_SIZE - ) - end + queue_background_migration_jobs_by_range_at_intervals( + Issue.where(state_id: nil), + ISSUES_MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE + ) + + queue_background_migration_jobs_by_range_at_intervals( + MergeRequest.where(state_id: nil), + MERGE_REQUESTS_MIGRATION, + DELAY_INTERVAL, + batch_size: BATCH_SIZE + ) end def down diff --git a/lib/gitlab/background_migration.rb b/lib/gitlab/background_migration.rb index 948488c844c..5251e0fadf9 100644 --- a/lib/gitlab/background_migration.rb +++ b/lib/gitlab/background_migration.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -Dir[Rails.root.join("lib/gitlab/background_migration/helpers/*.rb")].each { |f| require f } - module Gitlab module BackgroundMigration def self.queue diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb index 0066faa23dd..664ead1af44 100644 --- a/lib/gitlab/background_migration/delete_diff_files.rb +++ b/lib/gitlab/background_migration/delete_diff_files.rb @@ -4,8 +4,6 @@ module Gitlab module BackgroundMigration class DeleteDiffFiles - include Helpers::Reschedulable - class MergeRequestDiff < ActiveRecord::Base self.table_name = 'merge_request_diffs' @@ -17,24 +15,47 @@ module Gitlab self.table_name = 'merge_request_diff_files' end + DEAD_TUPLES_THRESHOLD = 50_000 + VACUUM_WAIT_TIME = 5.minutes + def perform(ids) @ids = ids - reschedule_if_needed(ids) do - prune_diff_files + # We should reschedule until deadtuples get in a desirable + # state (e.g. < 50_000). That may take more than one reschedule. + # + if should_wait_deadtuple_vacuum? + reschedule + return end + + prune_diff_files + end + + def should_wait_deadtuple_vacuum? + return false unless Gitlab::Database.postgresql? + + diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD end private - def should_reschedule? - wait_for_deadtuple_vacuum?(MergeRequestDiffFile.table_name) + def reschedule + BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize, [@ids]) end def diffs_collection MergeRequestDiff.where(id: @ids) end + def diff_files_dead_tuples_count + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = 'merge_request_diff_files'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + def prune_diff_files removed = 0 updated = 0 @@ -48,6 +69,10 @@ module Gitlab "updated #{updated} merge_request_diffs rows") end + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + def log_info(message) Rails.logger.info("BackgroundMigration::DeleteDiffFiles - #{message}") end diff --git a/lib/gitlab/background_migration/helpers/reschedulable.rb b/lib/gitlab/background_migration/helpers/reschedulable.rb deleted file mode 100644 index 60ab62cd973..00000000000 --- a/lib/gitlab/background_migration/helpers/reschedulable.rb +++ /dev/null @@ -1,62 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - module Helpers - # == Reschedulable helper - # - # Allows background migrations to be rescheduled if a condition is met, - # the condition should be overridden in classes in #should_reschedule? method. - # - # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count - # on DB is not acceptable. - # - module Reschedulable - extend ActiveSupport::Concern - - # Use this method to perform the background migration and it will be rescheduled - # if #should_reschedule? returns true. - def reschedule_if_needed(*args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - # Override in subclass if a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if a different wait time - def wait_time - @wait_time ||= 5.minutes - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - end - end - end -end diff --git a/lib/gitlab/background_migration/reschedulable.rb b/lib/gitlab/background_migration/reschedulable.rb new file mode 100644 index 00000000000..3d6781c07c0 --- /dev/null +++ b/lib/gitlab/background_migration/reschedulable.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # == Reschedulable helper + # + # Allows background migrations to be rescheduled if a condition is met, + # the condition should be overridden in classes in #should_reschedule? method. + # + # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count + # on DB is not acceptable. + # + module Reschedulable + extend ActiveSupport::Concern + + # Use this method to perform the background migration and it will be rescheduled + # if #should_reschedule? returns true. + def reschedule_if_needed(*args, &block) + if should_reschedule? + BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) + else + yield + end + end + + # Override this on base class if you need a different reschedule condition + def should_reschedule? + raise NotImplementedError, "#{self.class} does not implement #{__method__}" + end + + # Override in subclass if a different dead tuple threshold + def dead_tuples_threshold + @dead_tuples_threshold ||= 50_000 + end + + # Override in subclass if a different wait time + def wait_time + @wait_time ||= 5.minutes + end + + def execute_statement(sql) + ActiveRecord::Base.connection.execute(sql) + end + + def wait_for_deadtuple_vacuum?(table_name) + return false unless Gitlab::Database.postgresql? + + dead_tuples_count_for(table_name) >= dead_tuples_threshold + end + + def dead_tuples_count_for(table_name) + dead_tuple = + execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ + "WHERE relname = '#{table_name}'")[0] + + dead_tuple&.fetch('n_dead_tup', 0).to_i + end + end + end +end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 50841c3fe4d..053d154cef8 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId - include Helpers::Reschedulable + include Reschedulable def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 4cf8e993c52..a94529aaf5c 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,7 +4,7 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId - include Helpers::Reschedulable + include Reschedulable def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") diff --git a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb index 690881ab59b..27281333348 100644 --- a/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb +++ b/spec/lib/gitlab/background_migration/delete_diff_files_spec.rb @@ -40,14 +40,14 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch end end - it 'reschedules itself when should wait for dead tuple vacuum' do + it 'reschedules itself when should_wait_deadtuple_vacuum' do merge_request = create(:merge_request, :merged) first_diff = merge_request.merge_request_diff second_diff = merge_request.create_merge_request_diff Sidekiq::Testing.fake! do worker = described_class.new - allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true } + allow(worker).to receive(:should_wait_deadtuple_vacuum?) { true } worker.perform([first_diff.id, second_diff.id]) @@ -57,10 +57,10 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch end end - describe '#wait_for_deadtuple_vacuum?' do + describe '#should_wait_deadtuple_vacuum?' do it 'returns true when hitting merge_request_diff_files hits DEAD_TUPLES_THRESHOLD', :postgresql do worker = described_class.new - threshold_query_result = [{ "n_dead_tup" => 50_000 }] + threshold_query_result = [{ "n_dead_tup" => described_class::DEAD_TUPLES_THRESHOLD.to_s }] normal_query_result = [{ "n_dead_tup" => '3' }] allow(worker) @@ -68,7 +68,7 @@ describe Gitlab::BackgroundMigration::DeleteDiffFiles, :migration, :sidekiq, sch .with(/SELECT n_dead_tup */) .and_return(threshold_query_result, normal_query_result) - expect(worker.wait_for_deadtuple_vacuum?('merge_request_diff_files')).to be(true) + expect(worker.should_wait_deadtuple_vacuum?).to be(true) end end end -- GitLab From 294c5c41beaac1fbc60c67df2c8745f7583544a1 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Fri, 1 Mar 2019 16:24:47 -0300 Subject: [PATCH 16/18] Remove auto vacuum logic, decrease batch size and interval --- ...112022_schedule_sync_issuables_state_id.rb | 14 +++-- .../background_migration/reschedulable.rb | 60 ------------------- .../sync_issues_state_id.rb | 30 ++++------ .../sync_merge_requests_state_id.rb | 34 ++++------- .../schedule_sync_issuables_state_id_spec.rb | 35 +---------- 5 files changed, 32 insertions(+), 141 deletions(-) delete mode 100644 lib/gitlab/background_migration/reschedulable.rb diff --git a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb index 2167f19e022..6edb0bf2d5e 100644 --- a/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb +++ b/db/post_migrate/20190214112022_schedule_sync_issuables_state_id.rb @@ -11,10 +11,12 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] # issues count: 13587305 # merge requests count: 18925274 # - # Using 25000 as batch size should take around 26 hours - # to migrate both issues and merge requests - BATCH_SIZE = 25000 - DELAY_INTERVAL = 5.minutes.to_i + # Using 5000 as batch size and 115 seconds interval will give: + # 2718 jobs for issues - taking ~86 hours + # 3786 jobs for merge requests - taking ~120 hours + # + BATCH_SIZE = 5000 + DELAY_INTERVAL = 120.seconds.to_i ISSUES_MIGRATION = 'SyncIssuesStateId'.freeze MERGE_REQUESTS_MIGRATION = 'SyncMergeRequestsStateId'.freeze @@ -34,14 +36,14 @@ class ScheduleSyncIssuablesStateId < ActiveRecord::Migration[5.0] def up queue_background_migration_jobs_by_range_at_intervals( - Issue.where(state_id: nil), + Issue.all, ISSUES_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE ) queue_background_migration_jobs_by_range_at_intervals( - MergeRequest.where(state_id: nil), + MergeRequest.all, MERGE_REQUESTS_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE diff --git a/lib/gitlab/background_migration/reschedulable.rb b/lib/gitlab/background_migration/reschedulable.rb deleted file mode 100644 index 3d6781c07c0..00000000000 --- a/lib/gitlab/background_migration/reschedulable.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module BackgroundMigration - # == Reschedulable helper - # - # Allows background migrations to be rescheduled if a condition is met, - # the condition should be overridden in classes in #should_reschedule? method. - # - # For example, check DeleteDiffFiles migration which is rescheduled if dead tuple count - # on DB is not acceptable. - # - module Reschedulable - extend ActiveSupport::Concern - - # Use this method to perform the background migration and it will be rescheduled - # if #should_reschedule? returns true. - def reschedule_if_needed(*args, &block) - if should_reschedule? - BackgroundMigrationWorker.perform_in(wait_time, self.class.name.demodulize, args) - else - yield - end - end - - # Override this on base class if you need a different reschedule condition - def should_reschedule? - raise NotImplementedError, "#{self.class} does not implement #{__method__}" - end - - # Override in subclass if a different dead tuple threshold - def dead_tuples_threshold - @dead_tuples_threshold ||= 50_000 - end - - # Override in subclass if a different wait time - def wait_time - @wait_time ||= 5.minutes - end - - def execute_statement(sql) - ActiveRecord::Base.connection.execute(sql) - end - - def wait_for_deadtuple_vacuum?(table_name) - return false unless Gitlab::Database.postgresql? - - dead_tuples_count_for(table_name) >= dead_tuples_threshold - end - - def dead_tuples_count_for(table_name) - dead_tuple = - execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\ - "WHERE relname = '#{table_name}'")[0] - - dead_tuple&.fetch('n_dead_tup', 0).to_i - end - end - end -end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 053d154cef8..33b997c8533 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -4,29 +4,19 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE issues - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE issues + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index a94529aaf5c..923ceaeec54 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -4,31 +4,21 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId - include Reschedulable - def perform(start_id, end_id) Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - reschedule_if_needed(start_id, end_id) do - execute_statement <<~SQL - UPDATE merge_requests - SET state_id = - CASE state - WHEN 'opened' THEN 1 - WHEN 'closed' THEN 2 - WHEN 'merged' THEN 3 - WHEN 'locked' THEN 4 - END - WHERE state_id IS NULL - AND id BETWEEN #{start_id} AND #{end_id} - SQL - end - end - - private - - def should_reschedule? - wait_for_deadtuple_vacuum?('issues') + ActiveRecord::Base.connection.execute <<~SQL + UPDATE merge_requests + SET state_id = + CASE state + WHEN 'opened' THEN 1 + WHEN 'closed' THEN 2 + WHEN 'merged' THEN 3 + WHEN 'locked' THEN 4 + END + WHERE state_id IS NULL + AND id BETWEEN #{start_id} AND #{end_id} + SQL end end end diff --git a/spec/migrations/schedule_sync_issuables_state_id_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb index 6c4c31ae554..6b1ea804342 100644 --- a/spec/migrations/schedule_sync_issuables_state_id_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -26,33 +26,14 @@ describe ScheduleSyncIssuablesStateId, :migration do Timecop.freeze do migrate! - expect(migration).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) - expect(migration).to be_scheduled_delayed_migration(10.minutes, resource_3.id, resource_4.id) + expect(migration).to be_scheduled_delayed_migration(120.seconds, resource_1.id, resource_2.id) + expect(migration).to be_scheduled_delayed_migration(240.seconds, resource_3.id, resource_4.id) expect(BackgroundMigrationWorker.jobs.size).to eq(2) end end end end - shared_examples 'rescheduling migrations' do - before do - Sidekiq::Worker.clear_all - end - - it 'reschedules migrations when should wait for dead tuple vacuum' do - worker = worker_class.new - - Sidekiq::Testing.fake! do - allow(worker).to receive(:wait_for_deadtuple_vacuum?) { true } - - worker.perform(resource_1.id, resource_2.id) - - expect(worker_class.name.demodulize).to be_scheduled_delayed_migration(5.minutes, resource_1.id, resource_2.id) - expect(BackgroundMigrationWorker.jobs.size).to eq(1) - end - end - end - describe '#up' do context 'issues' do it 'migrates state column to integer' do @@ -76,12 +57,6 @@ describe ScheduleSyncIssuablesStateId, :migration do let!(:resource_3) { issues.create!(description: 'third', state: 'closed') } let!(:resource_4) { issues.create!(description: 'fourth', state: 'closed') } end - - it_behaves_like 'rescheduling migrations' do - let(:worker_class) { Gitlab::BackgroundMigration::SyncIssuesStateId } - let!(:resource_1) { issues.create!(description: 'first', state: 'opened') } - let!(:resource_2) { issues.create!(description: 'second', state: 'closed') } - end end context 'merge requests' do @@ -108,12 +83,6 @@ describe ScheduleSyncIssuablesStateId, :migration do let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') } let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') } end - - it_behaves_like 'rescheduling migrations' do - let(:worker_class) { Gitlab::BackgroundMigration::SyncMergeRequestsStateId } - let(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } - let(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') } - end end end end -- GitLab From b2fb3a9c62862476a7591ecb5ab4a6a3146e0c49 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Thu, 28 Mar 2019 11:31:14 -0300 Subject: [PATCH 17/18] Address review comments --- app/models/concerns/issuable_states.rb | 8 +++--- .../sync_issues_state_id.rb | 2 +- .../schedule_sync_issuables_state_id_spec.rb | 27 +++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/app/models/concerns/issuable_states.rb b/app/models/concerns/issuable_states.rb index 2ebd5013a4c..b722c541580 100644 --- a/app/models/concerns/issuable_states.rb +++ b/app/models/concerns/issuable_states.rb @@ -17,11 +17,9 @@ module IssuableStates # Needed to prevent breaking some migration specs that # rollback database to a point where state_id does not exist. # We can use this guard clause for now since this file will - # be removed on a future the next release. - return unless self.respond_to?(:state_id) + # be removed in the next release. + return unless self.has_attribute?(:state_id) - states_hash = self.class.available_states - - self.state_id = states_hash[state] + self.state_id = self.class.available_states[state] end end diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33b997c8533..33002f58be6 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -5,7 +5,7 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId def perform(start_id, end_id) - Rails.logger.info("Issues - Populating state_id: #{start_id} - #{end_id}") + Gitlab::AppLogger.info("Issues - Populating state_id: #{start_id} - #{end_id}") ActiveRecord::Base.connection.execute <<~SQL UPDATE issues diff --git a/spec/migrations/schedule_sync_issuables_state_id_spec.rb b/spec/migrations/schedule_sync_issuables_state_id_spec.rb index 6b1ea804342..bf974d60b24 100644 --- a/spec/migrations/schedule_sync_issuables_state_id_spec.rb +++ b/spec/migrations/schedule_sync_issuables_state_id_spec.rb @@ -9,11 +9,8 @@ describe ScheduleSyncIssuablesStateId, :migration do let(:merge_requests) { table(:merge_requests) } let(:issues) { table(:issues) } let(:migration) { described_class.new } - - before do - @group = namespaces.create!(name: 'gitlab', path: 'gitlab') - @project = projects.create!(namespace_id: @group.id) - end + let(:group) { namespaces.create!(name: 'gitlab', path: 'gitlab') } + let(:project) { projects.create!(namespace_id: group.id) } shared_examples 'scheduling migrations' do before do @@ -40,14 +37,12 @@ describe ScheduleSyncIssuablesStateId, :migration do opened_issue = issues.create!(description: 'first', state: 'opened') closed_issue = issues.create!(description: 'second', state: 'closed') invalid_state_issue = issues.create!(description: 'fourth', state: 'not valid') - nil_state_issue = issues.create!(description: 'third', state: nil) migrate! expect(opened_issue.reload.state_id).to eq(Issue.available_states[:opened]) expect(closed_issue.reload.state_id).to eq(Issue.available_states[:closed]) expect(invalid_state_issue.reload.state_id).to be_nil - expect(nil_state_issue.reload.state_id).to be_nil end it_behaves_like 'scheduling migrations' do @@ -61,11 +56,10 @@ describe ScheduleSyncIssuablesStateId, :migration do context 'merge requests' do it 'migrates state column to integer' do - opened_merge_request = merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') - closed_merge_request = merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') - merged_merge_request = merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') - locked_merge_request = merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') - invalid_state_merge_request = merge_requests.create!(state: 'not valid', target_project_id: @project.id, target_branch: 'feature5', source_branch: 'master') + opened_merge_request = merge_requests.create!(state: 'opened', target_project_id: project.id, target_branch: 'feature1', source_branch: 'master') + closed_merge_request = merge_requests.create!(state: 'closed', target_project_id: project.id, target_branch: 'feature2', source_branch: 'master') + merged_merge_request = merge_requests.create!(state: 'merged', target_project_id: project.id, target_branch: 'feature3', source_branch: 'master') + locked_merge_request = merge_requests.create!(state: 'locked', target_project_id: project.id, target_branch: 'feature4', source_branch: 'master') migrate! @@ -73,15 +67,14 @@ describe ScheduleSyncIssuablesStateId, :migration do expect(closed_merge_request.reload.state_id).to eq(MergeRequest.available_states[:closed]) expect(merged_merge_request.reload.state_id).to eq(MergeRequest.available_states[:merged]) expect(locked_merge_request.reload.state_id).to eq(MergeRequest.available_states[:locked]) - expect(invalid_state_merge_request.reload.state_id).to be_nil end it_behaves_like 'scheduling migrations' do let(:migration) { described_class::MERGE_REQUESTS_MIGRATION } - let!(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: @project.id, target_branch: 'feature1', source_branch: 'master') } - let!(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: @project.id, target_branch: 'feature2', source_branch: 'master') } - let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: @project.id, target_branch: 'feature3', source_branch: 'master') } - let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: @project.id, target_branch: 'feature4', source_branch: 'master') } + let!(:resource_1) { merge_requests.create!(state: 'opened', target_project_id: project.id, target_branch: 'feature1', source_branch: 'master') } + let!(:resource_2) { merge_requests.create!(state: 'closed', target_project_id: project.id, target_branch: 'feature2', source_branch: 'master') } + let!(:resource_3) { merge_requests.create!(state: 'merged', target_project_id: project.id, target_branch: 'feature3', source_branch: 'master') } + let!(:resource_4) { merge_requests.create!(state: 'locked', target_project_id: project.id, target_branch: 'feature4', source_branch: 'master') } end end end -- GitLab From f2b7da4bf507691cffd419d3dd759fcf6311cdd6 Mon Sep 17 00:00:00 2001 From: Felipe Artur Date: Mon, 1 Apr 2019 15:04:36 -0300 Subject: [PATCH 18/18] Remove additional logging --- lib/gitlab/background_migration/sync_issues_state_id.rb | 2 -- lib/gitlab/background_migration/sync_merge_requests_state_id.rb | 2 -- 2 files changed, 4 deletions(-) diff --git a/lib/gitlab/background_migration/sync_issues_state_id.rb b/lib/gitlab/background_migration/sync_issues_state_id.rb index 33002f58be6..2a0751928b8 100644 --- a/lib/gitlab/background_migration/sync_issues_state_id.rb +++ b/lib/gitlab/background_migration/sync_issues_state_id.rb @@ -5,8 +5,6 @@ module Gitlab module BackgroundMigration class SyncIssuesStateId def perform(start_id, end_id) - Gitlab::AppLogger.info("Issues - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL UPDATE issues SET state_id = diff --git a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb index 923ceaeec54..6707e178d8b 100644 --- a/lib/gitlab/background_migration/sync_merge_requests_state_id.rb +++ b/lib/gitlab/background_migration/sync_merge_requests_state_id.rb @@ -5,8 +5,6 @@ module Gitlab module BackgroundMigration class SyncMergeRequestsStateId def perform(start_id, end_id) - Rails.logger.info("Merge Requests - Populating state_id: #{start_id} - #{end_id}") - ActiveRecord::Base.connection.execute <<~SQL UPDATE merge_requests SET state_id = -- GitLab