Merge upstream 2.0ish #165

This commit is contained in:
kibigo!
2017-10-11 10:43:10 -07:00
322 changed files with 8478 additions and 2587 deletions

View File

@@ -0,0 +1,988 @@
# frozen_string_literal: true
# This file is copied almost entirely from GitLab, which has done a large
# amount of work to ensure that migrations can happen with minimal downtime.
# Many thanks to those engineers.
# Changes have been made to remove dependencies on other GitLab files and to
# shorten temporary column names.
# Documentation on using these functions (and why one might do so):
# https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/development/what_requires_downtime.md
# The file itself:
# https://gitlab.com/gitlab-org/gitlab-ce/blob/master/lib/gitlab/database/migration_helpers.rb
# It is licensed as follows:
# Copyright (c) 2011-2017 GitLab B.V.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# This is bad form, but there are enough differences that it's impractical to do
# otherwise:
# rubocop:disable all
module Mastodon
module MigrationHelpers
# Stub for Database.postgresql? from GitLab
def self.postgresql?
ActiveRecord::Base.configurations[Rails.env]['adapter'].casecmp('postgresql').zero?
end
# Stub for Database.mysql? from GitLab
def self.mysql?
ActiveRecord::Base.configurations[Rails.env]['adapter'].casecmp('mysql2').zero?
end
# Model that can be used for querying permissions of a SQL user.
class Grant < ActiveRecord::Base
self.table_name =
if Mastodon::MigrationHelpers.postgresql?
'information_schema.role_table_grants'
else
'mysql.user'
end
def self.scope_to_current_user
if Mastodon::MigrationHelpers.postgresql?
where('grantee = user')
else
where("CONCAT(User, '@', Host) = current_user()")
end
end
# Returns true if the current user can create and execute triggers on the
# given table.
def self.create_and_execute_trigger?(table)
priv =
if Mastodon::MigrationHelpers.postgresql?
where(privilege_type: 'TRIGGER', table_name: table)
else
where(Trigger_priv: 'Y')
end
priv.scope_to_current_user.any?
end
end
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
# Gets an estimated number of rows for a table
def estimate_rows_in_table(table_name)
exec_query('SELECT reltuples FROM pg_class WHERE relname = ' +
"'#{table_name}'").to_a.first['reltuples']
end
# Adds `created_at` and `updated_at` columns with timezone information.
#
# This method is an improved version of Rails' built-in method `add_timestamps`.
#
# Available options are:
# default - The default value for the column.
# null - When set to `true` the column will allow NULL values.
# The default is to not allow NULL values.
def add_timestamps_with_timezone(table_name, options = {})
options[:null] = false if options[:null].nil?
[:created_at, :updated_at].each do |column_name|
if options[:default] && transaction_open?
raise '`add_timestamps_with_timezone` with default value cannot be run inside a transaction. ' \
'You can disable transactions by calling `disable_ddl_transaction!` ' \
'in the body of your migration class'
end
# If default value is presented, use `add_column_with_default` method instead.
if options[:default]
add_column_with_default(
table_name,
column_name,
:datetime_with_timezone,
default: options[:default],
allow_null: options[:null]
)
else
add_column(table_name, column_name, :datetime_with_timezone, options)
end
end
end
# Creates a new index, concurrently when supported
#
# On PostgreSQL this method creates an index concurrently, on MySQL this
# creates a regular index.
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
def add_concurrent_index(table_name, column_name, options = {})
if transaction_open?
raise 'add_concurrent_index can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
if MigrationHelpers.postgresql?
options = options.merge({ algorithm: :concurrently })
disable_statement_timeout
end
add_index(table_name, column_name, options)
end
# Removes an existed index, concurrently when supported
#
# On PostgreSQL this method removes an index concurrently.
#
# Example:
#
# remove_concurrent_index :users, :some_column
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index(table_name, column_name, options = {})
if transaction_open?
raise 'remove_concurrent_index can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
if supports_drop_index_concurrently?
options = options.merge({ algorithm: :concurrently })
disable_statement_timeout
end
remove_index(table_name, options.merge({ column: column_name }))
end
# Removes an existing index, concurrently when supported
#
# On PostgreSQL this method removes an index concurrently.
#
# Example:
#
# remove_concurrent_index :users, "index_X_by_Y"
#
# See Rails' `remove_index` for more info on the available arguments.
def remove_concurrent_index_by_name(table_name, index_name, options = {})
if transaction_open?
raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
if supports_drop_index_concurrently?
options = options.merge({ algorithm: :concurrently })
disable_statement_timeout
end
remove_index(table_name, options.merge({ name: index_name }))
end
# Only available on Postgresql >= 9.2
def supports_drop_index_concurrently?
return false unless MigrationHelpers.postgresql?
version = select_one("SELECT current_setting('server_version_num') AS v")['v'].to_i
version >= 90200
end
# Adds a foreign key with only minimal locking on the tables involved.
#
# This method only requires minimal locking when using PostgreSQL. When
# using MySQL this method will use Rails' default `add_foreign_key`.
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_col: 'id')
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
if transaction_open?
raise 'add_concurrent_foreign_key can not be run inside a transaction'
end
# While MySQL does allow disabling of foreign keys it has no equivalent
# of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
# back to the normal foreign key procedure.
if MigrationHelpers.mysql?
return add_foreign_key(source, target,
column: column,
on_delete: on_delete)
else
on_delete = 'SET NULL' if on_delete == :nullify
end
disable_statement_timeout
key_name = concurrent_foreign_key_name(source, column, target_col)
# Using NOT VALID allows us to create a key without immediately
# validating it. This means we keep the ALTER TABLE lock only for a
# short period of time. The key _is_ enforced for any newly created
# data.
execute <<-EOF.strip_heredoc
ALTER TABLE #{source}
ADD CONSTRAINT #{key_name}
FOREIGN KEY (#{column})
REFERENCES #{target} (#{target_col})
#{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
NOT VALID;
EOF
# Validate the existing constraint. This can potentially take a very
# long time to complete, but fortunately does not lock the source table
# while running.
execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
end
# Returns the name for a concurrent foreign key.
#
# PostgreSQL constraint names have a limit of 63 bytes. The logic used
# here is based on Rails' foreign_key_name() method, which unfortunately
# is private so we can't rely on it directly.
def concurrent_foreign_key_name(table, column, target_col)
"fk_#{Digest::SHA256.hexdigest("#{table}_#{column}_#{target_col}_fk").first(10)}"
end
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
# migrations don't get killed prematurely. (PostgreSQL only)
def disable_statement_timeout
execute('SET statement_timeout TO 0') if MigrationHelpers.postgresql?
end
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
# This method will continue updating rows until no rows remain.
#
# When given a block this method will yield two values to the block:
#
# 1. An instance of `Arel::Table` for the table that is being updated.
# 2. The query to run as an Arel object.
#
# By supplying a block one can add extra conditions to the queries being
# executed. Note that the same block is used for _all_ queries.
#
# Example:
#
# update_column_in_batches(:projects, :foo, 10) do |table, query|
# query.where(table[:some_column].eq('hello'))
# end
#
# This would result in this method updating only rows where
# `projects.some_column` equals "hello".
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
#
# Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
# determines this method to be too complex while there's no way to make it
# less "complex" without introducing extra methods (which actually will
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
def update_column_in_batches(table_name, column, value)
if transaction_open?
raise 'update_column_in_batches can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
table = Arel::Table.new(table_name)
total = estimate_rows_in_table(table_name).to_i
if total == 0
count_arel = table.project(Arel.star.count.as('count'))
count_arel = yield table, count_arel if block_given?
total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
return if total == 0
end
# Update in batches of 5% until we run out of any rows to update.
batch_size = ((total / 100.0) * 5.0).ceil
max_size = 1000
# The upper limit is 1000 to ensure we don't lock too many rows. For
# example, for "merge_requests" even 1% of the table is around 35 000
# rows for GitLab.com.
batch_size = max_size if batch_size > max_size
start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
start_arel = yield table, start_arel if block_given?
start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i
say "Migrating #{table_name}.#{column} (~#{total.to_i} rows)"
started_time = Time.now
last_time = Time.now
migrated = 0
loop do
stop_row = nil
suppress_messages do
stop_arel = table.project(table[:id])
.where(table[:id].gteq(start_id))
.order(table[:id].asc)
.take(1)
.skip(batch_size)
stop_arel = yield table, stop_arel if block_given?
stop_row = exec_query(stop_arel.to_sql).to_hash.first
update_arel = Arel::UpdateManager.new
.table(table)
.set([[table[column], value]])
.where(table[:id].gteq(start_id))
if stop_row
stop_id = stop_row['id'].to_i
start_id = stop_id
update_arel = update_arel.where(table[:id].lt(stop_id))
end
update_arel = yield table, update_arel if block_given?
execute(update_arel.to_sql)
end
migrated += batch_size
if Time.now - last_time > 1
status = "Migrated #{migrated} rows"
percentage = 100.0 * migrated / total
status += " (~#{sprintf('%.2f', percentage)}%, "
remaining_time = (100.0 - percentage) * (Time.now - started_time) / percentage
status += "#{(remaining_time / 60).to_i}:"
status += sprintf('%02d', remaining_time.to_i % 60)
status += ' remaining, '
# Tell users not to interrupt if we're almost done.
if remaining_time > 10
status += 'safe to interrupt'
else
status += 'DO NOT interrupt'
end
status += ')'
say status, true
last_time = Time.now
end
# There are no more rows left to update.
break unless stop_row
end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
# 2. Change the default value of the column to the specified value.
# 3. Update all existing rows in batches.
# 4. Set a `NOT NULL` constraint on the column if desired (the default).
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
# limit - Sets a column limit. For example, for :integer, the default is
# 4-bytes. Set `limit: 8` to allow 8-byte integers.
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
#
# This method can also take a block which is passed directly to the
# `update_column_in_batches` method.
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
if transaction_open?
raise 'add_column_with_default can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
disable_statement_timeout
transaction do
if limit
add_column(table, column, type, default: nil, limit: limit)
else
add_column(table, column, type, default: nil)
end
# Changing the default before the update ensures any newly inserted
# rows already use the proper default value.
change_column_default(table, column, default)
end
begin
update_column_in_batches(table, column, default, &block)
change_column_null(table, column, false) unless allow_null
# We want to rescue _all_ exceptions here, even those that don't inherit
# from StandardError.
rescue Exception => error # rubocop: disable all
remove_column(table, column)
raise error
end
end
# Renames a column without requiring downtime.
#
# Concurrent renames work by using database triggers to ensure both the
# old and new column are in sync. However, this method will _not_ remove
# the triggers or the old column automatically; this needs to be done
# manually in a post-deployment migration. This can be done using the
# method `cleanup_concurrent_column_rename`.
#
# table - The name of the database table containing the column.
# old - The old column name.
# new - The new column name.
# type - The type of the new column. If no type is given the old column's
# type is used.
def rename_column_concurrently(table, old, new, type: nil)
if transaction_open?
raise 'rename_column_concurrently can not be run inside a transaction'
end
check_trigger_permissions!(table)
trigger_name = rename_trigger_name(table, old, new)
# If we were in the middle of update_column_in_batches, we should remove
# the old column and start over, as we have no idea where we were.
if column_for(table, new)
if MigrationHelpers.postgresql?
remove_rename_triggers_for_postgresql(table, trigger_name)
else
remove_rename_triggers_for_mysql(trigger_name)
end
remove_column(table, new)
end
old_col = column_for(table, old)
new_type = type || old_col.type
col_opts = {
precision: old_col.precision,
scale: old_col.scale,
}
# We may be trying to reset the limit on an integer column type, so let
# Rails handle that.
unless [:bigint, :integer].include?(new_type)
col_opts[:limit] = old_col.limit
end
add_column(table, new, new_type, col_opts)
# We set the default value _after_ adding the column so we don't end up
# updating any existing data with the default value. This isn't
# necessary since we copy over old values further down.
change_column_default(table, new, old_col.default) if old_col.default
quoted_table = quote_table_name(table)
quoted_old = quote_column_name(old)
quoted_new = quote_column_name(new)
if MigrationHelpers.postgresql?
install_rename_triggers_for_postgresql(trigger_name, quoted_table,
quoted_old, quoted_new)
else
install_rename_triggers_for_mysql(trigger_name, quoted_table,
quoted_old, quoted_new)
end
update_column_in_batches(table, new, Arel::Table.new(table)[old])
change_column_null(table, new, false) unless old_col.null
copy_indexes(table, old, new)
copy_foreign_keys(table, old, new)
end
# Changes the type of a column concurrently.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def change_column_type_concurrently(table, column, new_type)
temp_column = rename_column_name(column)
rename_column_concurrently(table, column, temp_column, type: new_type)
# Primary keys don't necessarily have an associated index.
if ActiveRecord::Base.get_primary_key(table) == column.to_s
old_pk_index_name = "index_#{table}_on_#{column}"
new_pk_index_name = "index_#{table}_on_#{column}_cm"
unless indexes_for(table, column).find{|i| i.name == old_pk_index_name}
add_concurrent_index(table, [temp_column], {
unique: true,
name: new_pk_index_name
})
end
end
end
# Performs cleanup of a concurrent type change.
#
# table - The table containing the column.
# column - The name of the column to change.
# new_type - The new column type.
def cleanup_concurrent_column_type_change(table, column)
temp_column = rename_column_name(column)
# Wait for the indices to be built
indexes_for(table, column).each do |index|
expected_name = index.name + '_cm'
puts "Waiting for index #{expected_name}"
sleep 1 until indexes_for(table, temp_column).find {|i| i.name == expected_name }
end
was_primary = (ActiveRecord::Base.get_primary_key(table) == column.to_s)
old_default_fn = column_for(table, column).default_function
old_fks = []
if was_primary
# Get any foreign keys pointing at this column we need to recreate, and
# remove the old ones.
# Based on code from:
# http://errorbank.blogspot.com/2011/03/list-all-foreign-keys-references-for.html
old_fks_res = execute <<-EOF.strip_heredoc
select m.relname as src_table,
(select a.attname
from pg_attribute a
where a.attrelid = m.oid
and a.attnum = o.conkey[1]
and a.attisdropped = false) as src_col,
o.conname as name,
o.confdeltype as on_delete
from pg_constraint o
left join pg_class f on f.oid = o.confrelid
left join pg_class c on c.oid = o.conrelid
left join pg_class m on m.oid = o.conrelid
where o.contype = 'f'
and o.conrelid in (
select oid from pg_class c where c.relkind = 'r')
and f.relname = '#{table}';
EOF
old_fks = old_fks_res.to_a
old_fks.each do |old_fk|
add_concurrent_foreign_key(
old_fk['src_table'],
table,
column: old_fk['src_col'],
target_col: temp_column,
on_delete: extract_foreign_key_action(old_fk['on_delete'])
)
remove_foreign_key(old_fk['src_table'], name: old_fk['name'])
end
end
# If there was a sequence owned by the old column, make it owned by the
# new column, as it will otherwise be deleted when we get rid of the
# old column.
if (seq_match = /^nextval\('([^']*)'(::text|::regclass)?\)/.match(old_default_fn))
seq_name = seq_match[1]
execute("ALTER SEQUENCE #{seq_name} OWNED BY #{table}.#{temp_column}")
end
transaction do
# This has to be performed in a transaction as otherwise we might have
# inconsistent data.
cleanup_concurrent_column_rename(table, column, temp_column)
rename_column(table, temp_column, column)
# If there was an old default function, we didn't copy it. Do that now
# in the transaction, so we don't miss anything.
change_column_default(table, column, -> { old_default_fn }) if old_default_fn
end
# Rename any indices back to what they should be.
indexes_for(table, column).each do |index|
next unless index.name.end_with?('_cm')
real_index_name = index.name.sub(/_cm$/, '')
rename_index(table, index.name, real_index_name)
end
# Rename any foreign keys back to names based on the real column.
foreign_keys_for(table, column).each do |fk|
old_fk_name = concurrent_foreign_key_name(fk.from_table, temp_column, 'id')
new_fk_name = concurrent_foreign_key_name(fk.from_table, column, 'id')
execute("ALTER TABLE #{fk.from_table} RENAME CONSTRAINT " +
"#{old_fk_name} TO #{new_fk_name}")
end
# Rename any foreign keys from other tables to names based on the real
# column.
old_fks.each do |old_fk|
old_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
old_fk['src_col'], temp_column)
new_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
old_fk['src_col'], column)
execute("ALTER TABLE #{old_fk['src_table']} RENAME CONSTRAINT " +
"#{old_fk_name} TO #{new_fk_name}")
end
# If the old column was a primary key, mark the new one as a primary key.
if was_primary
execute("ALTER TABLE #{table} ADD PRIMARY KEY USING INDEX " +
"index_#{table}_on_#{column}")
end
end
# Cleans up a concurrent column name.
#
# This method takes care of removing previously installed triggers as well
# as removing the old column.
#
# table - The name of the database table.
# old - The name of the old column.
# new - The name of the new column.
def cleanup_concurrent_column_rename(table, old, new)
trigger_name = rename_trigger_name(table, old, new)
check_trigger_permissions!(table)
if MigrationHelpers.postgresql?
remove_rename_triggers_for_postgresql(table, trigger_name)
else
remove_rename_triggers_for_mysql(trigger_name)
end
remove_column(table, old)
end
# Performs a concurrent column rename when using PostgreSQL.
def install_rename_triggers_for_postgresql(trigger, table, old, new)
execute <<-EOF.strip_heredoc
CREATE OR REPLACE FUNCTION #{trigger}()
RETURNS trigger AS
$BODY$
BEGIN
NEW.#{new} := NEW.#{old};
RETURN NEW;
END;
$BODY$
LANGUAGE 'plpgsql'
VOLATILE
EOF
execute <<-EOF.strip_heredoc
CREATE TRIGGER #{trigger}
BEFORE INSERT OR UPDATE
ON #{table}
FOR EACH ROW
EXECUTE PROCEDURE #{trigger}()
EOF
end
# Installs the triggers necessary to perform a concurrent column rename on
# MySQL.
def install_rename_triggers_for_mysql(trigger, table, old, new)
execute <<-EOF.strip_heredoc
CREATE TRIGGER #{trigger}_insert
BEFORE INSERT
ON #{table}
FOR EACH ROW
SET NEW.#{new} = NEW.#{old}
EOF
execute <<-EOF.strip_heredoc
CREATE TRIGGER #{trigger}_update
BEFORE UPDATE
ON #{table}
FOR EACH ROW
SET NEW.#{new} = NEW.#{old}
EOF
end
# Removes the triggers used for renaming a PostgreSQL column concurrently.
def remove_rename_triggers_for_postgresql(table, trigger)
execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
execute("DROP FUNCTION IF EXISTS #{trigger}()")
end
# Removes the triggers used for renaming a MySQL column concurrently.
def remove_rename_triggers_for_mysql(trigger)
execute("DROP TRIGGER IF EXISTS #{trigger}_insert")
execute("DROP TRIGGER IF EXISTS #{trigger}_update")
end
# Returns the (base) name to use for triggers when renaming columns.
def rename_trigger_name(table, old, new)
'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12)
end
# Returns the name to use for temporary rename columns.
def rename_column_name(base)
base.to_s + '_cm'
end
# Returns an Array containing the indexes for the given column
def indexes_for(table, column)
column = column.to_s
indexes(table).select { |index| index.columns.include?(column) }
end
# Returns an Array containing the foreign keys for the given column.
def foreign_keys_for(table, column)
column = column.to_s
foreign_keys(table).select { |fk| fk.column == column }
end
# Copies all indexes for the old column to a new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_indexes(table, old, new)
old = old.to_s
new = new.to_s
indexes_for(table, old).each do |index|
new_columns = index.columns.map do |column|
column == old ? new : column
end
# This is necessary as we can't properly rename indexes such as
# "ci_taggings_idx".
name = index.name + '_cm'
# If the order contained the old column, map it to the new one.
order = index.orders
if order.key?(old)
order[new] = order.delete(old)
end
options = {
unique: index.unique,
name: name,
length: index.lengths,
order: order
}
# These options are not supported by MySQL, so we only add them if
# they were previously set.
options[:using] = index.using if index.using
options[:where] = index.where if index.where
add_concurrent_index(table, new_columns, options)
end
end
# Copies all foreign keys for the old column to the new column.
#
# table - The table containing the columns and indexes.
# old - The old column.
# new - The new column.
def copy_foreign_keys(table, old, new)
foreign_keys_for(table, old).each do |fk|
add_concurrent_foreign_key(fk.from_table,
fk.to_table,
column: new,
on_delete: fk.on_delete)
end
end
# Returns the column for the given table and column name.
def column_for(table, name)
name = name.to_s
columns(table).find { |column| column.name == name }
end
# This will replace the first occurance of a string in a column with
# the replacement
# On postgresql we can use `regexp_replace` for that.
# On mysql we find the location of the pattern, and overwrite it
# with the replacement
def replace_sql(column, pattern, replacement)
quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
if MigrationHelpers.mysql?
locate = Arel::Nodes::NamedFunction
.new('locate', [quoted_pattern, column])
insert_in_place = Arel::Nodes::NamedFunction
.new('insert', [column, locate, pattern.size, quoted_replacement])
Arel::Nodes::SqlLiteral.new(insert_in_place.to_sql)
else
replace = Arel::Nodes::NamedFunction
.new("regexp_replace", [column, quoted_pattern, quoted_replacement])
Arel::Nodes::SqlLiteral.new(replace.to_sql)
end
end
def remove_foreign_key_without_error(*args)
remove_foreign_key(*args)
rescue ArgumentError
end
def sidekiq_queue_migrate(queue_from, to:)
while sidekiq_queue_length(queue_from) > 0
Sidekiq.redis do |conn|
conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
end
end
end
def sidekiq_queue_length(queue_name)
Sidekiq.redis do |conn|
conn.llen("queue:#{queue_name}")
end
end
def check_trigger_permissions!(table)
unless Grant.create_and_execute_trigger?(table)
dbname = ActiveRecord::Base.configurations[Rails.env]['database']
user = ActiveRecord::Base.configurations[Rails.env]['username'] || ENV['USER']
raise <<-EOF
Your database user is not allowed to create, drop, or execute triggers on the
table #{table}.
If you are using PostgreSQL you can solve this by logging in to the GitLab
database (#{dbname}) using a super user and running:
ALTER #{user} WITH SUPERUSER
For MySQL you instead need to run:
GRANT ALL PRIVILEGES ON *.* TO #{user}@'%'
Both queries will grant the user super user permissions, ensuring you don't run
into similar problems in the future (e.g. when new tables are created).
EOF
end
end
# Bulk queues background migration jobs for an entire table, batched by ID range.
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
#
# model_class - The table being iterated over
# job_class_name - The background migration job class as a string
# batch_size - The maximum number of rows per job
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
jobs = []
model_class.each_batch(of: batch_size) do |relation|
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
# Note: This code path generally only helps with many millions of rows
# We push multiple jobs at a time to reduce the time spent in
# Sidekiq/Redis operations. We're using this buffer based approach so we
# don't need to run additional queries for every range.
BackgroundMigrationWorker.perform_bulk(jobs)
jobs.clear
end
jobs << [job_class_name, [start_id, end_id]]
end
BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
end
# Queues background migration jobs for an entire table, batched by ID range.
# Each job is scheduled with a `delay_interval` in between.
# If you use a small interval, then some jobs may run at the same time.
#
# model_class - The table being iterated over
# job_class_name - The background migration job class as a string
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
# batch_size - The maximum number of rows per job
#
# Example:
#
# class Route < ActiveRecord::Base
# include EachBatch
# self.table_name = 'routes'
# end
#
# queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
#
# Where the model_class includes EachBatch, and the background migration exists:
#
# class Gitlab::BackgroundMigration::ProcessRoutes
# def perform(start_id, end_id)
# # do something
# end
# end
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
model_class.each_batch(of: batch_size) do |relation, index|
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
# the same time, which is not helpful in most cases where we wish to
# spread the work over time.
BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
end
end
end
end
# rubocop:enable all

162
lib/mastodon/snowflake.rb Normal file
View File

@@ -0,0 +1,162 @@
# frozen_string_literal: true
module Mastodon::Snowflake
DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/
class Callbacks
def self.around_create(record)
now = Time.now.utc
if record.created_at.nil? || record.created_at >= now || record.created_at == record.updated_at
yield
else
record.id = Mastodon::Snowflake.id_at(record.created_at)
tries = 0
begin
yield
rescue ActiveRecord::RecordNotUnique
raise if tries > 100
tries += 1
record.id += rand(100)
retry
end
end
end
end
class << self
# Our ID will be composed of the following:
# 6 bytes (48 bits) of millisecond-level timestamp
# 2 bytes (16 bits) of sequence data
#
# The 'sequence data' is intended to be unique within a
# given millisecond, yet obscure the 'serial number' of
# this row.
#
# To do this, we hash the following data:
# * Table name (if provided, skipped if not)
# * Secret salt (should not be guessable)
# * Timestamp (again, millisecond-level granularity)
#
# We then take the first two bytes of that value, and add
# the lowest two bytes of the table ID sequence number
# (`table_name`_id_seq). This means that even if we insert
# two rows at the same millisecond, they will have
# distinct 'sequence data' portions.
#
# If this happens, and an attacker can see both such IDs,
# they can determine which of the two entries was inserted
# first, but not the total number of entries in the table
# (even mod 2**16).
#
# The table name is included in the hash to ensure that
# different tables derive separate sequence bases so rows
# inserted in the same millisecond in different tables do
# not reveal the table ID sequence number for one another.
#
# The secret salt is included in the hash to ensure that
# external users cannot derive the sequence base given the
# timestamp and table name, which would allow them to
# compute the table ID sequence number.
def define_timestamp_id
return if already_defined?
connection.execute(<<~SQL)
CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
RETURNS bigint AS
$$
DECLARE
time_part bigint;
sequence_base bigint;
tail bigint;
BEGIN
time_part := (
-- Get the time in milliseconds
((date_part('epoch', now()) * 1000))::bigint
-- And shift it over two bytes
<< 16);
sequence_base := (
'x' ||
-- Take the first two bytes (four hex characters)
substr(
-- Of the MD5 hash of the data we documented
md5(table_name ||
'#{SecureRandom.hex(16)}' ||
time_part::text
),
1, 4
)
-- And turn it into a bigint
)::bit(16)::bigint;
-- Finally, add our sequence number to our base, and chop
-- it to the last two bytes
tail := (
(sequence_base + nextval(table_name || '_id_seq'))
& 65535);
-- Return the time part and the sequence part. OR appears
-- faster here than addition, but they're equivalent:
-- time_part has no trailing two bytes, and tail is only
-- the last two bytes.
RETURN time_part | tail;
END
$$ LANGUAGE plpgsql VOLATILE;
SQL
end
def ensure_id_sequences_exist
# Find tables using timestamp IDs.
connection.tables.each do |table|
# We're only concerned with "id" columns.
next unless (id_col = connection.columns(table).find { |col| col.name == 'id' })
# And only those that are using timestamp_id.
next unless (data = DEFAULT_REGEX.match(id_col.default_function))
seq_name = data[:seq_prefix] + '_id_seq'
# If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
# NOT EXISTS, but we can't depend on that. Instead, catch the
# possible exception and ignore it.
# Note that seq_name isn't a column name, but it's a
# relation, like a column, and follows the same quoting rules
# in Postgres.
connection.execute(<<~SQL)
DO $$
BEGIN
CREATE SEQUENCE #{connection.quote_column_name(seq_name)};
EXCEPTION WHEN duplicate_table THEN
-- Do nothing, we have the sequence already.
END
$$ LANGUAGE plpgsql;
SQL
end
end
def id_at(timestamp)
id = timestamp.to_i * 1000 + rand(1000)
id = id << 16
id += rand(2**16)
id
end
private
def already_defined?
connection.execute(<<~SQL).values.first.first
SELECT EXISTS(
SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
);
SQL
end
def connection
ActiveRecord::Base.connection
end
end
end

View File

@@ -5,15 +5,15 @@ module Mastodon
module_function
def major
1
2
end
def minor
6
0
end
def patch
1
0
end
def pre
@@ -21,7 +21,7 @@ module Mastodon
end
def flags
''
'rc2'
end
def to_a

View File

@@ -10,7 +10,7 @@ end
namespace :assets do
desc 'Generate static pages'
task :generate_static_pages do
render_static_page 'errors/500', layout: 'error', dest: Rails.root.join('public', '500.html')
render_static_page 'errors/500', layout: 'error', dest: Rails.root.join('public', 'assets', '500.html')
end
end

View File

@@ -1,5 +1,36 @@
# frozen_string_literal: true
require_relative '../mastodon/snowflake'
def each_schema_load_environment
# If we're in development, also run this for the test environment.
# This is a somewhat hacky way to do this, so here's why:
# 1. We have to define this before we load the schema, or we won't
# have a timestamp_id function when we get to it in the schema.
# 2. db:setup calls db:schema:load_if_ruby, which calls
# db:schema:load, which we define above as having a prerequisite
# of this task.
# 3. db:schema:load ends up running
# ActiveRecord::Tasks::DatabaseTasks.load_schema_current, which
# calls a private method `each_current_configuration`, which
# explicitly also does the loading for the `test` environment
# if the current environment is `development`, so we end up
# needing to do the same, and we can't even use the same method
# to do it.
if Rails.env == 'development'
test_conf = ActiveRecord::Base.configurations['test']
if test_conf['database']&.present?
ActiveRecord::Base.establish_connection(:test)
yield
ActiveRecord::Base.establish_connection(Rails.env.to_sym)
end
end
yield
end
namespace :db do
namespace :migrate do
desc 'Setup the db or migrate depending on state of db'
@@ -16,4 +47,29 @@ namespace :db do
end
end
end
# Before we load the schema, define the timestamp_id function.
# Idiomatically, we might do this in a migration, but then it
# wouldn't end up in schema.rb, so we'd need to figure out a way to
# get it in before doing db:setup as well. This is simpler, and
# ensures it's always in place.
Rake::Task['db:schema:load'].enhance ['db:define_timestamp_id']
# After we load the schema, make sure we have sequences for each
# table using timestamp IDs.
Rake::Task['db:schema:load'].enhance do
Rake::Task['db:ensure_id_sequences_exist'].invoke
end
task :define_timestamp_id do
each_schema_load_environment do
Mastodon::Snowflake.define_timestamp_id
end
end
task :ensure_id_sequences_exist do
each_schema_load_environment do
Mastodon::Snowflake.ensure_id_sequences_exist
end
end
end

View File

@@ -17,7 +17,7 @@ namespace :emojis do
task :generate do
source = 'http://www.unicode.org/Public/emoji/5.0/emoji-test.txt'
codes = []
dest = Rails.root.join('app', 'javascript', 'mastodon', 'emoji_map.json')
dest = Rails.root.join('app', 'javascript', 'mastodon', 'features', 'emoji', 'emoji_map.json')
puts "Downloading emojos from source... (#{source})"