mirror of
https://github.com/mastodon/mastodon.git
synced 2025-01-03 05:29:07 +00:00
Clean up code style of Mastodon::TimestampId module (#5232)
* Clean up code style of Mastodon::TimestampId module * Update brakeman config
This commit is contained in:
parent
a5143df303
commit
eb5ac23434
|
@ -57,6 +57,26 @@
|
||||||
"confidence": "Weak",
|
"confidence": "Weak",
|
||||||
"note": ""
|
"note": ""
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"warning_type": "SQL Injection",
|
||||||
|
"warning_code": 0,
|
||||||
|
"fingerprint": "34efc76883080f8b1110a30c34ec4f903946ee56651aae46c62477f45d4fc412",
|
||||||
|
"check_name": "SQL",
|
||||||
|
"message": "Possible SQL injection",
|
||||||
|
"file": "lib/mastodon/timestamp_ids.rb",
|
||||||
|
"line": 63,
|
||||||
|
"link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
|
||||||
|
"code": "connection.execute(\" CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n RETURNS bigint AS\\n $$\\n DECLARE\\n time_part bigint;\\n sequence_base bigint;\\n tail bigint;\\n BEGIN\\n time_part := (\\n -- Get the time in milliseconds\\n ((date_part('epoch', now()) * 1000))::bigint\\n -- And shift it over two bytes\\n << 16);\\n\\n sequence_base := (\\n 'x' ||\\n -- Take the first two bytes (four hex characters)\\n substr(\\n -- Of the MD5 hash of the data we documented\\n md5(table_name ||\\n '#{SecureRandom.hex(16)}' ||\\n time_part::text\\n ),\\n 1, 4\\n )\\n -- And turn it into a bigint\\n )::bit(16)::bigint;\\n\\n -- Finally, add our sequence number to our base, and chop\\n -- it to the last two bytes\\n tail := (\\n (sequence_base + nextval(table_name || '_id_seq'))\\n & 65535);\\n\\n -- Return the time part and the sequence part. OR appears\\n -- faster here than addition, but they're equivalent:\\n -- time_part has no trailing two bytes, and tail is only\\n -- the last two bytes.\\n RETURN time_part | tail;\\n END\\n $$ LANGUAGE plpgsql VOLATILE;\\n\")",
|
||||||
|
"render_path": null,
|
||||||
|
"location": {
|
||||||
|
"type": "method",
|
||||||
|
"class": "Mastodon::TimestampIds",
|
||||||
|
"method": "define_timestamp_id"
|
||||||
|
},
|
||||||
|
"user_input": "SecureRandom.hex(16)",
|
||||||
|
"confidence": "Medium",
|
||||||
|
"note": ""
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"warning_type": "Dynamic Render Path",
|
"warning_type": "Dynamic Render Path",
|
||||||
"warning_code": 15,
|
"warning_code": 15,
|
||||||
|
@ -210,26 +230,6 @@
|
||||||
"confidence": "Weak",
|
"confidence": "Weak",
|
||||||
"note": ""
|
"note": ""
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"warning_type": "SQL Injection",
|
|
||||||
"warning_code": 0,
|
|
||||||
"fingerprint": "cd440d9d0bcb76225f4142030cec0bdec6ad119c537c108c9d514bf87bc34d29",
|
|
||||||
"check_name": "SQL",
|
|
||||||
"message": "Possible SQL injection",
|
|
||||||
"file": "lib/mastodon/timestamp_ids.rb",
|
|
||||||
"line": 69,
|
|
||||||
"link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
|
|
||||||
"code": "ActiveRecord::Base.connection.execute(\" CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n RETURNS bigint AS\\n $$\\n DECLARE\\n time_part bigint;\\n sequence_base bigint;\\n tail bigint;\\n BEGIN\\n -- Our ID will be composed of the following:\\n -- 6 bytes (48 bits) of millisecond-level timestamp\\n -- 2 bytes (16 bits) of sequence data\\n\\n -- The 'sequence data' is intended to be unique within a\\n -- given millisecond, yet obscure the 'serial number' of\\n -- this row.\\n\\n -- To do this, we hash the following data:\\n -- * Table name (if provided, skipped if not)\\n -- * Secret salt (should not be guessable)\\n -- * Timestamp (again, millisecond-level granularity)\\n\\n -- We then take the first two bytes of that value, and add\\n -- the lowest two bytes of the table ID sequence number\\n -- (`table_name`_id_seq). This means that even if we insert\\n -- two rows at the same millisecond, they will have\\n -- distinct 'sequence data' portions.\\n\\n -- If this happens, and an attacker can see both such IDs,\\n -- they can determine which of the two entries was inserted\\n -- first, but not the total number of entries in the table\\n -- (even mod 2**16).\\n\\n -- The table name is included in the hash to ensure that\\n -- different tables derive separate sequence bases so rows\\n -- inserted in the same millisecond in different tables do\\n -- not reveal the table ID sequence number for one another.\\n\\n -- The secret salt is included in the hash to ensure that\\n -- external users cannot derive the sequence base given the\\n -- timestamp and table name, which would allow them to\\n -- compute the table ID sequence number.\\n\\n time_part := (\\n -- Get the time in milliseconds\\n ((date_part('epoch', now()) * 1000))::bigint\\n -- And shift it over two bytes\\n << 16);\\n\\n sequence_base := (\\n 'x' ||\\n -- Take the first two bytes (four hex characters)\\n substr(\\n -- Of the MD5 hash of the data we documented\\n md5(table_name ||\\n '#{SecureRandom.hex(16)}' ||\\n time_part::text\\n ),\\n 1, 4\\n )\\n -- And turn it into a bigint\\n )::bit(16)::bigint;\\n\\n -- Finally, add our sequence number to our base, and chop\\n -- it to the last two bytes\\n tail := (\\n (sequence_base + nextval(table_name || '_id_seq'))\\n & 65535);\\n\\n -- Return the time part and the sequence part. OR appears\\n -- faster here than addition, but they're equivalent:\\n -- time_part has no trailing two bytes, and tail is only\\n -- the last two bytes.\\n RETURN time_part | tail;\\n END\\n $$ LANGUAGE plpgsql VOLATILE;\\n\")",
|
|
||||||
"render_path": null,
|
|
||||||
"location": {
|
|
||||||
"type": "method",
|
|
||||||
"class": "Mastodon::TimestampIds",
|
|
||||||
"method": "s(:self).define_timestamp_id"
|
|
||||||
},
|
|
||||||
"user_input": "SecureRandom.hex(16)",
|
|
||||||
"confidence": "Medium",
|
|
||||||
"note": ""
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"warning_type": "Cross-Site Scripting",
|
"warning_type": "Cross-Site Scripting",
|
||||||
"warning_code": 4,
|
"warning_code": 4,
|
||||||
|
@ -269,6 +269,6 @@
|
||||||
"note": ""
|
"note": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"updated": "2017-10-05 20:06:40 +0200",
|
"updated": "2017-10-06 03:27:46 +0200",
|
||||||
"brakeman_version": "4.0.1"
|
"brakeman_version": "4.0.1"
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,120 +1,111 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module Mastodon
|
module Mastodon::TimestampIds
|
||||||
module TimestampIds
|
DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/
|
||||||
def self.define_timestamp_id
|
|
||||||
conn = ActiveRecord::Base.connection
|
|
||||||
|
|
||||||
# Make sure we don't already have a `timestamp_id` function.
|
class << self
|
||||||
unless conn.execute(<<~SQL).values.first.first
|
# Our ID will be composed of the following:
|
||||||
SELECT EXISTS(
|
# 6 bytes (48 bits) of millisecond-level timestamp
|
||||||
SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
|
# 2 bytes (16 bits) of sequence data
|
||||||
);
|
#
|
||||||
|
# The 'sequence data' is intended to be unique within a
|
||||||
|
# given millisecond, yet obscure the 'serial number' of
|
||||||
|
# this row.
|
||||||
|
#
|
||||||
|
# To do this, we hash the following data:
|
||||||
|
# * Table name (if provided, skipped if not)
|
||||||
|
# * Secret salt (should not be guessable)
|
||||||
|
# * Timestamp (again, millisecond-level granularity)
|
||||||
|
#
|
||||||
|
# We then take the first two bytes of that value, and add
|
||||||
|
# the lowest two bytes of the table ID sequence number
|
||||||
|
# (`table_name`_id_seq). This means that even if we insert
|
||||||
|
# two rows at the same millisecond, they will have
|
||||||
|
# distinct 'sequence data' portions.
|
||||||
|
#
|
||||||
|
# If this happens, and an attacker can see both such IDs,
|
||||||
|
# they can determine which of the two entries was inserted
|
||||||
|
# first, but not the total number of entries in the table
|
||||||
|
# (even mod 2**16).
|
||||||
|
#
|
||||||
|
# The table name is included in the hash to ensure that
|
||||||
|
# different tables derive separate sequence bases so rows
|
||||||
|
# inserted in the same millisecond in different tables do
|
||||||
|
# not reveal the table ID sequence number for one another.
|
||||||
|
#
|
||||||
|
# The secret salt is included in the hash to ensure that
|
||||||
|
# external users cannot derive the sequence base given the
|
||||||
|
# timestamp and table name, which would allow them to
|
||||||
|
# compute the table ID sequence number.
|
||||||
|
def define_timestamp_id
|
||||||
|
return if already_defined?
|
||||||
|
|
||||||
|
connection.execute(<<~SQL)
|
||||||
|
CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
|
||||||
|
RETURNS bigint AS
|
||||||
|
$$
|
||||||
|
DECLARE
|
||||||
|
time_part bigint;
|
||||||
|
sequence_base bigint;
|
||||||
|
tail bigint;
|
||||||
|
BEGIN
|
||||||
|
time_part := (
|
||||||
|
-- Get the time in milliseconds
|
||||||
|
((date_part('epoch', now()) * 1000))::bigint
|
||||||
|
-- And shift it over two bytes
|
||||||
|
<< 16);
|
||||||
|
|
||||||
|
sequence_base := (
|
||||||
|
'x' ||
|
||||||
|
-- Take the first two bytes (four hex characters)
|
||||||
|
substr(
|
||||||
|
-- Of the MD5 hash of the data we documented
|
||||||
|
md5(table_name ||
|
||||||
|
'#{SecureRandom.hex(16)}' ||
|
||||||
|
time_part::text
|
||||||
|
),
|
||||||
|
1, 4
|
||||||
|
)
|
||||||
|
-- And turn it into a bigint
|
||||||
|
)::bit(16)::bigint;
|
||||||
|
|
||||||
|
-- Finally, add our sequence number to our base, and chop
|
||||||
|
-- it to the last two bytes
|
||||||
|
tail := (
|
||||||
|
(sequence_base + nextval(table_name || '_id_seq'))
|
||||||
|
& 65535);
|
||||||
|
|
||||||
|
-- Return the time part and the sequence part. OR appears
|
||||||
|
-- faster here than addition, but they're equivalent:
|
||||||
|
-- time_part has no trailing two bytes, and tail is only
|
||||||
|
-- the last two bytes.
|
||||||
|
RETURN time_part | tail;
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql VOLATILE;
|
||||||
SQL
|
SQL
|
||||||
# The function doesn't exist, so we'll define it.
|
|
||||||
conn.execute(<<~SQL)
|
|
||||||
CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
|
|
||||||
RETURNS bigint AS
|
|
||||||
$$
|
|
||||||
DECLARE
|
|
||||||
time_part bigint;
|
|
||||||
sequence_base bigint;
|
|
||||||
tail bigint;
|
|
||||||
BEGIN
|
|
||||||
-- Our ID will be composed of the following:
|
|
||||||
-- 6 bytes (48 bits) of millisecond-level timestamp
|
|
||||||
-- 2 bytes (16 bits) of sequence data
|
|
||||||
|
|
||||||
-- The 'sequence data' is intended to be unique within a
|
|
||||||
-- given millisecond, yet obscure the 'serial number' of
|
|
||||||
-- this row.
|
|
||||||
|
|
||||||
-- To do this, we hash the following data:
|
|
||||||
-- * Table name (if provided, skipped if not)
|
|
||||||
-- * Secret salt (should not be guessable)
|
|
||||||
-- * Timestamp (again, millisecond-level granularity)
|
|
||||||
|
|
||||||
-- We then take the first two bytes of that value, and add
|
|
||||||
-- the lowest two bytes of the table ID sequence number
|
|
||||||
-- (`table_name`_id_seq). This means that even if we insert
|
|
||||||
-- two rows at the same millisecond, they will have
|
|
||||||
-- distinct 'sequence data' portions.
|
|
||||||
|
|
||||||
-- If this happens, and an attacker can see both such IDs,
|
|
||||||
-- they can determine which of the two entries was inserted
|
|
||||||
-- first, but not the total number of entries in the table
|
|
||||||
-- (even mod 2**16).
|
|
||||||
|
|
||||||
-- The table name is included in the hash to ensure that
|
|
||||||
-- different tables derive separate sequence bases so rows
|
|
||||||
-- inserted in the same millisecond in different tables do
|
|
||||||
-- not reveal the table ID sequence number for one another.
|
|
||||||
|
|
||||||
-- The secret salt is included in the hash to ensure that
|
|
||||||
-- external users cannot derive the sequence base given the
|
|
||||||
-- timestamp and table name, which would allow them to
|
|
||||||
-- compute the table ID sequence number.
|
|
||||||
|
|
||||||
time_part := (
|
|
||||||
-- Get the time in milliseconds
|
|
||||||
((date_part('epoch', now()) * 1000))::bigint
|
|
||||||
-- And shift it over two bytes
|
|
||||||
<< 16);
|
|
||||||
|
|
||||||
sequence_base := (
|
|
||||||
'x' ||
|
|
||||||
-- Take the first two bytes (four hex characters)
|
|
||||||
substr(
|
|
||||||
-- Of the MD5 hash of the data we documented
|
|
||||||
md5(table_name ||
|
|
||||||
'#{SecureRandom.hex(16)}' ||
|
|
||||||
time_part::text
|
|
||||||
),
|
|
||||||
1, 4
|
|
||||||
)
|
|
||||||
-- And turn it into a bigint
|
|
||||||
)::bit(16)::bigint;
|
|
||||||
|
|
||||||
-- Finally, add our sequence number to our base, and chop
|
|
||||||
-- it to the last two bytes
|
|
||||||
tail := (
|
|
||||||
(sequence_base + nextval(table_name || '_id_seq'))
|
|
||||||
& 65535);
|
|
||||||
|
|
||||||
-- Return the time part and the sequence part. OR appears
|
|
||||||
-- faster here than addition, but they're equivalent:
|
|
||||||
-- time_part has no trailing two bytes, and tail is only
|
|
||||||
-- the last two bytes.
|
|
||||||
RETURN time_part | tail;
|
|
||||||
END
|
|
||||||
$$ LANGUAGE plpgsql VOLATILE;
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.ensure_id_sequences_exist
|
def ensure_id_sequences_exist
|
||||||
conn = ActiveRecord::Base.connection
|
|
||||||
|
|
||||||
# Find tables using timestamp IDs.
|
# Find tables using timestamp IDs.
|
||||||
default_regex = /timestamp_id\('(?<seq_prefix>\w+)'/
|
connection.tables.each do |table|
|
||||||
conn.tables.each do |table|
|
|
||||||
# We're only concerned with "id" columns.
|
# We're only concerned with "id" columns.
|
||||||
next unless (id_col = conn.columns(table).find { |col| col.name == 'id' })
|
next unless (id_col = connection.columns(table).find { |col| col.name == 'id' })
|
||||||
|
|
||||||
# And only those that are using timestamp_id.
|
# And only those that are using timestamp_id.
|
||||||
next unless (data = default_regex.match(id_col.default_function))
|
next unless (data = DEFAULT_REGEX.match(id_col.default_function))
|
||||||
|
|
||||||
seq_name = data[:seq_prefix] + '_id_seq'
|
seq_name = data[:seq_prefix] + '_id_seq'
|
||||||
|
|
||||||
# If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
|
# If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
|
||||||
# NOT EXISTS, but we can't depend on that. Instead, catch the
|
# NOT EXISTS, but we can't depend on that. Instead, catch the
|
||||||
# possible exception and ignore it.
|
# possible exception and ignore it.
|
||||||
# Note that seq_name isn't a column name, but it's a
|
# Note that seq_name isn't a column name, but it's a
|
||||||
# relation, like a column, and follows the same quoting rules
|
# relation, like a column, and follows the same quoting rules
|
||||||
# in Postgres.
|
# in Postgres.
|
||||||
conn.execute(<<~SQL)
|
connection.execute(<<~SQL)
|
||||||
DO $$
|
DO $$
|
||||||
BEGIN
|
BEGIN
|
||||||
CREATE SEQUENCE #{conn.quote_column_name(seq_name)};
|
CREATE SEQUENCE #{connection.quote_column_name(seq_name)};
|
||||||
EXCEPTION WHEN duplicate_table THEN
|
EXCEPTION WHEN duplicate_table THEN
|
||||||
-- Do nothing, we have the sequence already.
|
-- Do nothing, we have the sequence already.
|
||||||
END
|
END
|
||||||
|
@ -122,5 +113,19 @@ module Mastodon
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def already_defined?
|
||||||
|
connection.execute(<<~SQL).values.first.first
|
||||||
|
SELECT EXISTS(
|
||||||
|
SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
|
||||||
|
);
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
|
def connection
|
||||||
|
ActiveRecord::Base.connection
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -20,10 +20,10 @@ def each_schema_load_environment
|
||||||
|
|
||||||
if Rails.env == 'development'
|
if Rails.env == 'development'
|
||||||
test_conf = ActiveRecord::Base.configurations['test']
|
test_conf = ActiveRecord::Base.configurations['test']
|
||||||
|
|
||||||
if test_conf['database']&.present?
|
if test_conf['database']&.present?
|
||||||
ActiveRecord::Base.establish_connection(:test)
|
ActiveRecord::Base.establish_connection(:test)
|
||||||
yield
|
yield
|
||||||
|
|
||||||
ActiveRecord::Base.establish_connection(Rails.env.to_sym)
|
ActiveRecord::Base.establish_connection(Rails.env.to_sym)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue