Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ end

group :development do
# Use Capistrano for deployment
gem 'capistrano', '~> 3.18.0', require: false
gem 'capistrano', '~> 3.19.0', require: false
gem 'capistrano-cul', require: false
gem 'capistrano-passenger', '~> 0.1', require: false
gem 'capistrano-rails', '~> 1.4', require: false
Expand Down
27 changes: 15 additions & 12 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ GEM
tzinfo (~> 2.0)
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
airbrussh (1.5.1)
airbrussh (1.5.3)
sshkit (>= 1.6.1, != 1.7.0)
ast (2.4.2)
aws-crt (0.2.0-arm64-darwin)
Expand All @@ -100,7 +100,7 @@ GEM
aws-sigv4 (~> 1.8)
aws-sigv4 (1.8.0)
aws-eventstream (~> 1, >= 1.0.2)
base64 (0.2.0)
base64 (0.3.0)
bcrypt (3.1.20)
bcrypt_pbkdf (1.1.0)
best_type (1.0.0)
Expand All @@ -110,7 +110,7 @@ GEM
bootsnap (1.18.3)
msgpack (~> 1.2)
builder (3.2.4)
capistrano (3.18.1)
capistrano (3.19.2)
airbrussh (>= 1.0.0)
i18n
rake (>= 10.0.0)
Expand Down Expand Up @@ -139,7 +139,7 @@ GEM
rack-test (>= 0.6.3)
regexp_parser (>= 1.5, < 3.0)
xpath (~> 3.2)
concurrent-ruby (1.2.3)
concurrent-ruby (1.3.5)
connection_pool (2.4.1)
crack (1.0.0)
bigdecimal
Expand Down Expand Up @@ -216,7 +216,7 @@ GEM
hashdiff (1.1.0)
hashie (5.0.0)
httpclient (2.8.3)
i18n (1.14.4)
i18n (1.14.7)
concurrent-ruby (~> 1.0)
importmap-rails (2.0.1)
actionpack (>= 6.0.0)
Expand All @@ -234,6 +234,7 @@ GEM
jwt (2.8.1)
base64
language_server-protocol (3.17.0.3)
logger (1.7.0)
loofah (2.22.0)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
Expand All @@ -254,7 +255,7 @@ GEM
multi_json (1.15.0)
mustermann (3.0.0)
ruby2_keywords (~> 0.0.1)
mutex_m (0.2.0)
mutex_m (0.3.0)
mysql2 (0.5.6)
net-http (0.4.1)
uri
Expand All @@ -265,13 +266,13 @@ GEM
net-protocol
net-protocol (0.2.2)
timeout
net-scp (4.0.0)
net-scp (4.1.0)
net-ssh (>= 2.6.5, < 8.0.0)
net-sftp (4.0.0)
net-ssh (>= 5.0.0, < 8.0.0)
net-smtp (0.5.0)
net-protocol
net-ssh (7.2.1)
net-ssh (7.3.0)
nio4r (2.7.1)
nokogiri (1.16.3-arm64-darwin)
racc (~> 1.4)
Expand All @@ -286,6 +287,7 @@ GEM
omniauth (>= 2.0)
orm_adapter (0.5.0)
os (1.1.4)
ostruct (0.6.3)
parallel (1.25.1)
parser (3.3.4.0)
ast (~> 2.4.1)
Expand Down Expand Up @@ -337,7 +339,7 @@ GEM
thor (~> 1.0, >= 1.2.2)
zeitwerk (~> 2.6)
rainbow (3.1.1)
rake (13.1.0)
rake (13.3.0)
rdoc (6.6.3.1)
psych (>= 4.0.0)
redis (4.8.1)
Expand Down Expand Up @@ -453,12 +455,13 @@ GEM
sprockets (>= 3.0.0)
sqlite3 (1.7.3-arm64-darwin)
sqlite3 (1.7.3-x86_64-linux)
sshkit (1.22.1)
sshkit (1.24.0)
base64
mutex_m
logger
net-scp (>= 1.1.2)
net-sftp (>= 2.1.2)
net-ssh (>= 2.8.0)
ostruct
stimulus-rails (1.3.3)
railties (>= 6.0.0)
stringex (2.8.6)
Expand Down Expand Up @@ -509,7 +512,7 @@ DEPENDENCIES
aws-sdk-s3 (~> 1)
best_type (~> 1.0)
bootsnap
capistrano (~> 3.18.0)
capistrano (~> 3.19.0)
capistrano-cul
capistrano-passenger (~> 0.1)
capistrano-rails (~> 1.4)
Expand Down
2 changes: 1 addition & 1 deletion config/deploy.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

# config valid for current version and patch releases of Capistrano
lock '~> 3.18.0'
lock '~> 3.19.0'

# Until we retire all old CentOS VMs, we need to set the rvm_custom_path because rvm is installed
# in a non-standard location for our AlmaLinux VMs. This is because our service accounts need to
Expand Down
127 changes: 127 additions & 0 deletions lib/tasks/aws.rake
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,133 @@

namespace :atc do
namespace :aws do
# Pages through every result of S3_CLIENT#list_objects_v2 and yields each listed object to the given block.
#
# S3_CLIENT#list_objects_v2 returns up to 1000 results per call, and returns a token that can be
# used in subsequent calls to get the next page of results. This method wraps that paging functionality
# so the caller sees one continuous stream of objects.
#
# @param list_objects_v2_opts [Hash] options passed to every S3_CLIENT#list_objects_v2 call
#   (callers in this file pass :bucket and :prefix); the hash itself is not modified
# @yield [object] each entry of every page's `contents`, in the order the pages are returned
# @return [nil]
def auto_paginating_list_object_v2(list_objects_v2_opts)
  # nil on the first request; afterwards, the token returned by the previous page.
  next_continuation_token = nil

  loop do
    result_object = S3_CLIENT.list_objects_v2(
      list_objects_v2_opts.merge({ continuation_token: next_continuation_token })
    )

    # Iterate over the page that was just fetched. Issuing a second, token-less request here would
    # re-yield the first page on every iteration and never expose the later pages.
    result_object.contents.each do |object|
      yield object
    end

    # A nil token means there are no further pages.
    next_continuation_token = result_object.next_continuation_token
    break if next_continuation_token.nil?
  end
end


desc 'For the given bucket_name and key_prefix, iterates over objects and generates a list of their file extensions and counts'
task list_file_extensions: :environment do
  # Both settings are read from environment variables of the same name.
  listing_opts = { bucket: ENV['bucket_name'], prefix: ENV['key_prefix'] }

  # Maps each file extension (as returned by File.extname) to the number of object keys seen with it.
  counts_by_extension = Hash.new(0)
  auto_paginating_list_object_v2(listing_opts) do |s3_object|
    counts_by_extension[File.extname(s3_object.key)] += 1
  end

  # Print one "extension: count" line per extension, highest count first.
  counts_by_extension.sort_by { |_extension, count| count }.reverse_each do |extension, count|
    puts "#{extension}: #{count}"
  end
end

desc 'For the given bucket_name and key_prefix, iterates over objects in Intelligent Tiering and restores them '\
     ' if they have already transitioned to the Archive Access tier.'
task restore_archived_objects: :environment do
  # Task settings, all supplied through environment variables.
  target_bucket = ENV['bucket_name']
  listing_prefix = ENV['key_prefix']
  suffix_filter = ENV['key_suffix_filter']
  # Only the exact string "true" enables dry-run mode.
  dry_run = ENV['dry_run'] == 'true'

  puts ""
  puts "This is a dry run because dry_run=true has been set. No objects will actually be restored during this run.\n\n" if dry_run

  search_message =
    if suffix_filter.present?
      "Searching for objects (and filtering on objects with keys that end with \"#{suffix_filter}\")...\n\n"
    else
      "Searching for objects...\n\n"
    end
  puts search_message

  # Running totals for the summary printed at the end; any counter that is never touched reads as 0.
  tally = Hash.new(0)
  failure_messages = []

  auto_paginating_list_object_v2({ bucket: target_bucket, prefix: listing_prefix }) do |s3_object|
    current_key = s3_object.key

    # Objects in any other storage class are counted and left alone.
    unless s3_object.storage_class == 'INTELLIGENT_TIERING'
      tally[:non_intelligent_tiering_skipped] += 1
      next
    end

    # The suffix filter is only consulted for Intelligent Tiering objects.
    if suffix_filter.present? && !current_key.end_with?(suffix_filter)
      tally[:skipped_by_suffix_filter] += 1
      next
    end

    begin
      unless dry_run
        S3_CLIENT.restore_object({
          bucket: target_bucket,
          key: current_key,
          # An empty restore_request hash is passed; per the original author's note, Intelligent Tiering
          # objects need no further restore configuration.
          restore_request: {}
        })
      end
      # Also counted during a dry run, where it represents a request that would have been made.
      tally[:restoration_requests_submitted] += 1
    rescue Aws::S3::Errors::ServiceError => e
      failure_reason = e.message

      if failure_reason.include?("Restore is not allowed for the object's current storage class")
        # Treated as "already restored and available", so nothing more needs to be done for this object.
        tally[:already_available] += 1
      elsif failure_reason.include?("Object restore is already in progress")
        # A restoration is already under way, so a second request is unnecessary.
        tally[:restoration_in_progress] += 1
      else
        # Anything else is collected and reported at the end instead of aborting the run.
        failure_messages << "An unexpected error occured while attempting to restore #{current_key}: #{failure_reason}"
      end
    end
  end

  puts "--------------------"
  puts "Results:"

  if dry_run
    puts "Number of intelligent tiering object restoration requests that would have been made (if this wasn't a dry run): #{tally[:restoration_requests_submitted]}"
  else
    puts "Number of intelligent tiering object restoration requests submitted: #{tally[:restoration_requests_submitted]}"
    puts "Number of intelligent tiering objects with restoration in progress: #{tally[:restoration_in_progress]}"
    puts "Number of intelligent tiering objects already available: #{tally[:already_available]}"
  end
  puts "Number of objects skipped based on key_suffix_filter: #{tally[:skipped_by_suffix_filter]}"
  puts "Number of non intelligent tiering objects skipped: #{tally[:non_intelligent_tiering_skipped]}"
  puts "\nReminder: After restoration has been initiated, it will take 3-5 hours until the files are available for download. "\
       "The current time is #{Time.current}, so the files should be available after #{Time.current + 5.hours}."
  puts "--------------------"

  error_summary = failure_messages.empty? ? 'None' : "\n#{failure_messages.join("\n")}"
  puts "Errors: #{error_summary}"
end

desc 'Run a fixity check using a remote CheckPlease app deployment.'
task fixity_check: :environment do
bucket_name = ENV['bucket_name']
Expand Down