diff --git a/Gemfile b/Gemfile index f3060f5..443cf3c 100644 --- a/Gemfile +++ b/Gemfile @@ -82,7 +82,7 @@ end group :development do # Use Capistrano for deployment - gem 'capistrano', '~> 3.18.0', require: false + gem 'capistrano', '~> 3.19.0', require: false gem 'capistrano-cul', require: false gem 'capistrano-passenger', '~> 0.1', require: false gem 'capistrano-rails', '~> 1.4', require: false diff --git a/Gemfile.lock b/Gemfile.lock index 7684236..8a6036a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -77,7 +77,7 @@ GEM tzinfo (~> 2.0) addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) - airbrussh (1.5.1) + airbrussh (1.5.3) sshkit (>= 1.6.1, != 1.7.0) ast (2.4.2) aws-crt (0.2.0-arm64-darwin) @@ -100,7 +100,7 @@ GEM aws-sigv4 (~> 1.8) aws-sigv4 (1.8.0) aws-eventstream (~> 1, >= 1.0.2) - base64 (0.2.0) + base64 (0.3.0) bcrypt (3.1.20) bcrypt_pbkdf (1.1.0) best_type (1.0.0) @@ -110,7 +110,7 @@ GEM bootsnap (1.18.3) msgpack (~> 1.2) builder (3.2.4) - capistrano (3.18.1) + capistrano (3.19.2) airbrussh (>= 1.0.0) i18n rake (>= 10.0.0) @@ -139,7 +139,7 @@ GEM rack-test (>= 0.6.3) regexp_parser (>= 1.5, < 3.0) xpath (~> 3.2) - concurrent-ruby (1.2.3) + concurrent-ruby (1.3.5) connection_pool (2.4.1) crack (1.0.0) bigdecimal @@ -216,7 +216,7 @@ GEM hashdiff (1.1.0) hashie (5.0.0) httpclient (2.8.3) - i18n (1.14.4) + i18n (1.14.7) concurrent-ruby (~> 1.0) importmap-rails (2.0.1) actionpack (>= 6.0.0) @@ -234,6 +234,7 @@ GEM jwt (2.8.1) base64 language_server-protocol (3.17.0.3) + logger (1.7.0) loofah (2.22.0) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -254,7 +255,7 @@ GEM multi_json (1.15.0) mustermann (3.0.0) ruby2_keywords (~> 0.0.1) - mutex_m (0.2.0) + mutex_m (0.3.0) mysql2 (0.5.6) net-http (0.4.1) uri @@ -265,13 +266,13 @@ GEM net-protocol net-protocol (0.2.2) timeout - net-scp (4.0.0) + net-scp (4.1.0) net-ssh (>= 2.6.5, < 8.0.0) net-sftp (4.0.0) net-ssh (>= 5.0.0, < 8.0.0) net-smtp (0.5.0) net-protocol - net-ssh (7.2.1) + net-ssh (7.3.0) nio4r (2.7.1) nokogiri 
(1.16.3-arm64-darwin) racc (~> 1.4) @@ -286,6 +287,7 @@ GEM omniauth (>= 2.0) orm_adapter (0.5.0) os (1.1.4) + ostruct (0.6.3) parallel (1.25.1) parser (3.3.4.0) ast (~> 2.4.1) @@ -337,7 +339,7 @@ GEM thor (~> 1.0, >= 1.2.2) zeitwerk (~> 2.6) rainbow (3.1.1) - rake (13.1.0) + rake (13.3.0) rdoc (6.6.3.1) psych (>= 4.0.0) redis (4.8.1) @@ -453,12 +455,13 @@ GEM sprockets (>= 3.0.0) sqlite3 (1.7.3-arm64-darwin) sqlite3 (1.7.3-x86_64-linux) - sshkit (1.22.1) + sshkit (1.24.0) base64 - mutex_m + logger net-scp (>= 1.1.2) net-sftp (>= 2.1.2) net-ssh (>= 2.8.0) + ostruct stimulus-rails (1.3.3) railties (>= 6.0.0) stringex (2.8.6) @@ -509,7 +512,7 @@ DEPENDENCIES aws-sdk-s3 (~> 1) best_type (~> 1.0) bootsnap - capistrano (~> 3.18.0) + capistrano (~> 3.19.0) capistrano-cul capistrano-passenger (~> 0.1) capistrano-rails (~> 1.4) diff --git a/config/deploy.rb b/config/deploy.rb index 50ead39..76107cd 100644 --- a/config/deploy.rb +++ b/config/deploy.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # config valid for current version and patch releases of Capistrano -lock '~> 3.18.0' +lock '~> 3.19.0' # Until we retire all old CentOS VMs, we need to set the rvm_custom_path because rvm is installed # in a non-standard location for our AlmaLinux VMs. This is because our service accounts need to diff --git a/lib/tasks/aws.rake b/lib/tasks/aws.rake index 6163c94..d1bd04d 100644 --- a/lib/tasks/aws.rake +++ b/lib/tasks/aws.rake @@ -2,6 +2,133 @@ namespace :atc do namespace :aws do + # Calls the S3_CLIENT#list_objects_v2 method multiple times to automatically page through all results. + # S3_CLIENT#list_objects_v2 method returns up to 1000 results per call, and returns a token that can be + # used in subsequent calls to get the next page of results. This method wraps that paging functionality. 
# Pages through every result of S3_CLIENT#list_objects_v2 and yields each listed object exactly once.
#
# Each request after the first passes the `next_continuation_token` returned by the previous response
# as `continuation_token`; iteration stops when a response carries no `next_continuation_token`.
#
# @param list_objects_v2_opts [Hash] options forwarded to S3_CLIENT#list_objects_v2 (the callers in this
#   file pass :bucket and :prefix). The hash is not mutated.
# @yield [object] every entry of every page's `contents`, in the order the pages are returned
# @return [nil]
def auto_paginating_list_object_v2(list_objects_v2_opts)
  next_continuation_token = nil

  loop do
    result_object = S3_CLIENT.list_objects_v2(
      list_objects_v2_opts.merge(continuation_token: next_continuation_token)
    )

    # Iterate over the page that was just fetched. Issuing a second, token-less request here would
    # always return the first page again, so later pages would never be yielded.
    result_object.contents.each { |object| yield object }

    next_continuation_token = result_object.next_continuation_token
    break if next_continuation_token.nil?
  end
end

# Reads ENV['bucket_name'] and ENV['key_prefix'], walks every object under that prefix and prints one
# "<extension>: <count>" line per distinct file extension, highest count first.
desc 'For the given bucket_name and key_prefix, iterates over objects and generates a list of their file extensions and counts'
task list_file_extensions: :environment do
  bucket_name = ENV['bucket_name']
  key_prefix = ENV['key_prefix']

  # Default of 0 lets each extension be incremented without an explicit initialisation step.
  extension_counts = Hash.new(0)

  auto_paginating_list_object_v2({ bucket: bucket_name, prefix: key_prefix }) do |object|
    extension_counts[File.extname(object.key)] += 1
  end

  # Sort the extensions by count, descending.
  extension_counts.sort_by { |_ext, count| count }.reverse_each do |ext, count|
    puts "#{ext}: #{count}"
  end
end

# Reads ENV['bucket_name'], ENV['key_prefix'], ENV['key_suffix_filter'] (optional) and ENV['dry_run'].
# For every listed object whose storage class is INTELLIGENT_TIERING (and whose key ends with the suffix
# filter, when one is given) a restore request is sent via S3_CLIENT#restore_object, unless dry_run is
# exactly 'true'. A summary of the counters and any unexpected errors is printed at the end.
desc 'For the given bucket_name and key_prefix, iterates over objects in Intelligent Tiering and restores them '\
     ' if they have already transitioned to the Archive Access tier.'
task restore_archived_objects: :environment do
  bucket_name = ENV['bucket_name']
  key_prefix = ENV['key_prefix']
  key_suffix_filter = ENV['key_suffix_filter']
  dry_run = ENV['dry_run'] == 'true'

  puts ""

  puts "This is a dry run because dry_run=true has been set. No objects will actually be restored during this run.\n\n" if dry_run

  if key_suffix_filter.present?
    puts "Searching for objects (and filtering on objects with keys that end with \"#{key_suffix_filter}\")...\n\n"
  else
    puts "Searching for objects...\n\n"
  end

  restoration_requests_submitted = 0
  restorations_in_progress = 0
  objects_already_available = 0
  non_intelligent_tiering_objects_skipped = 0
  objects_skipped_by_key_suffix_filter = 0
  errors_encountered = []

  auto_paginating_list_object_v2({ bucket: bucket_name, prefix: key_prefix }) do |object|
    object_key = object.key

    unless object.storage_class == 'INTELLIGENT_TIERING'
      non_intelligent_tiering_objects_skipped += 1
      next
    end

    if key_suffix_filter.present? && !object_key.end_with?(key_suffix_filter)
      objects_skipped_by_key_suffix_filter += 1
      next
    end

    begin
      unless dry_run
        S3_CLIENT.restore_object({
          bucket: bucket_name,
          key: object_key,
          # For an object in Intelligent Tiering Archive Instant storage, we just pass an empty hash here.
          # No further configuration is needed.
          restore_request: {}
        })
      end
      # Counted in dry-run mode too: the summary then reports how many requests *would* have been made.
      restoration_requests_submitted += 1
    rescue Aws::S3::Errors::ServiceError => e
      if e.message.include?("Restore is not allowed for the object's current storage class")
        # If we got here, that means that this object was already restored and doesn't need to be restored
        # again because it is available. We'll silently ignore this error.
        objects_already_available += 1
      elsif e.message.include?("Object restore is already in progress")
        # If we got here, that means that this object's restoration is already in progress and we do not
        # need to initiate another restoration request. We'll silently ignore this error.
        restorations_in_progress += 1
      else
        errors_encountered << "An unexpected error occured while attempting to restore #{object_key}: #{e.message}"
      end
    end
  end

  puts "--------------------"
  puts "Results:"

  if dry_run
    puts "Number of intelligent tiering object restoration requests that would have been made (if this wasn't a dry run): #{restoration_requests_submitted}"
  else
    puts "Number of intelligent tiering object restoration requests submitted: #{restoration_requests_submitted}"
    puts "Number of intelligent tiering objects with restoration in progress: #{restorations_in_progress}"
    puts "Number of intelligent tiering objects already available: #{objects_already_available}"
  end
  puts "Number of objects skipped based on key_suffix_filter: #{objects_skipped_by_key_suffix_filter}"
  puts "Number of non intelligent tiering objects skipped: #{non_intelligent_tiering_objects_skipped}"

  # Read the clock once so both timestamps in the reminder derive from the same instant.
  now = Time.current
  puts "\nReminder: After restoration has been initiated, it will take 3-5 hours until the files are available for download. "\
       "The current time is #{now}, so the files should be available after #{now + 5.hours}."
  puts "--------------------"
  puts "Errors: " + (errors_encountered.empty? ? 'None' : "\n#{errors_encountered.join("\n")}")
end

# NOTE(review): the hunk ends with the opening lines of the pre-existing `fixity_check` task. Its body
# lies outside this view, so it is not rewritten here; the visible lines are retained verbatim below as
# commentary only:
#
#   desc 'Run a fixity check using a remote CheckPlease app deployment.'
#   task fixity_check: :environment do
#     bucket_name = ENV['bucket_name']