From 325cbea97fe5d25cdd6f0398d2e91af36380bfb2 Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Fri, 11 Oct 2024 14:48:35 -0700
Subject: [PATCH 1/6] feat: script to update the datastore on c1 and selectively run the migration

---
Review notes (ignored by git am):
- Line structure and indentation of this series were restored from a
  whitespace-collapsed copy; indentation follows Ansible conventions and the
  hunk line counts, exact original whitespace could not be recovered.
- Task name "Let snapshots from gitcoin-rust-ceramic-1" corrected to
  "List snapshots ..." to match its sibling task. The index line below still
  names the original blob.

 ansible/roles/migrate-kubo-c1/tasks/main.yml | 85 ++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 ansible/roles/migrate-kubo-c1/tasks/main.yml

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
new file mode 100644
index 0000000..1c042b2
--- /dev/null
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -0,0 +1,85 @@
+---
+# Script to update the ceramic-one blockstore and run migration on the updated blocks
+
+
+# Get latest common snapshot between the kubo and c1 datastores
+- name: Get latest common snapshot between gitcoin-go-ipfs-1 and gitcoin-rust-ceramic-1
+  block:
+    - name: List snapshots from gitcoin-go-ipfs-1
+      ansible.builtin.shell:
+        cmd: zfs list -H -t snapshot -o name ipfspool/data-store
+      register: kubo_snapshots
+      delegate_to: gitcoin-go-ipfs-1
+
+    - name: List snapshots from gitcoin-rust-ceramic-1
+      ansible.builtin.shell:
+        cmd: zfs list -H -t snapshot -o name migrationpool/data-store
+      register: c1_snapshots
+      delegate_to: gitcoin-rust-ceramic-1
+
+    - name: Find latest common snapshot
+      ansible.builtin.shell:
+        cmd: |
+          kubo_snaps="{{ kubo_snapshots.stdout_lines | join('\n') }}"
+          c1_snaps="{{ c1_snapshots.stdout_lines | join('\n') }}"
+          echo "$kubo_snaps" | grep -F "$(echo "$c1_snaps" | sed 's/migrationpool\/data-store@//')" | tail -n 1
+      register: common_snapshot
+      failed_when: common_snapshot.rc != 0 or common_snapshot.stdout == ""
+      delegate_to: localhost
+
+    - name: Display latest common snapshot
+      ansible.builtin.debug:
+        var: common_snapshot.stdout
+
+  run_once: true
+
+# Create new kubo snapshot
+- name: Create a new snapshot on gitcoin-go-ipfs-1
+  ansible.buildin.shell:
+    cmd: |
+      snapshot_name="ipfspool/data-store@c1_migration_$(date +%Y%m%d_%H%M%S)"
+      zfs snapshot "$snapshot_name"
+      echo "$snapshot_name"
+  delegate_to: gitcoin-go-ipfs-1
+  become: yes
+  register: new_snapshot
+
+# Compare to the new snapshot to latest common snapshot and get file list
+- name: Run zfs diff and stream results
+  block:
+    - name: Execute zfs diff
+      ansible.builtin.shell:
+        cmd: |
+          output_file="/tmp/zfs_diff_output_$(date +%Y%m%d_%H%M%S).txt"
+          zfs diff -F "{{ common_snapshot.stdout }} {{ new_snapshot }} | tee "$output_file"
+      args:
+        executable: /bin/bash
+      register: diff_result
+      delegate_to: gitcoin-go-ipfs-1
+      become: yes
+
+    - name: Display diff results in real time
+      ansible.builtin.debug:
+        var: diff_result.stdout_lines
+      when: diff_result.stdout_lines | length > 0
+
+  strategy: free
+
+- name: Fetch the diff results file
+  ansible.builtin.fetch:
+    src: "/tmp/zfs_diff_output.txt"
+    dest: "zfs_diff_{{ inventory_hostname }}.txt"
+    flat: yes
+  delegate_to: gitcoin-go-ipfs-1
+
+- name: Register diff results
+  ansible.builtin.set_fact:
+    zfs_diff_results: "{{ diff_result.stdout_lines }}"
+
+# TODO
+
+# zfs send new snapshot to the c1 node
+
+# run the migration script on the c1 node only on the changed blocks
+
+

From bb9a8b1a1639c70dc23a522237966cdb2d18f286 Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Mon, 14 Oct 2024 22:31:34 -0700
Subject: [PATCH 2/6] feat: run the diff on the c1 node, run migration on unprocessed modified files

---
Review notes (ignored by git am):
- "Create new snapshot on ipfs node" registered the output of a bare
  `zfs snapshot`, which prints nothing on success, so new_snapshot.stdout was
  empty when `zfs send -i ... {{ new_snapshot.stdout }}` consumed it. The task
  now builds the snapshot name once, takes the snapshot and echoes the name
  (the same pattern the task it replaces used). Changed in place on one line,
  so hunk counts are unchanged; the index line still names the original blob.

 ansible/roles/migrate-kubo-c1/tasks/main.yml | 100 ++++++++++++++-----
 1 file changed, 73 insertions(+), 27 deletions(-)

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
index 1c042b2..575f8ed 100644
--- a/ansible/roles/migrate-kubo-c1/tasks/main.yml
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -33,53 +33,99 @@
 
   run_once: true
 
-# Create new kubo snapshot
-- name: Create a new snapshot on gitcoin-go-ipfs-1
-  ansible.buildin.shell:
+- name: Create new snapshot on ipfs node
+  ansible.builtin.shell:
+    cmd: snap="ipfspool/data-store@$(date +%Y%m%d_%H%M%S)" && zfs snapshot "$snap" && echo "$snap"
+  register: new_snapshot
+  delegate_to: gitcoin-go-ipfs-1
+
+- name: Send incremental snapshot to c1 node
+  ansible.builtin.shell:
     cmd: |
-      snapshot_name="ipfspool/data-store@c1_migration_$(date +%Y%m%d_%H%M%S)"
-      zfs snapshot "$snapshot_name"
-      echo "$snapshot_name"
+      zfs send -i {{ common_snapshot.stdout }} {{ new_snapshot.stdout }} | \
+        ssh gitcoin-rust-ceramic-1 'zfs receive migrationpool/data-store'
   delegate_to: gitcoin-go-ipfs-1
-  become: yes
-  register: new_snapshot
 
-# Compare to the new snapshot to latest common snapshot and get file list
-- name: Run zfs diff and stream results
+- name: Set output file name for diff results
+  ansible.builtin.set_fact:
+    diff_output_file: "/tmp/zfs_diff_output_$(date +%Y%m%d_%H%M%S).txt"
+    modified_block_file: "/tmp/zfs_modified_blockfiles_$(date +%Y%m%d_%H%M%S).txt"
+    files_to_process: "/tmp/zfs_modified_files_to_process_$(date +%Y%m%d_%H%M%S).txt"
+    already_processed: "/tmp/zfs_already_processed.txt"
+
+
+- name: Run zfs diff on the c1 node
   block:
     - name: Execute zfs diff
       ansible.builtin.shell:
         cmd: |
-          output_file="/tmp/zfs_diff_output_$(date +%Y%m%d_%H%M%S).txt"
-          zfs diff -F "{{ common_snapshot.stdout }} {{ new_snapshot }} | tee "$output_file"
+          zfs diff -F {{ common_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }} {{ new_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }} | tee "{{ diff_output_file }}"
       args:
         executable: /bin/bash
       register: diff_result
-      delegate_to: gitcoin-go-ipfs-1
+      delegate_to: gitcoin-rust-ceramic-1
       become: yes
-
+    
     - name: Display diff results in real time
       ansible.builtin.debug:
         var: diff_result.stdout_lines
       when: diff_result.stdout_lines | length > 0
+    
+      strategy: free
 
-  strategy: free
+# run the migration script on the c1 node only on the changed blocks
 
-- name: Fetch the diff results file
-  ansible.builtin.fetch:
-    src: "/tmp/zfs_diff_output.txt"
-    dest: "zfs_diff_{{ inventory_hostname }}.txt"
-    flat: yes
-  delegate_to: gitcoin-go-ipfs-1
+- name: Generate list of modified block files
+  ansible.builtin.shell:
+    cmd: |
+      awk '/^M\t\/\t/ {print $3}' "{{ diff_output_file }}" | grep '/go_ipfs_datastore/ipfs-data/blocks/' > "{{ modified_block_file }}"
+  args:
+    executable: /bin/bash
+  delegate_to: gitcoin-rust-ceramic-1
+  become: yes
 
-- name: Register diff results
-  ansible.builtin.set_fact:
-    zfs_diff_results: "{{ diff_result.stdout_lines }}"
+- name: Exclude files already processed
+  ansible.builtin.shell:
+    cmd: |
+      comm -23 <(sort -u "{{ modified_block_file }}") <(sort -u "{{ already_processed }}") > "{{ files_to_process }}"
+  args:
+    executable: /bin/bash
+  delegate_to: gitcoin-rust-ceramic-1
+  become: yes
 
-# TODO
 
-# zfs send new snapshot to the c1 node
+- name: Run migration and update processed files list
+  block:
 
-# run the migration script on the c1 node only on the changed blocks
+    # TODO correct how we run this script TODO #
+    - name: Run migration on modified files not already processed
+      ansible.builtin.command:
+        cmd: >
+          ceramic-one migrations from-ipfs
+          --input-ipfs-path {{ input_ipfs_path }}
+          --output-store-path {{ output_store_path }}
+          --input-file-list-path {{ files_to_process }}
+          --network {{ network }}
+          {% if local_network_id is defined %}--local-network-id {{ local_network_id }}{% endif %}
+          {% if non_sharded_paths %}--non-sharded-paths{% endif %}
+          {% if log_tile_docs %}--log-tile-docs{% endif %}
+      environment:
+        CERAMIC_ONE_INPUT_FILE_LIST_PATH: "{{ files_to_process }}"
+      delegate_to: gitcoin-rust-ceramic-1
+      become: yes
 
+    - name: Append processed files to already_processed list
+      ansible.builtin.shell:
+        cmd: cat {{ files_to_process }} >> {{ already_processed }}
+      delegate_to: gitcoin-rust-ceramic-1
+      become: yes
 
+  always:
+    - name: Display migration completion message
+      ansible.builtin.debug:
+        msg: "Migration process completed. Check logs for details."
+
+  rescue:
+    - name: Display migration failure message
+      ansible.builtin.debug:
+        msg: "Migration process failed. Check logs for errors."

From b8140f9edbfebe2f96b154580680a3e2dd057d65 Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Tue, 15 Oct 2024 10:26:12 -0700
Subject: [PATCH 3/6] maybe we can't output in real time

---
 ansible/roles/migrate-kubo-c1/tasks/main.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
index 575f8ed..da4e3cd 100644
--- a/ansible/roles/migrate-kubo-c1/tasks/main.yml
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -71,7 +71,6 @@
         var: diff_result.stdout_lines
       when: diff_result.stdout_lines | length > 0
     
-      strategy: free
 
 # run the migration script on the c1 node only on the changed blocks
 

From 64e5399a9bed84c8a76f50497fc0c2117b051075 Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Wed, 16 Oct 2024 13:56:13 -0700
Subject: [PATCH 4/6] feat: simplify with fdfind

---
Review notes (ignored by git am) - items left unchanged, to be confirmed:
- NOTE(review): from_date / to_date are computed from from_date_raw /
  to_date_raw, but nothing in this series defines those names (only
  from_date_fn / to_date_fn are set under vars) - confirm where they come from.
- NOTE(review): "Get ZFS snapshot creation dates" is delegated to
  gitcoin-rust-ceramic-1 but queries the ipfspool/... snapshot names collected
  on gitcoin-go-ipfs-1; patch 2 rewrote the pool name to migrationpool for the
  c1 side - confirm whether the same rewrite is needed here.
- NOTE(review): the migration task still uses ansible.builtin.command while the
  new cmd relies on `>` redirection and backslash continuations inside a folded
  scalar; the command module does not run through a shell - confirm whether
  ansible.builtin.shell with a literal block was intended.

 ansible/roles/migrate-kubo-c1/tasks/main.yml | 110 ++++++++++---------
 1 file changed, 57 insertions(+), 53 deletions(-)

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
index da4e3cd..9363bc1 100644
--- a/ansible/roles/migrate-kubo-c1/tasks/main.yml
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -1,6 +1,20 @@
 ---
 # Script to update the ceramic-one blockstore and run migration on the updated blocks
 
+# Basic plan:
+
+# Get latest common snapshot
+
+# create new snapshot and send incremental update since latest common snapshot from kubo -> c1
+
+# get file list of changed files and dates between dates of latest common snapshot & new snapshot
+
+# diff with processed list of files and dates
+
+# run migration on the todos (not yet processed) and add to processed list as each completes
+
+##################################################################################################
+###### Send Incremental Snapshot ###############################################################
 
 # Get latest common snapshot between the kubo and c1 datastores
 - name: Get latest common snapshot between gitcoin-go-ipfs-1 and gitcoin-rust-ceramic-1
@@ -46,53 +60,50 @@
         ssh gitcoin-rust-ceramic-1 'zfs receive migrationpool/data-store'
   delegate_to: gitcoin-go-ipfs-1
 
-- name: Set output file name for diff results
-  ansible.builtin.set_fact:
-    diff_output_file: "/tmp/zfs_diff_output_$(date +%Y%m%d_%H%M%S).txt"
-    modified_block_file: "/tmp/zfs_modified_blockfiles_$(date +%Y%m%d_%H%M%S).txt"
-    files_to_process: "/tmp/zfs_modified_files_to_process_$(date +%Y%m%d_%H%M%S).txt"
-    already_processed: "/tmp/zfs_already_processed.txt"
-
-
-- name: Run zfs diff on the c1 node
+- name: Get time window between snapshots
   block:
-    - name: Execute zfs diff
-      ansible.builtin.shell:
-        cmd: |
-          zfs diff -F {{ common_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }} {{ new_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }} | tee "{{ diff_output_file }}"
-      args:
-        executable: /bin/bash
-      register: diff_result
-      delegate_to: gitcoin-rust-ceramic-1
-      become: yes
-    
-    - name: Display diff results in real time
-      ansible.builtin.debug:
-        var: diff_result.stdout_lines
-      when: diff_result.stdout_lines | length > 0
-    
+    - name: Get ZFS snapshot creation dates
+      ansible.builtin.shell: |
+        snapshot1_date=$(zfs get -H -o value creation {{ common_snapshot.stdout }} | xargs -I{} date -d {} '+%Y%m%d_%H%M%S')
+        snapshot2_date=$(zfs get -H -o value creation {{ new_snapshot.stdout }} | xargs -I{} date -d {} '+%Y%m%d_%H%M%S')
+        echo "$snapshot1_date"
+        echo "$snapshot2_date"
+      register: snapshot_dates_result
+
+    - name: Set facts for snapshot dates and filenames
+      ansible.builtin.set_fact:
+
+        # filename for modified blocks within the date window
+        modified_blocks: "/home/migrator/modified_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+
+        # log of all processed blocks for this window
+        # (even if we processed a file in a previous window, it must be reprocessed in this window)
+        processed_blocks: "/home/migrator/processed_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+        migration_outfile: "/home/migrator/migrations_output_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+
+        # formats for use in fdfind command
+        from_date: "{{ from_date_raw | strftime('%Y-%m-%d %H:%M:%S', '%Y%m%d_%H%M%S') }}"
+        to_date: "{{ to_date_raw | strftime('%Y-%m-%d %H:%M:%S', '%Y%m%d_%H%M%S') }}"
+      vars:
+        # we have output the data in a format suitable for filename segments
+        from_date_fn: "{{ snapshot_dates_result.stdout_lines[0] }}"
+        to_date_fn: "{{ snapshot_dates_result.stdout_lines[1] }}"
+  delegate_to: gitcoin-rust-ceramic-1
 
-# run the migration script on the c1 node only on the changed blocks
 
-- name: Generate list of modified block files
-  ansible.builtin.shell:
-    cmd: |
-      awk '/^M\t\/\t/ {print $3}' "{{ diff_output_file }}" | grep '/go_ipfs_datastore/ipfs-data/blocks/' > "{{ modified_block_file }}"
-  args:
-    executable: /bin/bash
-  delegate_to: gitcoin-rust-ceramic-1
-  become: yes
+##################################################################################################
+###### Generate List of Modified Blocks To Migrate #########################################
 
-- name: Exclude files already processed
+- name: Run fdfind on the c1 node after the snapshot is sent to find files modified between snapshots
   ansible.builtin.shell:
     cmd: |
-      comm -23 <(sort -u "{{ modified_block_file }}") <(sort -u "{{ already_processed }}") > "{{ files_to_process }}"
-  args:
-    executable: /bin/bash
+      fdfind . '/migration_datastore/ipfs-data/blocks' --changed-after '{{ from_date }}' --changed-before '{{ to_date }}' > {{ modified_blocks }}
   delegate_to: gitcoin-rust-ceramic-1
-  become: yes
 
 
+##################################################################################################
+###### Run the migration script on the C1 node on the changed blocks ##########################
+
 - name: Run migration and update processed files list
   block:
 
@@ -100,22 +111,15 @@
     - name: Run migration on modified files not already processed
       ansible.builtin.command:
         cmd: >
-          ceramic-one migrations from-ipfs
-          --input-ipfs-path {{ input_ipfs_path }}
-          --output-store-path {{ output_store_path }}
-          --input-file-list-path {{ files_to_process }}
-          --network {{ network }}
-          {% if local_network_id is defined %}--local-network-id {{ local_network_id }}{% endif %}
-          {% if non_sharded_paths %}--non-sharded-paths{% endif %}
-          {% if log_tile_docs %}--log-tile-docs{% endif %}
+          ceramic-one migrations from-ipfs \
+          --input-ipfs-path /migration_datastore/ipfs-data \
+          --output-store-path /ceramic_one_datastore \
+          --network mainnet \
+          --input-file-list-path {{ modified_blocks }} \
+          --log-tile-docs \
+          --log-format single-line > {{ migration_outfile }}
       environment:
-        CERAMIC_ONE_INPUT_FILE_LIST_PATH: "{{ files_to_process }}"
-      delegate_to: gitcoin-rust-ceramic-1
-      become: yes
-
-    - name: Append processed files to already_processed list
-      ansible.builtin.shell:
-        cmd: cat {{ files_to_process }} >> {{ already_processed }}
+        CERAMIC_ONE_INPUT_FILE_LIST_PATH: "{{ modified_blocks }}"
       delegate_to: gitcoin-rust-ceramic-1
       become: yes
 

From 73152a68e8a9dc9af9f63ff980b8dd83328f4a6e Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Fri, 18 Oct 2024 09:54:23 -0700
Subject: [PATCH 5/6] Update main.yml

---
 ansible/roles/migrate-kubo-c1/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
index 9363bc1..92d3cf9 100644
--- a/ansible/roles/migrate-kubo-c1/tasks/main.yml
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -97,7 +97,7 @@
 - name: Run fdfind on the c1 node after the snapshot is sent to find files modified between snapshots
   ansible.builtin.shell:
     cmd: |
-      fdfind . '/migration_datastore/ipfs-data/blocks' --changed-after '{{ from_date }}' --changed-before '{{ to_date }}' > {{ modified_blocks }}
+      fdfind . '/migration_datastore/ipfs-data/blocks' --extension data --changed-after '{{ from_date }}' --changed-before '{{ to_date }}' > {{ modified_blocks }}
   delegate_to: gitcoin-rust-ceramic-1
 
 

From 306122b81ad91cd1815d07bb23e2bac2a2dff2f0 Mon Sep 17 00:00:00 2001
From: Golda Velez
Date: Fri, 18 Oct 2024 10:01:38 -0700
Subject: [PATCH 6/6] fix: use the large drive for process files instead of a user

---
Review notes (ignored by git am):
- The second hunk is written with one extra line of trailing context
  (-89,7 +89,8) so that it matches the two blank lines that patch 4/6, as
  restored in this series, leaves before the "Generate List" banner.

 ansible/roles/migrate-kubo-c1/tasks/main.yml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ansible/roles/migrate-kubo-c1/tasks/main.yml b/ansible/roles/migrate-kubo-c1/tasks/main.yml
index 92d3cf9..d4d84bc 100644
--- a/ansible/roles/migrate-kubo-c1/tasks/main.yml
+++ b/ansible/roles/migrate-kubo-c1/tasks/main.yml
@@ -74,12 +74,12 @@
       ansible.builtin.set_fact:
 
         # filename for modified blocks within the date window
-        modified_blocks: "/home/migrator/modified_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+        modified_blocks: "/ceramic_one_datastore/migration/modified_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
 
         # log of all processed blocks for this window
         # (even if we processed a file in a previous window, it must be reprocessed in this window)
-        processed_blocks: "/home/migrator/processed_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
-        migration_outfile: "/home/migrator/migrations_output_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+        processed_blocks: "/ceramic_one_datastore/migration/processed_blocks_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
+        migration_outfile: "/ceramic_one_datastore/migration/migrations_output_{{ from_date_fn }}_to_{{ to_date_fn }}.txt"
 
         # formats for use in fdfind command
         from_date: "{{ from_date_raw | strftime('%Y-%m-%d %H:%M:%S', '%Y%m%d_%H%M%S') }}"
@@ -89,7 +89,8 @@
         from_date_fn: "{{ snapshot_dates_result.stdout_lines[0] }}"
         to_date_fn: "{{ snapshot_dates_result.stdout_lines[1] }}"
   delegate_to: gitcoin-rust-ceramic-1
+  become: yes
 
 
 ##################################################################################################
 ###### Generate List of Modified Blocks To Migrate #########################################
@@ -99,7 +100,7 @@
     cmd: |
       fdfind . '/migration_datastore/ipfs-data/blocks' --extension data --changed-after '{{ from_date }}' --changed-before '{{ to_date }}' > {{ modified_blocks }}
   delegate_to: gitcoin-rust-ceramic-1
-
+  become: yes
 
 ##################################################################################################
 ###### Run the migration script on the C1 node on the changed blocks ##########################