From 7a12eb5f7a96e68cd370285b96c8894a48693e6a Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Fri, 20 Jun 2025 11:59:37 -0400 Subject: [PATCH 1/8] LIBAVALON-373. Use Minio for local storage https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- config/environments/development.rb | 2 +- docker-compose.yml | 95 +++++++++++++++++------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/config/environments/development.rb b/config/environments/development.rb index 9d411205a7..d2ab5c928a 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -34,7 +34,7 @@ end # Store uploaded files on the local file system (see config/storage.yml for options). - config.active_storage.service = :local + config.active_storage.service = :generic_s3 # Don't care if the mailer can't send. config.action_mailer.raise_delivery_errors = false diff --git a/docker-compose.yml b/docker-compose.yml index f6b36de338..f5f1058477 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,10 +8,7 @@ volumes: fedora: solr: npms: - # UMD Customization - # data: - work: - # End UMD Customization + data: networks: internal: @@ -74,7 +71,7 @@ services: # End UMD Customization environment: - AVALON_DOMAIN=http://avalon:3000 - - AVALON_STREAMING_BASE_URL=http://localhost:8880 + - AVALON_STREAMING_BUCKET_URL=http://minio:9000/derivatives/ # UMD Customization volumes: - streaming:/data @@ -82,6 +79,7 @@ services: ports: - '8880:80' networks: + internal: external: redis: &redis @@ -105,6 +103,7 @@ services: - solr - redis - hls + - minio environment: # UMD Customization - APP_NAME=avalon @@ -112,21 +111,36 @@ services: - SETTINGS__DOMAIN_HOST=av-local - SETTINGS__DOMAIN_PORT=3000 - BUNDLE_FLAGS=--with development postgres --without production test - - ENCODE_WORK_DIR=/streamfiles + - ENCODE_WORK_DIR=/tmp - CONTROLLED_VOCABULARY=config/controlled_vocabulary.yml - DATABASE_URL=postgres://postgres:password@db/avalon - 
DIGITAL_COLLECTIONS_URL=https://digital.lib.umd.edu/ - DATABASE_CLEANER_ALLOW_REMOTE_DATABASE_URL=true - FEDORA_NAMESPACE=avalon - FEDORA_URL=http://fedoraAdmin:fedoraAdmin@fedora:8080/fedora/rest + - DIGITAL_COLLECTIONS_URL=https://digital.lib.umd.edu/rest - RAILS_ENV=development - RAILS_ADDITIONAL_HOSTS=avalon - SETTINGS__REDIS__HOST=redis - SETTINGS__REDIS__PORT=6379 - SECRET_KEY_BASE=abcd - SOLR_URL=http://solr:8983/solr/avalon - - SETTINGS__STREAMING__CONTENT_PATH=/streamfiles + # - SETTINGS__STREAMING__CONTENT_PATH=/streamfiles + - AWS_REGION=us-east-1 + - SETTINGS__ACTIVE_STORAGE__BUCKET=supplementalfiles + - SETTINGS__ACTIVE_STORAGE__SERVICE=generic_s3 - SETTINGS__FFMPEG__PATH=/usr/bin/ffmpeg + - SETTINGS__MINIO__ENDPOINT=http://minio:9000 + - SETTINGS__MINIO__PUBLIC_HOST=http://localhost:9000 + - SETTINGS__MINIO__ACCESS=minio + - SETTINGS__MINIO__SECRET=minio123 + - SETTINGS__ENCODING__MASTERFILE_BUCKET=masterfiles + - SETTINGS__ENCODING__DERIVATIVE_BUCKET=derivatives + - SETTINGS__DROPBOX__PATH=s3://masterfiles/dropbox/ + - SETTINGS__DROPBOX__UPLOAD_URI=s3://masterfiles/dropbox/ + - SETTINGS__MASTER_FILE_MANAGEMENT__PATH=s3://masterfiles/archive/ + - SETTINGS__MASTER_FILE_MANAGEMENT__STRATEGY=move + - SETTINGS__STREAMING__CONTENT_PATH=/ - SETTINGS__STREAMING__STREAM_DEFAULT_QUALITY=medium - SETTINGS__STREAMING__HTTP_BASE=http://localhost:8880/avalon - SETTINGS__STREAMING__SERVER=nginx @@ -179,41 +193,40 @@ services: <<: *avalon command: dumb-init -- bash -c "bundle install && bundle exec sidekiq -C config/sidekiq.yml" ports: [] -# UMD Customization - # minio: - # image: minio/minio:RELEASE.2019-10-12T01-39-57Z - # command: minio server /data - # environment: - # MINIO_ACCESS_KEY: minio - # MINIO_SECRET_KEY: minio123 - # volumes: - # - data:/data - # ports: - # - 9000:9000 - # networks: - # internal: - # external: - # healthcheck: - # test: ['CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live'] - # interval: 30s - # timeout: 20s - # retries: 3 - 
# createbuckets: - # image: minio/mc - # depends_on: - # - minio - # entrypoint: > - # /bin/sh -c " - # /usr/bin/mc config host add myminio http://minio:9000 minio minio123; - # /usr/bin/mc mb -p myminio/fcrepo myminio/masterfiles myminio/derivatives myminio/supplementalfiles; - # /usr/bin/mc policy set download myminio/derivatives; - # /usr/bin/mc policy set download myminio/supplementalfiles; - # exit 0; - # " - # networks: - # internal: -# End UMD Customization + minio: + image: minio/minio:RELEASE.2019-10-12T01-39-57Z + command: minio server /data + environment: + MINIO_ACCESS_KEY: minio + MINIO_SECRET_KEY: minio123 + volumes: + - data:/data + ports: + - 9000:9000 + networks: + internal: + external: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + createbuckets: + image: minio/mc + depends_on: + - minio + entrypoint: > + /bin/sh -c " + /usr/bin/mc config host add myminio http://minio:9000 minio minio123; + /usr/bin/mc mb -p myminio/fcrepo myminio/masterfiles myminio/derivatives myminio/supplementalfiles myminio/preserves; + /usr/bin/mc policy set download myminio/derivatives; + /usr/bin/mc policy set download myminio/supplementalfiles; + exit 0; + " + networks: + internal: cypress: network_mode: host From 670d8f175b18c290063d37ba14b2683004592400 Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 09:20:07 -0400 Subject: [PATCH 2/8] LIBAVALON-373. Updated MinIO with changes from 7.8. 
https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- docker-compose.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f5f1058477..6c5eb8d741 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -194,9 +194,11 @@ services: command: dumb-init -- bash -c "bundle install && bundle exec sidekiq -C config/sidekiq.yml" ports: [] + # UMD Customization + # Ported in changes from Avalon 7.8 minio: - image: minio/minio:RELEASE.2019-10-12T01-39-57Z - command: minio server /data + image: minio/minio:RELEASE.2024-01-29T03-56-32Z + command: minio server /data --console-address ":9090" environment: MINIO_ACCESS_KEY: minio MINIO_SECRET_KEY: minio123 @@ -204,6 +206,7 @@ services: - data:/data ports: - 9000:9000 + - 9090:9090 networks: internal: external: @@ -219,14 +222,15 @@ services: - minio entrypoint: > /bin/sh -c " - /usr/bin/mc config host add myminio http://minio:9000 minio minio123; + /usr/bin/mc alias set myminio http://minio:9000 minio minio123; /usr/bin/mc mb -p myminio/fcrepo myminio/masterfiles myminio/derivatives myminio/supplementalfiles myminio/preserves; - /usr/bin/mc policy set download myminio/derivatives; - /usr/bin/mc policy set download myminio/supplementalfiles; + /usr/bin/mc anonymous set download myminio/derivatives; + /usr/bin/mc anonymous set download myminio/supplementalfiles; exit 0; " networks: internal: + # End UMD Customization cypress: network_mode: host From debe7a170874185dd5ed4bc156c85d92a90a2c02 Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 09:22:40 -0400 Subject: [PATCH 3/8] LIBAVALON-373. Allow mixed streaming. - Old `/avalon` will be streamed from filesystem. 
- New `/s3-avalon` will be streamed from S3/MinIO https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6c5eb8d741..6a50cca7be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -66,7 +66,7 @@ services: hls: # UMD Customization - image: docker.lib.umd.edu/nginx:avalon-7.6-umd-0 + image: docker.lib.umd.edu/nginx:avalon-7.6-umd-LIBAVALON-373 platform: linux/amd64 # End UMD Customization environment: @@ -142,7 +142,7 @@ services: - SETTINGS__MASTER_FILE_MANAGEMENT__STRATEGY=move - SETTINGS__STREAMING__CONTENT_PATH=/ - SETTINGS__STREAMING__STREAM_DEFAULT_QUALITY=medium - - SETTINGS__STREAMING__HTTP_BASE=http://localhost:8880/avalon + - SETTINGS__STREAMING__HTTP_BASE=http://localhost:8880/s3-avalon - SETTINGS__STREAMING__SERVER=nginx - SETTINGS__STREAMING__STREAM_TOKEN_TTL=20 - SYSTEM_GROUPS=administrator,group_manager,manager From 31613d5984e16021b9543dfab919971a85eff433 Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 10:10:08 -0400 Subject: [PATCH 4/8] LIBAVALON-373. 
Revert dropbox config in settings.yml This is configured using ENV in the docker-compose.yml https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- config/settings.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/config/settings.yml b/config/settings.yml index ae96bcb0b1..3ddbc09e0b 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -6,10 +6,8 @@ domain: protocol: http # End UMD Customization dropbox: - # UMD Customization - path: /masterfiles/dropbox - upload_uri: /masterfiles/dropbox - # End UMD Customization + path: /srv/avalon/dropbox + upload_uri: sftp://avalon.example.edu # google_drive: # client_id: id # client_secret: secret From 59d44f41353f8148ce8960b4bd22d43ff51507dd Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 10:13:12 -0400 Subject: [PATCH 5/8] LIBAVALON-373. Minio related doc updates. https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- umd_docs/DockerDevelopmentEnvironment.md | 32 +++++++++++++++--------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/umd_docs/DockerDevelopmentEnvironment.md b/umd_docs/DockerDevelopmentEnvironment.md index 163bcd28b3..b01ce0ecbc 100644 --- a/umd_docs/DockerDevelopmentEnvironment.md +++ b/umd_docs/DockerDevelopmentEnvironment.md @@ -17,6 +17,13 @@ of Avalon using Docker. 127.0.0.1 av-local ``` +3) Install the Minio Client and configure it. + + ```zsh + brew install minio-mc + mc alias set minio http://minio:9000 minio minio123; + ``` + ### Setup Instructions 1) Checkout the application and switch to the directory: @@ -93,6 +100,8 @@ of Avalon using Docker. Avalon should be available at: [http://av-local:3000](http://av-local:3000) + The MinIO browser should be accessible at + ### Loading Sample Data Sample data for Avalon is available in the @@ -117,8 +126,7 @@ in the folder for a description of each dataset. then left-click the "Create Collection" button in the dialog. 
Once created, the "Manage Content" page with the collection will be displayed. Also, a - "masterfiles/dropbox/Test_Collection/" subdirectory will be created in the - Avalon project directory. + "dropbox/Test_Collection/" folder will be created in the masterfiles bucket. 3) In a terminal, add a `sample-data@example.com` admin user (which is the email address of the submitter in the sample datasets) by executing a Bash @@ -143,23 +151,23 @@ in the folder for a description of each dataset. ``` 4) Download the "Sample_Audio_and_Video" folder (as a Zip file) from - and place it in the - "masterfiles/dropbox/Test_Collection/" folder. In a terminal, - switch to the "masterfiles/dropbox/Test_Collection/" subdirectory: + and place it in a + temporary folder. In a terminal, switch to the directory containing + the download and extract the file. ```zsh - cd masterfiles/dropbox/Test_Collection/ + cd + unzip Sample_Audio_and_Video.zip ``` - and extract the file: + Copy the files to the masterfiles bucket dropbox folder. - ```zsh - unzip Sample_Audio_and_Video.zip + ```sh + mc cp -r Sample_Audio_and_Video/assets minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ + mc cp -r Sample_Audio_and_Video/batch_manifest.xlsx minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ ``` - and extract the file. - -5) The "avalon-worker" container scans the "masterfiles/dropbox" directory +5) The "avalon-worker" container scans the "masterfiles/dropbox" bucket once a minute, and ingests any new items found. Depending on the size of the dataset, and the need to transcode the content, the ingest may take several minutes. From d1a8a35a5252a42942ca2106f8770f97661bfe5f Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 15:01:32 -0400 Subject: [PATCH 6/8] LIBAVALON-373. More documentation update. 
https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- umd_docs/DockerDevelopmentEnvironment.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/umd_docs/DockerDevelopmentEnvironment.md b/umd_docs/DockerDevelopmentEnvironment.md index b01ce0ecbc..4db757a2dc 100644 --- a/umd_docs/DockerDevelopmentEnvironment.md +++ b/umd_docs/DockerDevelopmentEnvironment.md @@ -21,7 +21,7 @@ of Avalon using Docker. ```zsh brew install minio-mc - mc alias set minio http://minio:9000 minio minio123; + mc alias set minio http://localhost:9000 minio minio123; ``` ### Setup Instructions @@ -102,6 +102,13 @@ of Avalon using Docker. The MinIO browser should be accessible at +9) In a separate terminal, run the createbuckets container to create the + necessary buckets in MinIO + + ```zsh + docker-compose up createbuckets + ``` + ### Loading Sample Data Sample data for Avalon is available in the From b75058012f45aeddbeec1d4fdeb77e31ad4c4892 Mon Sep 17 00:00:00 2001 From: Mohamed Abdul Rasheed Date: Tue, 8 Jul 2025 17:30:15 -0400 Subject: [PATCH 7/8] LIBAVALON-373. Move minio alias command to after starting the server. https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- umd_docs/DockerDevelopmentEnvironment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umd_docs/DockerDevelopmentEnvironment.md b/umd_docs/DockerDevelopmentEnvironment.md index 4db757a2dc..62d0ccf160 100644 --- a/umd_docs/DockerDevelopmentEnvironment.md +++ b/umd_docs/DockerDevelopmentEnvironment.md @@ -21,7 +21,6 @@ of Avalon using Docker. ```zsh brew install minio-mc - mc alias set minio http://localhost:9000 minio minio123; ``` ### Setup Instructions @@ -170,6 +169,7 @@ in the folder for a description of each dataset. Copy the files to the masterfiles bucket dropbox folder. 
```sh + mc alias set minio http://localhost:9000 minio minio123; mc cp -r Sample_Audio_and_Video/assets minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ mc cp -r Sample_Audio_and_Video/batch_manifest.xlsx minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ ``` From 61cb82595139cfc1e35a9ec50fa3744f0c8d920f Mon Sep 17 00:00:00 2001 From: "David P. Steelman" Date: Wed, 9 Jul 2025 10:59:05 -0400 Subject: [PATCH 8/8] LIBAVALON-373. Development Environment documentation updates Updated Docker Development Environment documentation, moving the "mc alias" command to before the "createbuckets" step, as that seemed to be a more natural place to put it. Note that the "mc alias" command appears to be idempotent, so it can be run multiple times on a user's workstation without ill effect. Simplified the creation of the "sample-data" administrative user to a single Docker command. Parameterized the uploading of files to minio, so that it will be simpler to upload different sample datasets. Also used the "mc mirror" command to upload everything *except* the XLSX manifest file (as it has an "--exclude" flag that "mc cp" does not provide), and then used "mc cp" to upload the manifest. https://umd-dit.atlassian.net/browse/LIBAVALON-373 --- umd_docs/DockerDevelopmentEnvironment.md | 40 +++++++++++------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/umd_docs/DockerDevelopmentEnvironment.md b/umd_docs/DockerDevelopmentEnvironment.md index 62d0ccf160..6beb1ef683 100644 --- a/umd_docs/DockerDevelopmentEnvironment.md +++ b/umd_docs/DockerDevelopmentEnvironment.md @@ -17,7 +17,7 @@ of Avalon using Docker. 127.0.0.1 av-local ``` -3) Install the Minio Client and configure it. +3) Install the Minio Client ```zsh brew install minio-mc @@ -105,6 +105,7 @@ of Avalon using Docker. 
necessary buckets in MinIO ```zsh + mc alias set minio http://localhost:9000 minio minio123; docker-compose up createbuckets ``` @@ -135,25 +136,14 @@ in the folder for a description of each dataset. "dropbox/Test_Collection/" folder will be created in the masterfiles bucket. 3) In a terminal, add a `sample-data@example.com` admin user (which is the - email address of the submitter in the sample datasets) by executing a Bash - shell in the "avalon-avalon-1" Docker container: - - ```zsh - docker exec -it avalon-avalon-1 /bin/bash - ``` - - and running the following command: + email address of the submitter in the sample datasets) by running the + following command: ```zsh - rails avalon:user:create \ - avalon_username=sample-data@example.com avalon_password=PASSWORD \ - avalon_groups=administrator - ``` - - then exit the Docker container: - - ```zsh - exit + docker exec -it avalon-avalon-1 /bin/bash -c \ + 'rails avalon:user:create \ + avalon_username=sample-data@example.com avalon_password=PASSWORD \ + avalon_groups=administrator' ``` 4) Download the "Sample_Audio_and_Video" folder (as a Zip file) from @@ -166,14 +156,20 @@ in the folder for a description of each dataset. unzip Sample_Audio_and_Video.zip ``` - Copy the files to the masterfiles bucket dropbox folder. + Copy the files to the masterfiles bucket dropbox folder. In the following commands, + the "UPLOAD_DIR" environment variable contains the name of the extracted + folder from the Zip file (to simplify uploading different sample datasets). 
```sh - mc alias set minio http://localhost:9000 minio minio123; - mc cp -r Sample_Audio_and_Video/assets minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ - mc cp -r Sample_Audio_and_Video/batch_manifest.xlsx minio/masterfiles/dropbox/Test_Collection/Sample_Audio_and_Video/ + export UPLOAD_DIR=Sample_Audio_and_Video + mc mirror --exclude "*.xlsx" $UPLOAD_DIR minio/masterfiles/dropbox/Test_Collection/$UPLOAD_DIR/ + mc cp -r $UPLOAD_DIR/*.xlsx minio/masterfiles/dropbox/Test_Collection/$UPLOAD_DIR/ ``` + **Note:** Uploading the assets and the "xlsx" manifest file separately is a + best practice, because it ensures that the Avalon "worker" process won't + start the import before the asset files are available. + 5) The "avalon-worker" container scans the "masterfiles/dropbox" bucket once a minute, and ingests any new items found. Depending on the size of the dataset, and the need to transcode the content, the ingest may