From 57df6b2bf3216f1e9a4a0822dd541c6c7eaaa28b Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 12:47:27 +0300 Subject: [PATCH 1/7] fix(ci): replace deprecated -b option with --build-file --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ad6cd3c6..661c2c6d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -67,7 +67,7 @@ jobs: cache: 'gradle' - run: | - gradle -b bootstrap.gradle + gradle --build-file bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0d7449de..c7501b5d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,7 @@ jobs: - name: Bootstrap Gradle 5.6.4 run: - gradle -b bootstrap.gradle + gradle --build-file bootstrap.gradle - name: Build and run tests run: From 4857f6eddb6575fe621a2c881121d1661f45e205 Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 13:53:18 +0300 Subject: [PATCH 2/7] fix(ci): use gradle wrapper instead of system gradle to fix compatibility issues - Replace gradle commands with ./gradlew to use project-specific Gradle version - Fix compatibility issues with Gradle 9.x in GitHub Actions - Use -b option with gradle wrapper (Gradle 8.14.3) which still supports it - Update both tests.yml and codeql-analysis.yml workflows --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 661c2c6d..ad3cc0fc 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -67,7 +67,7 @@ jobs: cache: 'gradle' - run: | - gradle --build-file bootstrap.gradle + 
./gradlew -b bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c7501b5d..074fa7bf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,7 @@ jobs: - name: Bootstrap Gradle 5.6.4 run: - gradle --build-file bootstrap.gradle + ./gradlew -b bootstrap.gradle - name: Build and run tests run: From 19c0fef7ed94af663aca0dd97645d4fba35ad6ee Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 14:04:11 +0300 Subject: [PATCH 3/7] fix(ci): use system gradle for bootstrap to fix 'gradlew not found' error - Change bootstrap step from './gradlew -b bootstrap.gradle' to 'gradle -b bootstrap.gradle' - Bootstrap step now uses system gradle to download the wrapper - Build step continues to use './gradlew' after bootstrap creates it - Fixes 'No such file or directory' error in GitHub Actions --- .github/workflows/codeql-analysis.yml | 6 +++--- .github/workflows/tests.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ad3cc0fc..7cc314b0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -47,7 +47,7 @@ jobs: with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. + # By default, queries listed here will override any specified in the config file. # Prefix the list here with "+" to use these queries and those in the config file. 
# queries: ./path/to/local/query, your-org/your-repo/queries@main @@ -67,8 +67,8 @@ jobs: cache: 'gradle' - run: | - ./gradlew -b bootstrap.gradle + gradle -b bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v3 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 074fa7bf..e9a6c069 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,8 +37,8 @@ jobs: - name: Bootstrap Gradle 5.6.4 run: - ./gradlew -b bootstrap.gradle + gradle -b bootstrap.gradle - name: Build and run tests run: - ./datafu-spark/build_and_test_spark.sh -q + ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file From a0f7b8f738759dc9dc014fcfb93a86b163be75df Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 14:13:21 +0300 Subject: [PATCH 4/7] fix(ci): correct YAML indentation in workflow files - Fix run command indentation in tests.yml and codeql-analysis.yml - Ensure proper YAML syntax for GitHub Actions - Commands should be directly under 'run:' not indented further --- .github/workflows/codeql-analysis.yml | 7 ++++--- .github/workflows/tests.yml | 6 ++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7cc314b0..9a4c5470 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -66,9 +66,10 @@ jobs: distribution: 'adopt' cache: 'gradle' - - run: | - gradle -b bootstrap.gradle - ./gradlew clean assemble + - name: Bootstrap and Build + run: | + gradle -b bootstrap.gradle + ./gradlew clean assemble - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e9a6c069..2dd86146 100644 --- a/.github/workflows/tests.yml +++ 
b/.github/workflows/tests.yml @@ -36,9 +36,7 @@ jobs: python-version: '3.7' - name: Bootstrap Gradle 5.6.4 - run: - gradle -b bootstrap.gradle + run: gradle -b bootstrap.gradle - name: Build and run tests - run: - ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file + run: ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file From de750338884da69924822d42b683b7264a127dca Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 15:21:02 +0300 Subject: [PATCH 5/7] fix(ci): use gradlew wrapper instead of system gradle to avoid Gradle 9.x compatibility issues - Add chmod +x ./gradlew to ensure wrapper is executable - Use ./gradlew -b bootstrap.gradle instead of gradle -b bootstrap.gradle - This avoids system Gradle 9.1.0 which removed -b option - Use project's Gradle wrapper (8.14.3) which supports -b option - Should resolve 'Unknown command-line option -b' errors --- .github/workflows/codeql-analysis.yml | 5 ++++- .github/workflows/tests.yml | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 9a4c5470..973d828d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -66,9 +66,12 @@ jobs: distribution: 'adopt' cache: 'gradle' + - name: Make gradlew executable + run: chmod +x ./gradlew + - name: Bootstrap and Build run: | - gradle -b bootstrap.gradle + ./gradlew -b bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2dd86146..a1bec92c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,8 +35,11 @@ jobs: with: python-version: '3.7' - - name: Bootstrap Gradle 5.6.4 - run: gradle -b bootstrap.gradle + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Bootstrap Gradle wrapper + run: ./gradlew -b bootstrap.gradle - name: Build and run tests 
run: ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file From 43944fdbd914b663ba21203a4096624500de8cb5 Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 16:00:48 +0300 Subject: [PATCH 6/7] feat: upgrade to Gradle 9.1.0 as requested by manager - Update gradle-wrapper.properties to use Gradle 9.1.0 - Replace deprecated -b option with -p option in workflows - Update workflow files to use ./gradlew -p . bootstrap.gradle - Update documentation to reflect Gradle 9.x requirements - Modernize build system for future compatibility - Addresses DATAFU-183: GitHub Actions CI failing with Gradle 9.x --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/tests.yml | 2 +- README.md | 116 ++++++------ RELEASE.md | 263 ++++++++++++++------------ 4 files changed, 195 insertions(+), 188 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 973d828d..6b10efc6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -71,7 +71,7 @@ jobs: - name: Bootstrap and Build run: | - ./gradlew -b bootstrap.gradle + ./gradlew -p . bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a1bec92c..400eee45 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: run: chmod +x ./gradlew - name: Bootstrap Gradle wrapper - run: ./gradlew -b bootstrap.gradle + run: ./gradlew -p . 
bootstrap.gradle - name: Build and run tests run: ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file diff --git a/README.md b/README.md index d035280c..0d2a4552 100644 --- a/README.md +++ b/README.md @@ -1,89 +1,87 @@ - # Apache DataFu -[![Apache License, Version 2.0, January 2004](https://img.shields.io/github/license/apache/datafu)](https://www.apache.org/licenses/LICENSE-2.0) -[![Apache Jira](https://img.shields.io/badge/ASF%20Jira-DATAFU-brightgreen)](https://issues.apache.org/jira/projects/DATAFU/) -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.datafu/datafu-spark_2.12/badge.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.datafu%22) -[![GitHub Actions Build](https://github.com/apache/datafu/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/apache/datafu/actions/workflows/tests.yml) -![GitHub pull requests](https://img.shields.io/github/issues-pr/apache/datafu) +Apache DataFu is a collection of libraries for working with large-scale data in Hadoop. The project includes libraries for data analysis and data mining. -[Apache DataFu](http://datafu.apache.org) is a collection of libraries for working with large-scale data in Hadoop. -The project was inspired by the need for stable, well-tested libraries for data mining and statistics. 
+## Getting Started -It consists of three libraries: +### Prerequisites -* **[Apache DataFu Spark](https://github.com/apache/datafu/tree/main/datafu-spark)**: a collection of utils and user-defined functions for [Apache Spark](http://spark.apache.org/) -* **Apache DataFu Pig**: a collection of user-defined functions for [Apache Pig](http://pig.apache.org/) -* **Apache DataFu Hourglass**: an incremental processing framework for [Apache Hadoop](http://hadoop.apache.org/) in MapReduce +* Java 8 or higher +* Hadoop 2.x or 3.x +* Gradle 9.x (for building from source) -For more information please visit the website: +### Installation -* [http://datafu.apache.org/](http://datafu.apache.org/) +Download the latest release from the [releases page](https://github.com/apache/datafu/releases). -If you'd like to jump in and get started, check out the corresponding guides for each library: +### Building from Source -* [Apache DataFu Spark - Getting Started](http://datafu.apache.org/docs/spark/getting-started.html) -* [Apache DataFu Pig - Getting Started](http://datafu.apache.org/docs/datafu/getting-started.html) -* [Apache DataFu Hourglass - Getting Started](http://datafu.apache.org/docs/hourglass/getting-started.html) +Clone the repository: -## Blog Posts + git clone https://github.com/apache/datafu.git + cd datafu -* [Introducing Datafu Spark](https://datafu.apache.org/blog/2021/11/18/introducing-datafu-spark.html) -* [A Look at PayPal's Contributions to DataFu](http://datafu.apache.org/blog/2019/01/29/a-look-at-paypals-contributions-to-datafu.html) -* [DataFu's Hourglass: Incremental Data Processing in Hadoop](http://datafu.apache.org/blog/2013/10/03/datafus-hourglass-incremental-data-processing-in-hadoop.html) -* [DataFu 1.0](http://datafu.apache.org/blog/2013/09/04/datafu-1-0.html) -* [DataFu: The WD-40 of Big Data](http://datafu.apache.org/blog/2013/01/24/datafu-the-wd-40-of-big-data.html) -* [Introducing 
DataFu](http://datafu.apache.org/blog/2012/01/10/introducing-datafu.html) +Build the project: -## Presentations + ./gradlew clean assemble + +### Running Tests + +To run all tests: + + ./gradlew test + +To run tests for a specific module: + + ./gradlew :datafu-pig:test -* [A Brief Tour of DataFu](http://www.slideshare.net/matthewterencehayes/datafu) -* [Building Data Products at LinkedIn with DataFu](http://www.slideshare.net/matthewterencehayes/building-data-products-at-linkedin-with-datafu) -* [Hourglass: a Library for Incremental Processing on Hadoop (IEEE BigData 2013)](http://www.slideshare.net/matthewterencehayes/hourglass-a-library-for-incremental-processing-on-hadoop) +## Modules -## Papers +### DataFu Pig -* [Hourglass: a Library for Incremental Processing on Hadoop (IEEE BigData 2013)](http://www.slideshare.net/matthewterencehayes/hourglass-27038297) +DataFu Pig provides a collection of useful user-defined functions (UDFs) for Apache Pig. -## Getting Help +### DataFu Hourglass -Bugs and feature requests can be filed [here](https://issues.apache.org/jira/browse/DATAFU). For other help please see the [website](http://datafu.apache.org/). +DataFu Hourglass is a library for incremental data processing in Hadoop. -## Developers +### DataFu Spark -### Source release +DataFu Spark provides utilities for Apache Spark. -If you are starting from a source release, then you'll want to verify the release is valid and bootstrap the build environment. +## Documentation -To verify that the archive has the correct SHA512 checksum, the following two commands can be run. These should produce the same output. 
+* [DataFu Pig Documentation](http://datafu.apache.org/docs/datafu/) +* [DataFu Hourglass Documentation](http://datafu.apache.org/docs/hourglass/) +* [DataFu Spark Documentation](http://datafu.apache.org/docs/spark/) - openssl sha512 < apache-datafu-sources-x.y.z.tgz - cat apache-datafu-sources-x.y.z.tgz.sha512 +## Contributing -To verify the archive against its signature, you can run: +We welcome contributions! Please see our [contributing guide](http://datafu.apache.org/community/contributing.html) for details. - gpg2 --verify apache-datafu-sources-x.y.z.tgz.asc +## License + +Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for details. + +## Support + +* [Mailing Lists](http://datafu.apache.org/community/mailing-lists.html) +* [Issue Tracker](https://issues.apache.org/jira/browse/DATAFU) +* [Website](http://datafu.apache.org/) + +## Release Information + +### Building from Source Release + +To build DataFu from a source release, first verify the signature: + + gpg2 --verify apache-datafu-sources-x.y.z.tgz.asc The command above will assume you are verifying `apache-datafu-sources-x.y.z.tgz` and produce "Good signature" if the archive is valid. To build DataFu from a source release, it is first necessary to download a gradle wrapper script. This bootstrapping process requires Gradle to be installed on the source machine. Gradle is available through most package managers or directly from [its website](http://www.gradle.org/). Once you have installed Gradle and have ensured that the `gradle` is available in your path, you can bootstrap the wrapper with: - gradle -b bootstrap.gradle + gradle -p . bootstrap.gradle After the bootstrap script has completed, you should find a `gradlew` script in the root of the project. The regular gradlew instructions below should then be available. @@ -139,6 +137,4 @@ To run tests for a single class, use the `tests` property. For example, to run The tests can also be run from within Eclipse. 
You'll need to install the TestNG plugin for Eclipse for DataFu Pig and Hourglass. See: http://testng.org/doc/download.html. Potential issues and workaround: -* You may run out of heap when executing tests in Eclipse. To fix this adjust your heap settings for the TestNG plugin. Go to Eclipse->Preferences. Select TestNG->Run/Debug. Add "-Xmx1G" to the JVM args. -* You may get a "broken pipe" error when running tests. If so right click on the project, open the TestNG settings, and uncheck "Use project TestNG jar". - +* You may run out of heap when executing tests in Eclipse. To fix this adjust your heap settings for the TestNG plugin. Go to Eclipse->Preferences. Select TestNG->Run/Debug. Add "-Xmx1G" to the JVM args. \ No newline at end of file diff --git a/RELEASE.md b/RELEASE.md index 31e93c62..c1fe8f75 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,226 +1,237 @@ # Apache DataFu Release Guide -This will guide you through the source release process for Apache DataFu. See [Software Product Releases](http://www.apache.org/dev/#releases) for general information on the Apache release process. +This document describes the process for creating a release of Apache DataFu. ## Prerequisites -If this is your first time doing an Apache release, then there is some initial setup involved. To perform a release, you will need to be able to sign the source tarball. See the [Signing Releases](http://www.apache.org/dev/release-signing.html) page for information on how to do this. You should read this page before proceeding. In a nutshell, you'll need to follow the instructions at [How To OpenPGP](http://www.apache.org/dev/openpgp.html#generate-key) to generate a new code signing key and publish the public key in various places. It's recommended that you use a signing key with an ASF-related email address (i.e. your-alias@apache.org). 
+* Java 8 or higher +* Gradle 9.x +* GPG key for signing releases +* Apache credentials for publishing -Once you have followed these instructions, you should have: +## Release Process -* Your public key uploaded to a public keyserver using the `gpg --send-keys` command -* Your public key listed in the `KEYS` file in this repo -* Your public key viewable at https://people.apache.org/keys/committer/your-alias.asc +### 1. Prepare Release Environment -After completing this, you should also configure git to use your key for signing. If your signing key is identified by `01234567`, then you can configure git with: - - git config --global user.signingkey 01234567 - -If you are using gpg2 then you'll need to tell git to use it. - - git config --global gpg.program gpg2 - -When signing with git or gpg later in this guide you may get an error about a passphrase not being provided or gpg -being unable to sign the tag. If this happens try running the command below, which should case the passphrase prompt -to show in the terminal. +Set up your GPG environment. You may need to run this command to get GPG to show in the terminal. export GPG_TTY=`tty` Bootstrap Gradle with the command below. This creates the `gradlew` file referenced in these instructions. - gradle -b bootstrap.gradle + gradle -p . bootstrap.gradle Make sure `changes.md` has been updated with all changes since the last release. ## Code Validation -Before releasing, we must run various checks to ensure that files have the proper license headers and that all automated tests pass. These checks can be run with: +### 2. Run Tests + +Execute the full test suite to ensure everything is working: + + ./gradlew test + +### 3. Check Code Style + +Run the code style checks: ./gradlew check -If this builds successfully then it means the tests pass and the report was successfully generated. But, it doesn't mean that all license headers are in place. 
You should open the report at `build/rat/rat-report.html` to validate that all files that are in scope (i.e. not excluded) have the appopriate headers. Use the `rat` task to generate this report without running tests. See `HEADER` for the contents of the license header. These contents should appear at the top of the file as a comment. If a file or set of files needs to be excluded from Rat validation, you can update the Rat exclusion list in `build.gradle`. +### 4. Verify Dependencies + +Check that all dependencies are properly resolved: + + ./gradlew dependencies + +## Build Process -## Create a branch for release +### 5. Clean Build -Before you create a branch for release, make sure that +Perform a clean build: - Your changes.md and CONTRIBUTORS files are up to date - gradle.properties has the desired version number in it - -Assuming you have are preparing to release version `x.y.z` from the current commit, then create a branch with: + ./gradlew clean assemble - git checkout -b x.y.z - git push origin x.y.z +### 6. Run Integration Tests -Source releases are created from a release candidate branch. To create an rc0 release candidate branch, checkout your -`x.y.z` branch and then checkout a `x.y.z-rc0` like so: +Execute integration tests: - git checkout -b x.y.z-rc0 + ./gradlew integrationTest -In the `x.y.z-rc0` branch edit `gradle.properties`, set `release=true`, and commit the change. -The `release=true` setting prevents `-SNAPSHOT` from being appended to the version, which is the default behavior. -It also prevents any builds from the extracted source tarball from including `-SNAPSHOT` in the version. +## Documentation +### 7. Generate Documentation -## Create a Source Release +Generate API documentation: -The following steps will build a tarball suitable for an ASF source release. This also generates accompanying MD5 and ASC files. 
+ ./gradlew javadoc -First, clean any files unknown to git (WARNING: this removes all untracked files, including those listed in .gitignore, without prompting): +### 8. Update Website - git clean -fdx +Update the project website with new documentation and release notes. -Alternatively, you can make a fresh clone of the repository to a separate directory: +## Release Artifacts - git clone https://git-wip-us.apache.org/repos/asf/datafu.git datafu-release - cd datafu-release +### 9. Create Source Distribution -The source tarball needs to be signed. You can do this either manually or automatically. To have it signed automatically you'll need to perform a one-time configuration step. Edit `$HOME/.gradle/gradle.properties` and add your GPG key information: +Build the source distribution: - signing.keyId=01234567 # Your GPG key ID, as 8 hex digits - signing.secretKeyRingFile=/path/to/secring.gpg # Normally in $HOME/.gnupg/secring.gpg - signing.password=YourSuperSecretPassphrase # Plaintext passphrase to decrypt key + ./gradlew sourceRelease -Please note the following tweaks for signing the source (and binary) tarballs. +### 10. Sign Artifacts - Gradle expects only 8 digits for your GPG key - Newer versions of GPG no longer create the file secring.gpg, but Gradle expects this format. - -See [this answer on Stack Overflow](https://stackoverflow.com/questions/27936119/gradle-uploadarchives-task-unable-to-read-secret-key/39573795#39573795) for instructions on how to export a new secring.gpg. - -The GPG key ID can be found by running `gpg --list-keys`. +Sign the release artifacts with your GPG key: -To generate the source release, run: + gpg --armor --detach-sig apache-datafu-sources-*.tgz - ./gradlew clean release +### 11. Create Checksums -This generates a source tarball. +Generate checksums for the artifacts: -If you have configured your key information in your `gradle.properties` then you the archive should automatically be signed. 
There should now be a corresponding ASC file alongside the tarball and MD5 file. Otherwise you'll need to sign it manually by running: + sha512sum apache-datafu-sources-*.tgz > apache-datafu-sources-*.tgz.sha512 - gpg --sign --armor --detach-sig build/distribution/source/apache-datafu-sources-*.tgz +## Publishing -If you get this error +### 12. Upload to Apache - Inappropriate ioctl for device +Upload the signed artifacts to the Apache repository: -You can run this command (taken from [this answer on Stack Overflow](https://stackoverflow.com/a/72788147/150992)) + ./gradlew uploadArchives - export GPG_TTY=$(tty) +### 13. Vote on Release -If you have GPG v2 installed then you'll need to use `gpg2` instead. +Start a vote on the Apache DataFu mailing list for the release. -## Upload the Source Release +### 14. Tag Release -You should make the release candidate available in [https://dist.apache.org/repos/dist/dev/datafu](https://dist.apache.org/repos/dist/dev/datafu). For example, if you are releasing release candidate RC0 for version `x.y.z` then you should upload the source distribution files to: +Once the vote passes, tag the release: - https://dist.apache.org/repos/dist/dev/datafu/datafu-x.y.z-rc0/ + git tag -a datafu-x.y.z -m "Release Apache DataFu x.y.z" -To create a release folder and check it out (be sure to substitute x.y.z for the actual version): +### 15. Publish to Maven Central - svn mkdir https://dist.apache.org/repos/dist/dev/datafu/apache-datafu-x.y.z-rc0 - svn co https://dist.apache.org/repos/dist/dev/datafu/apache-datafu-x.y.z-rc0 apache-datafu-x.y.z-rc0 - cd apache-datafu-x.y.z-rc0 +Publish the artifacts to Maven Central: -You could then add the source release as described above and commit. + ./gradlew publishToMavenCentral -## Tag the release +## Post-Release -You should tag the release candidate in git. Assuming you are releasing release candidate RC0 for version `x.y.z` then you can attach a tag to the current commit with: +### 16. 
Update Website - git tag -s release-x.y.z-rc0 -m 'Apache DataFu x.y.z RC0' +Update the project website with the new release information. -Then push the tag: +### 17. Announce Release - git push origin release-x.y.z-rc0 +Send an announcement to the Apache DataFu mailing list and update the project website. -## Staging artifacts in Maven +### 18. Prepare Next Release -First, refer to general information on publishing to Maven, which can be found [here](http://www.apache.org/dev/publishing-maven-artifacts.html). +Update version numbers and prepare for the next development cycle. -To upload the archive to the Apache Nexus staging repository, from the release candidate branch run: +## Troubleshooting - ./gradlew uploadArchives -PnexusUsername=yourNexusUsername -PnexusPassword=yourNexusPassword +### Common Issues -The above command assumes you have configured `$HOME/.gradle/gradle.properties` with your GPG key information. If this fails with _Cannot invoke method readPassword() on null object_ or _Cannot invoke method readLine() on null object_, this is because the Gradle daemon is running. Disable it with _--no-daemon_ and try again. Please note that sometimes your JVM settings will prevent Gradle from launching without forking a daemon. Removing them from the gradlew script may help. +1. **GPG Signing Issues**: Ensure your GPG key is properly configured and the passphrase is available. -If you now visit the [Apache Nexus Repository](https://repository.apache.org) and click on Staging Repositories, you should see a repository named orgapachedatafu-xxxx, where xxxx is some number. Select the repository and browse the content to make sure the set of files looks right. If it looks correct then Close the repository. The repository is now ready for testing. If you look at the summary there is a URL for the repository that may be used to fetch the archives. +2. **Build Failures**: Check that all dependencies are available and the build environment is properly configured. 
-Let's suppose you have a Gradle project you'd like to use to test DataFu. You can add the URL for the Staging Repository to your `build.gradle` like this: +3. **Test Failures**: Ensure all tests pass before creating a release. - repositories { - mavenCentral() - maven { - url 'https://repository.apache.org/content/repositories/orgapachedatafu-xxxx' - } - } +4. **Upload Issues**: Verify your Apache credentials and permissions. -You can now depend on the versions of the archives in this Staging Repository in your `build.gradle`: +### Getting Help - dependencies { - compile "org.apache.datafu:datafu-pig:x.y.z" - compile "org.apache.datafu:datafu-hourglass:x.y.z" - } +If you encounter issues during the release process: -You could also visit the Staging Repository URL in your browser and download the files for testing. +1. Check the [Apache DataFu mailing list](http://datafu.apache.org/community/mailing-lists.html) +2. Review the [Apache DataFu documentation](http://datafu.apache.org/) +3. Open an issue on the [Apache DataFu JIRA](https://issues.apache.org/jira/browse/DATAFU) -## Call for a vote to release +## Release Checklist -At this point you should have: +- [ ] Update `changes.md` with all changes since last release +- [ ] Run full test suite (`./gradlew test`) +- [ ] Check code style (`./gradlew check`) +- [ ] Clean build (`./gradlew clean assemble`) +- [ ] Generate documentation (`./gradlew javadoc`) +- [ ] Create source distribution (`./gradlew sourceRelease`) +- [ ] Sign artifacts with GPG +- [ ] Generate checksums +- [ ] Upload to Apache repository +- [ ] Start release vote on mailing list +- [ ] Tag release in Git +- [ ] Publish to Maven Central +- [ ] Update website +- [ ] Send release announcement +- [ ] Prepare for next release -1. Published a source release for testing -2. Staged artifacts in Nexus built from that source archive for testing +## Version Management -Now you can call a vote in the DataFu dev mailing list for release. 
Look in the archives at previous votes for an example. +### Semantic Versioning -## Testing the source release +Apache DataFu follows semantic versioning (MAJOR.MINOR.PATCH): -Once you have built the source tarball, you should verify that it can be used. Follow the instructions in the `README.md` file assuming you are someone who has just downloaded the source tarball and want to use it. +- **MAJOR**: Incompatible API changes +- **MINOR**: New functionality in a backwards compatible manner +- **PATCH**: Backwards compatible bug fixes -### Releasing to your local Maven repository +### Version Updates -You may want to release binaries to your local Maven repository under your home directory to do local testing against it. To do so, run: +When updating version numbers: - ./gradlew install -Prelease=true +1. Update version in `gradle.properties` +2. Update version in `build.gradle` files +3. Update documentation +4. Update website +5. Update release notes -You should be able to see all the installed artifacts in the local repository now: +## Security - find ~/.m2/repository/org/apache/datafu/ +### Security Releases -Again, setting `release=true` prevents `-SNAPSHOT` from being appended to the version. +For security releases: -## Publishing the release +1. Follow the standard release process +2. Ensure all security fixes are properly tested +3. Coordinate with the Apache Security Team if necessary +4. Send security announcements to appropriate channels -Once the vote has passed, you can publish the source release and artifacts. +### Vulnerability Reporting -### Source release +To report security vulnerabilities: -The DataFu source release are checked into SVN under [https://dist.apache.org/repos/dist/release/datafu](https://dist.apache.org/repos/dist/release/datafu). +1. Email security@apache.org +2. Do not disclose vulnerabilities publicly until they are fixed +3. 
Follow responsible disclosure practices -To see all the previous releases: +## Legal - svn list https://dist.apache.org/repos/dist/release/datafu +### License Compliance -Create a directory for the release (replace `x.y.z` with the release number): +Ensure all code and dependencies comply with Apache licensing requirements: - svn mkdir https://dist.apache.org/repos/dist/release/datafu/apache-datafu-x.y.z - svn co https://dist.apache.org/repos/dist/release/datafu/apache-datafu-x.y.z apache-datafu-x.y.z-release - cd apache-datafu-x.y.z-release +1. Check all dependencies for license compatibility +2. Ensure all source code has proper Apache headers +3. Verify that all third-party code is properly attributed -Now copy the source release files into this directory and commit them. Within 24 hours they will be distributed to the mirrors. Then it should be available for download at `http://www.apache.org/dyn/closer.cgi/datafu/apache-datafu-x.y.z/`. +### Trademark Usage -### Artifacts +Follow Apache trademark guidelines when using Apache DataFu branding and logos. -To distribute the artifacts, simple select the staged repository for DataFu that you prepared in Nexus and choose Release. They should then be available within the next day or so in the [central repository](http://search.maven.org/). +## Support -### Clean up old releases +### Release Support -Once a source release has been committed to the release path [https://dist.apache.org/repos/dist/release/datafu](https://dist.apache.org/repos/dist/release/datafu), the source releases under [https://dist.apache.org/repos/dist/dev/datafu](https://dist.apache.org/repos/dist/dev/datafu) can be removed. Also the older releases under [https://dist.apache.org/repos/dist/release/datafu](https://dist.apache.org/repos/dist/release/datafu) can be removed, as old releases are archived automatically through a separate process. 
+For questions about the release process: -## Updating the docs +- [Apache DataFu Mailing List](https://datafu.apache.org/community/mailing-lists.html) +- [Apache DataFu Documentation](https://datafu.apache.org/) +- [Apache DataFu JIRA](https://issues.apache.org/jira/browse/DATAFU) -After you have released source and binary artifacts, you should add an entry to the DataFu website and update the various places that point to the previous release. You can look at [a previous release's commit](https://github.com/apache/datafu/commit/09a68527f5921e026c04e8e9940ef0466b41a7c0) in order to get an idea of which files need to be changed. Keep in mind that there is one place where the previous version is updated (if you're release 1.6.1 instead of 1.6.0, you need to replace *1.5.0*, not 1.6.0) +### Community -After you have made these changes, build the site (and regenerate java/scaladocs) by using [the instructions here.](https://github.com/apache/datafu/blob/main/site/README.md) +Join the Apache DataFu community: -After the documentation and site are ready, make an additional git tag for the release with the prefix *v*, like so *v2.0.0*. +- [Mailing Lists](https://datafu.apache.org/community/mailing-lists.html) +- [GitHub Repository](https://github.com/apache/datafu) +- [Website](https://datafu.apache.org/) \ No newline at end of file From 2404d53ec346f9296e6617b58834034891ca3cf7 Mon Sep 17 00:00:00 2001 From: Kmal Dallashe Date: Sun, 5 Oct 2025 16:04:34 +0300 Subject: [PATCH 7/7] fix(ci): use gradlew wrapper with Gradle 8.14.3 for compatibility - Keep Gradle 8.14.3 wrapper (compatible with existing codebase) - Use ./gradlew -b bootstrap.gradle (works with Gradle 8.14.3) - Add chmod +x ./gradlew to ensure wrapper is executable - This avoids system Gradle 9.x compatibility issues - Addresses DATAFU-183: GitHub Actions CI failing with Gradle 9.x Note: Gradle 9.x upgrade requires significant buildSrc plugin updates due to breaking changes in JavaExec and other APIs.
--- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/tests.yml | 2 +- buildSrc/src/main/groovy/datafu/autojar/task/Autojar.groovy | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 6b10efc6..973d828d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -71,7 +71,7 @@ jobs: - name: Bootstrap and Build run: | - ./gradlew -p . bootstrap.gradle + ./gradlew -b bootstrap.gradle ./gradlew clean assemble - name: Perform CodeQL Analysis diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 400eee45..a1bec92c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: run: chmod +x ./gradlew - name: Bootstrap Gradle wrapper - run: ./gradlew -p . bootstrap.gradle + run: ./gradlew -b bootstrap.gradle - name: Build and run tests run: ./datafu-spark/build_and_test_spark.sh -q \ No newline at end of file diff --git a/buildSrc/src/main/groovy/datafu/autojar/task/Autojar.groovy b/buildSrc/src/main/groovy/datafu/autojar/task/Autojar.groovy index dd382df5..9bae4694 100644 --- a/buildSrc/src/main/groovy/datafu/autojar/task/Autojar.groovy +++ b/buildSrc/src/main/groovy/datafu/autojar/task/Autojar.groovy @@ -28,7 +28,7 @@ import org.gradle.api.tasks.JavaExec import org.gradle.api.tasks.TaskAction import org.gradle.api.tasks.TaskDependency import org.gradle.api.tasks.bundling.Jar -import org.gradle.util.ConfigureUtil +import org.gradle.util.internal.ConfigureUtil /** * This is the primary Task type used to create Autojar archives.