diff --git a/.envrc b/.envrc index 60f1536b5..d4a7ef2b6 100644 --- a/.envrc +++ b/.envrc @@ -3,3 +3,4 @@ export PATH="$PWD/javascript/packages/printer/bin:$PATH" export PATH="$PWD/javascript/packages/formatter/bin:$PATH" export PATH="$PWD/javascript/packages/language-server/bin:$PATH" export PATH="$PWD/javascript/packages/highlighter/bin:$PATH" +export PATH="$PWD/javascript/packages/stimulus-lint/bin:$PATH" diff --git a/.github/workflows/build-gems.yml b/.github/workflows/build-gems.yml index 8fa627fd9..2082d4f02 100644 --- a/.github/workflows/build-gems.yml +++ b/.github/workflows/build-gems.yml @@ -65,7 +65,7 @@ jobs: - name: Add LLVM apt Repo run: |- wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-19 main" + sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" sudo apt update - name: Install APT dependencies diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cd5820660..283fa105d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,6 @@ name: Build -on: [push] +on: [push, pull_request] permissions: contents: read @@ -18,7 +18,7 @@ jobs: - name: Add LLVM apt Repo run: |- wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-19 main" + sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" sudo apt update - name: Install APT dependencies @@ -57,13 +57,13 @@ jobs: run: ./run_herb_tests - name: clang-format version - run: clang-format-19 --version + run: clang-format-21 --version - name: Lint run: bin/lint - name: clang-tidy version - run: clang-tidy-19 --version + run: clang-tidy-21 --version - name: Tidy run: bin/tidy diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 038be7d83..84d94d085 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -43,7 +43,7 @@ jobs: - name: Add LLVM apt Repo run: |- wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-19 main" + sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" sudo apt update - name: Install APT dependencies diff --git a/.github/workflows/javascript.yml b/.github/workflows/javascript.yml index 25e0de88a..35b873fc1 100644 --- a/.github/workflows/javascript.yml +++ b/.github/workflows/javascript.yml @@ -30,7 +30,7 @@ jobs: - name: Add LLVM apt Repo run: |- wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-19 main" + sudo add-apt-repository "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" sudo apt update - name: Install APT dependencies diff --git a/Aptfile b/Aptfile index df7a373cf..d52b811d7 100644 --- a/Aptfile +++ b/Aptfile @@ -1,7 +1,7 @@ check -clang-19 -clang-format-19 -clang-tidy-19 +clang-21 +clang-format-21 +clang-tidy-21 emscripten doxygen xvfb diff --git a/Brewfile b/Brewfile index b127e58ba..c568821f3 100644 --- a/Brewfile +++ b/Brewfile @@ -1,6 +1,6 @@ # frozen_string_literal: true brew "check" -brew "llvm@19" # for clang, clang-format and clang-tidy +brew "llvm@21" # for clang, clang-format and clang-tidy brew "emscripten" brew "doxygen" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e0bdc6276..04f6f9ad0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,17 +2,17 @@ Pull request, bug reports, and any other forms of contribution are welcomed and highly encouraged. -If you encounter any issues when following along with this file please dont hesitate to reach out and file an issue! +If you encounter any issues when following along with this file please don't hesitate to reach out and file an issue! ## Running Locally ### Requirements - [**Check**](https://libcheck.github.io/check/): For unit testing. -- [**Clang 19**](https://clang.llvm.org): The compiler used to build this project. -- [**Clang Format 19**](https://clang.llvm.org/docs/ClangFormat.html): For formatting the project. -- [**Clang Tidy 19**](https://clang.llvm.org/extra/clang-tidy/): For linting the project. -- [**Prism Ruby Parser v1.5.1**](https://github.com/ruby/prism/releases/tag/v1.5.1): We use Prism for Parsing the Ruby Source Code in the HTML+ERB files. +- [**Clang 21**](https://clang.llvm.org): The compiler used to build this project. +- [**Clang Format 21**](https://clang.llvm.org/docs/ClangFormat.html): For formatting the project. +- [**Clang Tidy 21**](https://clang.llvm.org/extra/clang-tidy/): For linting the project. +- [**Prism Ruby Parser v1.6.0**](https://github.com/ruby/prism/releases/tag/v1.6.0): We use Prism for Parsing the Ruby Source Code in the HTML+ERB files. - [**Ruby**](https://www.ruby-lang.org/en/): We need Ruby as a dependency for `bundler`. - [**Bundler**](https://bundler.io): We are using `bundler` to build [`prism`](https://github.com/ruby/prism) from source so we can build `herb` against it. - [**Emscripten**](https://emscripten.org): For the WebAssembly build of `libherb` so it can be used in the browser using the [`@herb-tools/browser`](https://github.com/marcoroth/herb/blob/main/javascript/packages/browser) package. @@ -27,7 +27,7 @@ xargs sudo apt-get install < Aptfile or: ```bash -sudo apt-get install check clang-19 clang-tidy-19 clang-format-19 emscripten doxygen +sudo apt-get install check clang-21 clang-tidy-21 clang-format-21 emscripten doxygen ``` ##### For macOS (using Homebrew) @@ -38,7 +38,7 @@ brew bundle or: ```bash -brew install check llvm@19 emscripten doxygen +brew install check llvm@21 emscripten doxygen ``` ### Building @@ -73,7 +73,6 @@ The `herb` executable exposes a few commands for interacting with `.html.erb` fi Herb 🌿 Powerful and seamless HTML-aware ERB parsing and tooling. ./herb lex [file] - Lex a file -./herb lex_json [file] - Lex a file and return the result as json. ./herb parse [file] - Parse a file ./herb ruby [file] - Extract Ruby from a file ./herb html [file] - Extract HTML from a file diff --git a/Gemfile b/Gemfile index b23142024..fe40af78f 100644 --- a/Gemfile +++ b/Gemfile @@ -4,7 +4,7 @@ source "https://rubygems.org" gemspec -gem "prism", github: "ruby/prism", tag: "v1.5.1" +gem "prism", github: "ruby/prism", tag: "v1.6.0" gem "actionview", "~> 8.0" gem "lz_string" diff --git a/Gemfile.lock b/Gemfile.lock index 4e87cabfe..5129d66c8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -9,15 +9,15 @@ GIT GIT remote: https://github.com/ruby/prism.git - revision: 23f16d31a7c57c4b927e1e3ec8f1281b45e7cb9f - tag: v1.5.1 + revision: 2924f8f8832d57def7895cd7e2cc199ee58b3a3b + tag: v1.6.0 specs: - prism (1.5.1) + prism (1.6.0) PATH remote: . specs: - herb (0.7.4) + herb (0.7.5) GEM remote: https://rubygems.org/ diff --git a/Makefile b/Makefile index 8b09e67f9..137f736d9 100644 --- a/Makefile +++ b/Makefile @@ -61,16 +61,16 @@ shared_flags = $(production_flags) $(shared_library_flags) $(prism_flags) ifeq ($(os),Linux) test_cflags = $(test_flags) -I/usr/include/check test_ldflags = -L/usr/lib/x86_64-linux-gnu -lcheck -lm -lsubunit $(prism_ldflags) - cc = clang-19 - clang_format = clang-format-19 - clang_tidy = clang-tidy-19 + cc = clang-21 + clang_format = clang-format-21 + clang_tidy = clang-tidy-21 endif ifeq ($(os),Darwin) brew_prefix := $(shell brew --prefix check) test_cflags = $(test_flags) -I$(brew_prefix)/include test_ldflags = -L$(brew_prefix)/lib -lcheck -lm $(prism_ldflags) - llvm_path = $(shell brew --prefix llvm@19) + llvm_path = $(shell brew --prefix llvm@21) cc = $(llvm_path)/bin/clang clang_format = $(llvm_path)/bin/clang-format clang_tidy = $(llvm_path)/bin/clang-tidy diff --git a/bin/leaks_parse b/bin/leaks_parse index 603132e80..0212a2948 100755 --- a/bin/leaks_parse +++ b/bin/leaks_parse @@ -1,7 +1,7 @@ #!/bin/bash if [ -z "$1" ]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi @@ -10,4 +10,13 @@ if [[ "$(uname)" != "Darwin" ]]; then exit 0 fi -leaks --atExit -- ./herb parse "$1" +TARGET="$1" + +if [ -d "$TARGET" ]; then + while IFS= read -r -d '' file; do + echo "Checking $file for leaks..." + leaks --atExit -- ./herb parse "$file" --silent || exit 1 + done < <(find "$TARGET" -name "*.html.erb" -type f -print0) +else + leaks --atExit -- ./herb parse "$TARGET" --silent +fi diff --git a/bin/publish_packages b/bin/publish_packages new file mode 100755 index 000000000..10766ab81 --- /dev/null +++ b/bin/publish_packages @@ -0,0 +1,25 @@ +#!/bin/bash + +set -euo pipefail + +echo "Building all packages..." +yarn nx run-many -t build --all + +echo "Running tests in all packages..." +yarn nx run-many -t test --all + +for package_dir in javascript/packages/*; do + if [ -d "$package_dir" ] && [ -f "$package_dir/package.json" ]; then + package=$(basename "$package_dir") + + if [ "$package" = "vscode" ]; then + echo "Skipping vscode package..." + continue + fi + + echo "Publishing $package..." + (cd "$package_dir" && yarn publish) + fi +done + +echo "All packages published successfully!" diff --git a/bin/setup b/bin/setup index 361c7f2e6..bb27d1eb8 100755 --- a/bin/setup +++ b/bin/setup @@ -2,5 +2,7 @@ set -e # Exit on error -make prism -make all +bundle install +bundle exec rake templates +yarn install +yarn build diff --git a/config.yml b/config.yml index b67bfb480..df26a43fa 100644 --- a/config.yml +++ b/config.yml @@ -34,8 +34,8 @@ errors: arguments: - token_type_to_string(found->type) - token_type_to_string(expected_type) - - found->location->start->line - - found->location->start->column + - found->location.start.line + - found->location.start.column fields: - name: expected_type @@ -49,8 +49,8 @@ errors: template: "Found closing tag `` at (%zu:%zu) without a matching opening tag." arguments: - closing_tag->value - - closing_tag->location->start->line - - closing_tag->location->start->column + - closing_tag->location.start.line + - closing_tag->location.start.column fields: - name: closing_tag @@ -61,8 +61,8 @@ errors: template: "Opening tag `<%s>` at (%zu:%zu) doesn't have a matching closing tag ``." arguments: - opening_tag->value - - opening_tag->location->start->line - - opening_tag->location->start->column + - opening_tag->location.start.line + - opening_tag->location.start.column - opening_tag->value fields: @@ -74,11 +74,11 @@ errors: template: "Opening tag `<%s>` at (%zu:%zu) closed with `` at (%zu:%zu)." arguments: - opening_tag->value - - opening_tag->location->start->line - - opening_tag->location->start->column + - opening_tag->location.start.line + - opening_tag->location.start.column - closing_tag->value - - closing_tag->location->start->line - - closing_tag->location->start->column + - closing_tag->location.start.line + - closing_tag->location.start.column fields: - name: opening_tag @@ -93,8 +93,8 @@ errors: arguments: - opening_quote->value - closing_quote->value - - closing_quote->location->start->line - - closing_quote->location->start->column + - closing_quote->location.start.line + - closing_quote->location.start.column fields: - name: opening_quote @@ -127,8 +127,8 @@ errors: template: "Tag `<%s>` opened at (%zu:%zu) was never closed before the end of document." arguments: - opening_tag->value - - opening_tag->location->start->line - - opening_tag->location->start->column + - opening_tag->location.start.line + - opening_tag->location.start.column fields: - name: opening_tag diff --git a/docs/.vitepress/theme/components/GitHubContributors.vue b/docs/.vitepress/theme/components/GitHubContributors.vue index 5b94a1549..487728c64 100644 --- a/docs/.vitepress/theme/components/GitHubContributors.vue +++ b/docs/.vitepress/theme/components/GitHubContributors.vue @@ -23,7 +23,10 @@ const error = ref(null) onMounted(async () => { try { - contributors.value = contributorsFile.slice(0, props.limit) + const excludedUsernames = ['marcoroth', 'dependabot[bot]'] + const filteredContributors = contributorsFile.filter(contributor => !excludedUsernames.includes(contributor.login)) + contributors.value = filteredContributors.slice(0, props.limit) + loading.value = false } catch (err) { error.value = `Failed to load contributors data for ${props.owner}/${props.repo}. Make sure the data file exists.` diff --git a/docs/.vitepress/transformers/herb-linter-transformer.mts b/docs/.vitepress/transformers/herb-linter-transformer.mts index 329189480..01cb4b02c 100644 --- a/docs/.vitepress/transformers/herb-linter-transformer.mts +++ b/docs/.vitepress/transformers/herb-linter-transformer.mts @@ -21,9 +21,9 @@ function createCustomTwoslashFunction(optionse) { return (code, lang, options) => { let fileName = undefined - // kinda of a hack to make sure we pass a `fileName` for the `erb-requires-trailing-newline` rule + // kind of a hack to make sure we pass a `fileName` for the `erb-require-trailing-newline` rule if (code.includes('▌')) { - fileName = "erb-requires-trailing-newline.html.erb" + fileName = "erb-require-trailing-newline.html.erb" } if (!lang || !['erb', 'html'].includes(lang)) { diff --git a/docs/package.json b/docs/package.json index f6685b400..cbda66736 100644 --- a/docs/package.json +++ b/docs/package.json @@ -19,9 +19,9 @@ "fetch:contributors": "mkdir -p .vitepress/data/ && gh api -X get https://api.github.com/repos/marcoroth/herb/contributors > .vitepress/data/contributors.json" }, "dependencies": { - "@herb-tools/browser": "0.7.4", - "@herb-tools/core": "0.7.4", - "@herb-tools/node": "0.7.4" + "@herb-tools/browser": "0.7.5", + "@herb-tools/core": "0.7.5", + "@herb-tools/node": "0.7.5" }, "devDependencies": { "@shikijs/vitepress-twoslash": "^3.4.2", diff --git a/examples/block_comment.html.erb b/examples/block_comment.html.erb new file mode 100644 index 000000000..c424fd687 --- /dev/null +++ b/examples/block_comment.html.erb @@ -0,0 +1,7 @@ +<% +=begin %> + This, while unusual, is a legal form of commenting. +<% +=end %> + +
Content
diff --git a/examples/case_in.html.erb b/examples/case_in.html.erb new file mode 100644 index 000000000..ff4e756d0 --- /dev/null +++ b/examples/case_in.html.erb @@ -0,0 +1,6 @@ +<% case {} %> +<% in {} %> + "matched" +<% else %> + "not matched" +<% end %> diff --git a/examples/complete_erb.html.erb b/examples/complete_erb.html.erb new file mode 100644 index 000000000..7b1b244a8 --- /dev/null +++ b/examples/complete_erb.html.erb @@ -0,0 +1 @@ +<%= hello %> diff --git a/examples/incomplete_erb.invalid.html.erb b/examples/incomplete_erb.invalid.html.erb new file mode 100644 index 000000000..56157a8bb --- /dev/null +++ b/examples/incomplete_erb.invalid.html.erb @@ -0,0 +1 @@ +<%= hello diff --git a/ext/herb/extconf.rb b/ext/herb/extconf.rb index 5dc44cfe4..cb215e568 100644 --- a/ext/herb/extconf.rb +++ b/ext/herb/extconf.rb @@ -15,6 +15,7 @@ prism_include_path = "#{prism_path}/include" $VPATH << "$(srcdir)/../../src" +$VPATH << "$(srcdir)/../../src/util" $VPATH << prism_src_path $VPATH << "#{prism_src_path}/util" diff --git a/ext/herb/extension.c b/ext/herb/extension.c index 7f83a0090..155908ac3 100644 --- a/ext/herb/extension.c +++ b/ext/herb/extension.c @@ -19,7 +19,7 @@ VALUE cParseResult; static VALUE Herb_lex(VALUE self, VALUE source) { char* string = (char*) check_string(source); - array_T* tokens = herb_lex(string); + hb_array_T* tokens = herb_lex(string); VALUE result = create_lex_result(tokens, source); @@ -30,7 +30,7 @@ static VALUE Herb_lex(VALUE self, VALUE source) { static VALUE Herb_lex_file(VALUE self, VALUE path) { char* file_path = (char*) check_string(path); - array_T* tokens = herb_lex_file(file_path); + hb_array_T* tokens = herb_lex_file(file_path); VALUE source_value = read_file_to_ruby_string(file_path); VALUE result = create_lex_result(tokens, source_value); @@ -85,45 +85,30 @@ static VALUE Herb_parse_file(VALUE self, VALUE path) { return result; } -static VALUE Herb_lex_to_json(VALUE self, VALUE source) { - char* string = (char*) check_string(source); - buffer_T output; - - if (!buffer_init(&output)) { return Qnil; } - - herb_lex_json_to_buffer(string, &output); - - VALUE result = rb_str_new(output.value, output.length); - - buffer_free(&output); - - return result; -} - static VALUE Herb_extract_ruby(VALUE self, VALUE source) { char* string = (char*) check_string(source); - buffer_T output; + hb_buffer_T output; - if (!buffer_init(&output)) { return Qnil; } + if (!hb_buffer_init(&output, strlen(string))) { return Qnil; } herb_extract_ruby_to_buffer(string, &output); VALUE result = rb_utf8_str_new_cstr(output.value); - buffer_free(&output); + free(output.value); return result; } static VALUE Herb_extract_html(VALUE self, VALUE source) { char* string = (char*) check_string(source); - buffer_T output; + hb_buffer_T output; - if (!buffer_init(&output)) { return Qnil; } + if (!hb_buffer_init(&output, strlen(string))) { return Qnil; } herb_extract_html_to_buffer(string, &output); VALUE result = rb_utf8_str_new_cstr(output.value); - buffer_free(&output); + free(output.value); return result; } @@ -151,7 +136,6 @@ void Init_herb(void) { rb_define_singleton_method(mHerb, "lex", Herb_lex, 1); rb_define_singleton_method(mHerb, "parse_file", Herb_parse_file, 1); rb_define_singleton_method(mHerb, "lex_file", Herb_lex_file, 1); - rb_define_singleton_method(mHerb, "lex_to_json", Herb_lex_to_json, 1); rb_define_singleton_method(mHerb, "extract_ruby", Herb_extract_ruby, 1); rb_define_singleton_method(mHerb, "extract_html", Herb_extract_html, 1); rb_define_singleton_method(mHerb, "version", Herb_version, 0); diff --git a/ext/herb/extension_helpers.c b/ext/herb/extension_helpers.c index 875f34d1f..2cfc0602b 100644 --- a/ext/herb/extension_helpers.c +++ b/ext/herb/extension_helpers.c @@ -20,32 +20,26 @@ const char* check_string(VALUE value) { return RSTRING_PTR(value); } -VALUE rb_position_from_c_struct(position_T* position) { - if (!position) { return Qnil; } - +VALUE rb_position_from_c_struct(position_T position) { VALUE args[2]; - args[0] = SIZET2NUM(position->line); - args[1] = SIZET2NUM(position->column); + args[0] = UINT2NUM(position.line); + args[1] = UINT2NUM(position.column); return rb_class_new_instance(2, args, cPosition); } -VALUE rb_location_from_c_struct(location_T* location) { - if (!location) { return Qnil; } - +VALUE rb_location_from_c_struct(location_T location) { VALUE args[2]; - args[0] = rb_position_from_c_struct(location->start); - args[1] = rb_position_from_c_struct(location->end); + args[0] = rb_position_from_c_struct(location.start); + args[1] = rb_position_from_c_struct(location.end); return rb_class_new_instance(2, args, cLocation); } -VALUE rb_range_from_c_struct(range_T* range) { - if (!range) { return Qnil; } - +VALUE rb_range_from_c_struct(range_T range) { VALUE args[2]; - args[0] = SIZET2NUM(range->from); - args[1] = SIZET2NUM(range->to); + args[0] = UINT2NUM(range.from); + args[1] = UINT2NUM(range.to); return rb_class_new_instance(2, args, cRange); } @@ -64,13 +58,13 @@ VALUE rb_token_from_c_struct(token_T* token) { return rb_class_new_instance(4, args, cToken); } -VALUE create_lex_result(array_T* tokens, VALUE source) { +VALUE create_lex_result(hb_array_T* tokens, VALUE source) { VALUE value = rb_ary_new(); VALUE warnings = rb_ary_new(); VALUE errors = rb_ary_new(); - for (size_t i = 0; i < array_size(tokens); i++) { - token_T* token = array_get(tokens, i); + for (size_t i = 0; i < hb_array_size(tokens); i++) { + token_T* token = hb_array_get(tokens, i); if (token != NULL) { rb_ary_push(value, rb_token_from_c_struct(token)); } } diff --git a/ext/herb/extension_helpers.h b/ext/herb/extension_helpers.h index 120cbf8ba..d4a853f07 100644 --- a/ext/herb/extension_helpers.h +++ b/ext/herb/extension_helpers.h @@ -12,13 +12,13 @@ const char* check_string(VALUE value); VALUE read_file_to_ruby_string(const char* file_path); -VALUE rb_position_from_c_struct(position_T* position); -VALUE rb_location_from_c_struct(location_T* location); +VALUE rb_position_from_c_struct(position_T position); +VALUE rb_location_from_c_struct(location_T location); VALUE rb_token_from_c_struct(token_T* token); -VALUE rb_range_from_c_struct(range_T* range); +VALUE rb_range_from_c_struct(range_T range); -VALUE create_lex_result(array_T* tokens, VALUE source); +VALUE create_lex_result(hb_array_T* tokens, VALUE source); VALUE create_parse_result(AST_DOCUMENT_NODE_T* root, VALUE source); #endif diff --git a/javascript/packages/browser/package.json b/javascript/packages/browser/package.json index c65ea539a..55622a4ad 100644 --- a/javascript/packages/browser/package.json +++ b/javascript/packages/browser/package.json @@ -1,6 +1,6 @@ { "name": "@herb-tools/browser", - "version": "0.7.4", + "version": "0.7.5", "description": "WebAssembly-based HTML-aware ERB parser for browsers.", "type": "module", "license": "MIT", @@ -34,7 +34,7 @@ } }, "dependencies": { - "@herb-tools/core": "0.7.4" + "@herb-tools/core": "0.7.5" }, "files": [ "package.json", diff --git a/javascript/packages/browser/test/browser.test.ts b/javascript/packages/browser/test/browser.test.ts index ff99b6553..701184766 100644 --- a/javascript/packages/browser/test/browser.test.ts +++ b/javascript/packages/browser/test/browser.test.ts @@ -17,7 +17,7 @@ describe("@herb-tools/browser", () => { test("version() returns a string", async () => { const version = Herb.version expect(typeof version).toBe("string") - expect(version).toBe("@herb-tools/browser@0.7.4, @herb-tools/core@0.7.4, libprism@1.5.1, libherb@0.7.4 (WebAssembly)") + expect(version).toBe("@herb-tools/browser@0.7.5, @herb-tools/core@0.7.5, libprism@1.6.0, libherb@0.7.5 (WebAssembly)") }) test("parse() can process a simple template", async () => { diff --git a/javascript/packages/core/package.json b/javascript/packages/core/package.json index 109a4d029..81b3c4a00 100644 --- a/javascript/packages/core/package.json +++ b/javascript/packages/core/package.json @@ -1,8 +1,7 @@ { "name": "@herb-tools/core", - "version": "0.7.4", + "version": "0.7.5", "description": "Core module exporting shared interfaces, AST node definitions, and common utilities for Herb", - "type": "module", "license": "MIT", "homepage": "https://herb-tools.dev", "bugs": "https://github.com/marcoroth/herb/issues/new?title=Package%20%60@herb-tools/core%60:%20", diff --git a/javascript/packages/core/src/ast-utils.ts b/javascript/packages/core/src/ast-utils.ts index c7f773ddb..6c844a4cd 100644 --- a/javascript/packages/core/src/ast-utils.ts +++ b/javascript/packages/core/src/ast-utils.ts @@ -1,6 +1,7 @@ import { Node, LiteralNode, + ERBNode, ERBContentNode, ERBIfNode, ERBUnlessNode, @@ -30,11 +31,17 @@ import { import type { Location } from "./location.js" import type { Position } from "./position.js" +export type ERBOutputNode = ERBNode & { + tag_opening: { + value: "<%=" | "<%==" + } +} + /** * Checks if a node is an ERB output node (generates content: <%= %> or <%== %>) */ -export function isERBOutputNode(node: Node): node is ERBContentNode { - if (!isNode(node, ERBContentNode)) return false +export function isERBOutputNode(node: Node): node is ERBOutputNode { + if (!isERBNode(node)) return false if (!node.tag_opening?.value) return false return ["<%=", "<%=="].includes(node.tag_opening?.value) diff --git a/javascript/packages/core/src/glob.ts b/javascript/packages/core/src/glob.ts new file mode 100644 index 000000000..485042c74 --- /dev/null +++ b/javascript/packages/core/src/glob.ts @@ -0,0 +1 @@ +export const HERB_FILES_GLOB = "**/*.{html,rhtml,html.erb,html+*.erb,turbo_stream.erb}" diff --git a/javascript/packages/core/src/index.ts b/javascript/packages/core/src/index.ts index c0858ee4e..51f88b7e2 100644 --- a/javascript/packages/core/src/index.ts +++ b/javascript/packages/core/src/index.ts @@ -2,11 +2,12 @@ export * from "./ast-utils.js" export * from "./backend.js" export * from "./diagnostic.js" export * from "./errors.js" +export * from "./glob.js" export * from "./herb-backend.js" export * from "./lex-result.js" export * from "./location.js" -export * from "./nodes.js" export * from "./node-type-guards.js" +export * from "./nodes.js" export * from "./parse-result.js" export * from "./parser-options.js" export * from "./position.js" diff --git a/javascript/packages/core/src/location.ts b/javascript/packages/core/src/location.ts index e059ff734..32f38eec1 100644 --- a/javascript/packages/core/src/location.ts +++ b/javascript/packages/core/src/location.ts @@ -17,6 +17,10 @@ export class Location { return new Location(start, end) } + static get zero() { + return new Location(Position.zero, Position.zero) + } + constructor(start: Position, end: Position) { this.start = start this.end = end diff --git a/javascript/packages/core/src/position.ts b/javascript/packages/core/src/position.ts index 7da697594..ec4e5ac6f 100644 --- a/javascript/packages/core/src/position.ts +++ b/javascript/packages/core/src/position.ts @@ -11,6 +11,10 @@ export class Position { return new Position(position.line, position.column) } + static get zero() { + return new Position(0, 0) + } + constructor(line: number, column: number) { this.line = line this.column = column diff --git a/javascript/packages/core/src/range.ts b/javascript/packages/core/src/range.ts index 34c4de2b1..8cb031f63 100644 --- a/javascript/packages/core/src/range.ts +++ b/javascript/packages/core/src/range.ts @@ -8,6 +8,10 @@ export class Range { return new Range(range[0], range[1]) } + static get zero() { + return new Range(0, 0) + } + constructor(start: number, end: number) { this.start = start this.end = end diff --git a/javascript/packages/dev-tools/package.json b/javascript/packages/dev-tools/package.json index c66f0ff4e..4dbe1b66f 100644 --- a/javascript/packages/dev-tools/package.json +++ b/javascript/packages/dev-tools/package.json @@ -1,6 +1,6 @@ { "name": "@herb-tools/dev-tools", - "version": "0.7.4", + "version": "0.7.5", "description": "Development tools for visual debugging in HTML+ERB templates", "type": "module", "license": "MIT", @@ -30,7 +30,7 @@ } }, "dependencies": { - "@herb-tools/core": "0.7.4" + "@herb-tools/core": "0.7.5" }, "files": [ "package.json", diff --git a/javascript/packages/dev-tools/src/herb-overlay.ts b/javascript/packages/dev-tools/src/herb-overlay.ts index 49cc8aff3..e2b12d4a1 100644 --- a/javascript/packages/dev-tools/src/herb-overlay.ts +++ b/javascript/packages/dev-tools/src/herb-overlay.ts @@ -934,6 +934,7 @@ export class HerbOverlay { `subl://open?url=file://${absolutePath}&line=${line}&column=${column}`, `atom://core/open/file?filename=${absolutePath}&line=${line}&column=${column}`, `txmt://open?url=file://${absolutePath}&line=${line}&column=${column}`, + `x-mine://open?file=//${absolutePath}&line=${line}&column=${column}`, ]; try { diff --git a/javascript/packages/formatter/package.json b/javascript/packages/formatter/package.json index 467e1ac21..4de78cad0 100644 --- a/javascript/packages/formatter/package.json +++ b/javascript/packages/formatter/package.json @@ -1,6 +1,6 @@ { "name": "@herb-tools/formatter", - "version": "0.7.4", + "version": "0.7.5", "description": "Auto-formatter for HTML+ERB templates with intelligent indentation, line wrapping, and ERB-aware pretty-printing.", "license": "MIT", "homepage": "https://herb-tools.dev", @@ -35,8 +35,8 @@ } }, "dependencies": { - "@herb-tools/core": "0.7.4", - "@herb-tools/printer": "0.7.4" + "@herb-tools/core": "0.7.5", + "@herb-tools/printer": "0.7.5" }, "devDependencies": { "glob": "^11.0.3" diff --git a/javascript/packages/formatter/src/cli.ts b/javascript/packages/formatter/src/cli.ts index 5d88c1d28..df73eb694 100644 --- a/javascript/packages/formatter/src/cli.ts +++ b/javascript/packages/formatter/src/cli.ts @@ -4,6 +4,8 @@ import { glob } from "glob" import { join, resolve } from "path" import { Herb } from "@herb-tools/node-wasm" +import { HERB_FILES_GLOB } from "@herb-tools/core" + import { Formatter } from "./formatter.js" import { parseArgs } from "util" import { resolveFormatOptions } from "./options.js" @@ -19,8 +21,8 @@ export class CLI { Usage: herb-format [file|directory|glob-pattern] [options] Arguments: - file|directory|glob-pattern File to format, directory to format all **/*.html.erb files within, - glob pattern to match files, or '-' for stdin (omit to format all **/*.html.erb files in current directory) + file|directory|glob-pattern File to format, directory to format all \`${HERB_FILES_GLOB}\` files within, + glob pattern to match files, or '-' for stdin (omit to format all \`${HERB_FILES_GLOB}\` files in current directory) Options: -c, --check check if files are formatted without modifying them @@ -30,18 +32,20 @@ export class CLI { --max-line-length maximum line length before wrapping (default: 80) Examples: - herb-format # Format all **/*.html.erb files in current directory - herb-format index.html.erb # Format and write single file - herb-format templates/index.html.erb # Format and write single file - herb-format templates/ # Format and **/*.html.erb within the given directory - herb-format "templates/**/*.html.erb" # Format all .html.erb files in templates directory using glob pattern - herb-format "**/*.html.erb" # Format all .html.erb files using glob pattern - herb-format "**/*.xml.erb" # Format all .xml.erb files using glob pattern - herb-format --check # Check if all **/*.html.erb files are formatted - herb-format --check templates/ # Check if all **/*.html.erb files in templates/ are formatted - herb-format --indent-width 4 # Format with 4-space indentation - herb-format --max-line-length 100 # Format with 100-character line limit - cat template.html.erb | herb-format # Format from stdin to stdout + herb-format # Format all \`${HERB_FILES_GLOB}\` files in current directory + herb-format index.html.erb # Format and write single file + herb-format templates/index.html.erb # Format and write single file + herb-format templates/ # Format all \`${HERB_FILES_GLOB}\` within the given directory + herb-format "templates/**/*.html.erb" # Format all \`**/*.html.erb\` files in the templates/ directory + herb-format "**/*.html.erb" # Format all \`*.html.erb\` files using glob pattern + herb-format "**/*.xml.erb" # Format all \`*.xml.erb\` files using glob pattern + + herb-format --check # Check if all \`${HERB_FILES_GLOB}\` files are formatted + herb-format --check templates/ # Check if all \`${HERB_FILES_GLOB}\` files in templates/ are formatted + + herb-format --indent-width 4 # Format with 4-space indentation + herb-format --max-line-length 100 # Format with 100-character line limit + cat template.html.erb | herb-format # Format from stdin to stdout ` private parseArguments() { @@ -112,8 +116,8 @@ export class CLI { process.exit(0) } - console.log("⚠️ Experimental Preview: The formatter is in early development. Please report any unexpected behavior or bugs to https://github.com/marcoroth/herb/issues/new?template=formatting-issue.md") - console.log() + console.error("⚠️ Experimental Preview: The formatter is in early development. Please report any unexpected behavior or bugs to https://github.com/marcoroth/herb/issues/new?template=formatting-issue.md") + console.error() const formatOptions = resolveFormatOptions({ indentWidth, @@ -162,7 +166,7 @@ export class CLI { } if (isDirectory) { - pattern = join(file, "**/*.html.erb") + pattern = join(file, HERB_FILES_GLOB) } else if (isFile) { const source = readFileSync(file, "utf-8") const result = formatter.format(source) @@ -245,10 +249,10 @@ export class CLI { process.exit(1) } } else { - const files = await glob("**/*.html.erb") + const files = await glob(HERB_FILES_GLOB) if (files.length === 0) { - console.log(`No files found matching pattern: ${resolve("**/*.html.erb")}`) + console.log(`No files found matching pattern: ${resolve(HERB_FILES_GLOB)}`) process.exit(0) } diff --git a/javascript/packages/formatter/src/format-helpers.ts b/javascript/packages/formatter/src/format-helpers.ts new file mode 100644 index 000000000..13c71fb2b --- /dev/null +++ b/javascript/packages/formatter/src/format-helpers.ts @@ -0,0 +1,475 @@ +import { isNode, isERBNode, getTagName, isAnyOf, isERBControlFlowNode, hasERBOutput } from "@herb-tools/core" +import { Node, HTMLDoctypeNode, HTMLTextNode, HTMLElementNode, HTMLCommentNode, HTMLOpenTagNode, HTMLCloseTagNode, ERBIfNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core" + +// --- Types --- + +/** + * Analysis result for HTMLElementNode formatting decisions + */ +export interface ElementFormattingAnalysis { + openTagInline: boolean + elementContentInline: boolean + closeTagInline: boolean +} + +/** + * Content unit represents a piece of content in text flow + * Can be atomic (inline elements, ERB) or splittable (text) + */ +export interface ContentUnit { + content: string + type: 'text' | 'inline' | 'erb' | 'block' + isAtomic: boolean + breaksFlow: boolean +} + +/** + * Content unit paired with its source AST node + */ +export interface ContentUnitWithNode { + unit: ContentUnit + node: Node | null +} + +// --- Constants --- + +// TODO: we can probably expand this list with more tags/attributes +export const FORMATTABLE_ATTRIBUTES: Record = { + '*': ['class'], + 'img': ['srcset', 'sizes'] +} + +export const INLINE_ELEMENTS = new Set([ + 'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'cite', 'code', + 'dfn', 'em', 'i', 'img', 'kbd', 'label', 'map', 'object', 'q', + 'samp', 'small', 'span', 'strong', 'sub', 'sup', + 'tt', 'var', 'del', 'ins', 'mark', 's', 'u', 'time', 'wbr' +]) + +export const CONTENT_PRESERVING_ELEMENTS = new Set([ + 'script', 'style', 'pre', 'textarea' +]) + +export const SPACEABLE_CONTAINERS = new Set([ + 'div', 'section', 'article', 'main', 'header', 'footer', 'aside', + 'figure', 'details', 'summary', 'dialog', 'fieldset' +]) + +export const TIGHT_GROUP_PARENTS = new Set([ + 'ul', 'ol', 'nav', 'select', 'datalist', 'optgroup', 'tr', 'thead', + 'tbody', 'tfoot' +]) + +export const TIGHT_GROUP_CHILDREN = new Set([ + 'li', 'option', 'td', 'th', 'dt', 'dd' +]) + +export const SPACING_THRESHOLD = 3 + +/** + * Token list attributes that contain space-separated values and benefit from + * spacing around ERB content for readability + */ +export const TOKEN_LIST_ATTRIBUTES = new Set([ + 'class', 'data-controller', 'data-action' +]) + + +// --- Node Utility Functions --- + +/** + * Check if a node is pure whitespace (empty text node with only whitespace) + */ +export function isPureWhitespaceNode(node: Node): boolean { + return isNode(node, HTMLTextNode) && node.content.trim() === "" +} + +/** + * Check if a node is non-whitespace (has meaningful content) + */ +export function isNonWhitespaceNode(node: Node): boolean { + if (isNode(node, WhitespaceNode)) return false + if (isNode(node, HTMLTextNode)) return node.content.trim() !== "" + + return true +} + +/** + * Find the previous meaningful (non-whitespace) sibling + * Returns -1 if no meaningful sibling is found + */ +export function findPreviousMeaningfulSibling(siblings: Node[], currentIndex: number): number { + for (let i = currentIndex - 1; i >= 0; i--) { + if (isNonWhitespaceNode(siblings[i])) { + return i + } + } + + return -1 +} + +/** + * Check if there's whitespace between two indices in children array + */ +export function hasWhitespaceBetween(children: Node[], startIndex: number, endIndex: number): boolean { + for (let j = startIndex + 1; j < endIndex; j++) { + if (isNode(children[j], WhitespaceNode) || isPureWhitespaceNode(children[j])) { + return true + } + } + + return false +} + +/** + * Filter children to remove insignificant whitespace + */ +export function filterSignificantChildren(body: Node[]): Node[] { + return body.filter(child => { + if (isNode(child, WhitespaceNode)) return false + + if (isNode(child, HTMLTextNode)) { + if (child.content === " ") return true + + return child.content.trim() !== "" + } + + return true + }) +} + +/** + * Filter out empty text nodes and whitespace nodes + */ +export function filterEmptyNodes(nodes: Node[]): Node[] { + return nodes.filter(child => + !isNode(child, WhitespaceNode) && !(isNode(child, HTMLTextNode) && child.content.trim() === "") + ) +} + +// --- Punctuation and Word Spacing Functions --- + +/** + * Check if a word is standalone closing punctuation + */ +export function isClosingPunctuation(word: string): boolean { + return /^[.,;:!?)\]]+$/.test(word) +} + +/** + * Check if a line ends with opening punctuation + */ +export function lineEndsWithOpeningPunctuation(line: string): boolean { + return /[(\[]$/.test(line) +} + +/** + * Check if a string is an ERB tag + */ +export function isERBTag(text: string): boolean { + return /^<%.*?%>$/.test(text.trim()) +} + +/** + * Check if a string ends with an ERB tag + */ +export function endsWithERBTag(text: string): boolean { + return /%>$/.test(text.trim()) +} + +/** + * Check if a string starts with an ERB tag + */ +export function startsWithERBTag(text: string): boolean { + return /^<%/.test(text.trim()) +} + +/** + * Determine if space is needed between the current line and the next word + */ +export function needsSpaceBetween(currentLine: string, word: string): boolean { + if (isClosingPunctuation(word)) return false + if (lineEndsWithOpeningPunctuation(currentLine)) return false + if (currentLine.endsWith(' ')) return false + if (endsWithERBTag(currentLine) && startsWithERBTag(word)) return false + + return true +} + +/** + * Build a line by adding a word with appropriate spacing + */ +export function buildLineWithWord(currentLine: string, word: string): string { + if (!currentLine) return word + + if (word === ' ') { + return currentLine.endsWith(' ') ? currentLine : `${currentLine} ` + } + + return needsSpaceBetween(currentLine, word) ? `${currentLine} ${word}` : `${currentLine}${word}` +} + +/** + * Check if a node is an inline element or ERB node + */ +export function isInlineOrERBNode(node: Node): boolean { + return isERBNode(node) || (isNode(node, HTMLElementNode) && isInlineElement(getTagName(node))) +} + +/** + * Check if an element should be treated as inline based on its tag name + */ +export function isInlineElement(tagName: string): boolean { + return INLINE_ELEMENTS.has(tagName.toLowerCase()) +} + +/** + * Check if the current inline element is adjacent to a previous inline element (no whitespace between) + */ +export function isAdjacentToPreviousInline(siblings: Node[], index: number): boolean { + const previousNode = siblings[index - 1] + + if (isInlineOrERBNode(previousNode)) { + return true + } + + if (index > 1 && isNode(previousNode, HTMLTextNode) && !/^\s/.test(previousNode.content)) { + const twoBack = siblings[index - 2] + + return isInlineOrERBNode(twoBack) + } + + return false +} + +/** + * Check if a node should be appended to the last line (for adjacent inline elements and punctuation) + */ +export function shouldAppendToLastLine(child: Node, siblings: Node[], index: number): boolean { + if (index === 0) return false + + if (isNode(child, HTMLTextNode) && !/^\s/.test(child.content)) { + const previousNode = siblings[index - 1] + + return isInlineOrERBNode(previousNode) + } + + if (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) { + return isAdjacentToPreviousInline(siblings, index) + } + + if (isNode(child, ERBContentNode)) { + for (let i = index - 1; i >= 0; i--) { + const previousSibling = siblings[i] + + if (isPureWhitespaceNode(previousSibling) || isNode(previousSibling, WhitespaceNode)) { + continue + } + + if (previousSibling.location && child.location) { + return previousSibling.location.end.line === child.location.start.line + } + + break + } + } + + return false +} + +/** + * Check if user-intentional spacing should be preserved (double newlines between elements) + */ +export function shouldPreserveUserSpacing(child: Node, siblings: Node[], index: number): boolean { + if (!isPureWhitespaceNode(child)) return false + + const hasPreviousNonWhitespace = index > 0 && isNonWhitespaceNode(siblings[index - 1]) + const hasNextNonWhitespace = index < siblings.length - 1 && isNonWhitespaceNode(siblings[index + 1]) + const hasMultipleNewlines = isNode(child, HTMLTextNode) && child.content.includes('\n\n') + + return hasPreviousNonWhitespace && hasNextNonWhitespace && hasMultipleNewlines +} + + +/** + * Check if children contain any text content with newlines + */ +export function hasMultilineTextContent(children: Node[]): boolean { + for (const child of children) { + if (isNode(child, HTMLTextNode)) { + return child.content.includes('\n') + } + + if (isNode(child, HTMLElementNode)) { + const nestedChildren = filterEmptyNodes(child.body) + + if (hasMultilineTextContent(nestedChildren)) { + return true + } + } + } + + return false +} + +/** + * Check if all nested elements in the children are inline elements + */ +export function areAllNestedElementsInline(children: Node[]): boolean { + for (const child of children) { + if (isNode(child, HTMLElementNode)) { + if (!isInlineElement(getTagName(child))) { + return false + } + + const nestedChildren = filterEmptyNodes(child.body) + + if (!areAllNestedElementsInline(nestedChildren)) { + return false + } + } else if (isAnyOf(child, HTMLDoctypeNode, HTMLCommentNode, isERBControlFlowNode)) { + return false + } + } + + return true +} + +/** + * Check if element has complex ERB control flow + */ +export function hasComplexERBControlFlow(inlineNodes: Node[]): boolean { + return inlineNodes.some(node => { + if (isNode(node, ERBIfNode)) { + if (node.statements.length > 0 && node.location) { + const startLine = node.location.start.line + const endLine = node.location.end.line + + return startLine !== endLine + } + + return false + } + + return false + }) +} + +/** + * Check if children contain mixed text and inline elements (like "textinlinetext") + * or mixed ERB output and text (like "<%= value %> text") + * This indicates content that should be formatted inline even with structural newlines + */ +export function hasMixedTextAndInlineContent(children: Node[]): boolean { + let hasText = false + let hasInlineElements = false + + for (const child of children) { + if (isNode(child, HTMLTextNode)) { + if (child.content.trim() !== "") { + hasText = true + } + } else if (isNode(child, HTMLElementNode)) { + if (isInlineElement(getTagName(child))) { + hasInlineElements = true + } + } + } + + return (hasText && hasInlineElements) || (hasERBOutput(children) && hasText) +} + +export function isContentPreserving(element: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): boolean { + const tagName = getTagName(element) + + return CONTENT_PRESERVING_ELEMENTS.has(tagName) +} + +/** + * Count consecutive inline elements/ERB at the start of children (with no whitespace between) + */ +export function countAdjacentInlineElements(children: Node[]): number { + let count = 0 + let lastSignificantIndex = -1 + + for (let i = 0; i < children.length; i++) { + const child = children[i] + + if (isPureWhitespaceNode(child) || isNode(child, WhitespaceNode)) { + continue + } + + const isInlineOrERB = (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) || isNode(child, ERBContentNode) + + if (!isInlineOrERB) { + break + } + + if (lastSignificantIndex >= 0 && hasWhitespaceBetween(children, lastSignificantIndex, i)) { + break + } + + count++ + lastSignificantIndex = i + } + + return count +} + +/** + * Determine if we should wrap to the next line + */ +export function shouldWrapToNextLine(testLine: string, currentLine: string, word: string, wrapWidth: number): boolean { + if (!currentLine) return false + if (isClosingPunctuation(word)) return false + + return testLine.length >= wrapWidth +} + +/** + * Check if a node represents a block-level element + */ +export function isBlockLevelNode(node: Node): boolean { + if (!isNode(node, HTMLElementNode)) { + return false + } + + const tagName = getTagName(node) + + if (INLINE_ELEMENTS.has(tagName)) { + return false + } + + return true +} + +/** + * Normalize text by replacing multiple spaces with single space and trim + * Then split into words + */ +export function normalizeAndSplitWords(text: string): string[] { + const normalized = text.replace(/\s+/g, ' ') + return normalized.trim().split(' ') +} + +/** + * Check if text starts with an alphanumeric character (not punctuation) + */ +export function startsWithAlphanumeric(text: string): boolean { + const trimmed = text.trim() + return /^[a-zA-Z0-9]/.test(trimmed) +} + +/** + * Check if text ends with an alphanumeric character (not punctuation) + */ +export function endsWithAlphanumeric(text: string): boolean { + return /[a-zA-Z0-9]$/.test(text) +} + +/** + * Check if text ends with whitespace + */ +export function endsWithWhitespace(text: string): boolean { + return /\s$/.test(text) +} diff --git a/javascript/packages/formatter/src/format-printer.ts b/javascript/packages/formatter/src/format-printer.ts index 02a6f8c66..327fef82c 100644 --- a/javascript/packages/formatter/src/format-printer.ts +++ b/javascript/packages/formatter/src/format-printer.ts @@ -1,4 +1,5 @@ import dedent from "dedent" + import { getTagName, getCombinedAttributeName, @@ -6,16 +7,56 @@ import { isNode, isToken, isParseResult, - isAnyOf, isNoneOf, isERBNode, isCommentNode, isERBControlFlowNode, filterNodes, - hasERBOutput, } from "@herb-tools/core" + import { Printer, IdentityPrinter } from "@herb-tools/printer" +import { + areAllNestedElementsInline, + buildLineWithWord, + countAdjacentInlineElements, + endsWithAlphanumeric, + endsWithWhitespace, + filterEmptyNodes, + filterSignificantChildren, + findPreviousMeaningfulSibling, + hasComplexERBControlFlow, + hasMixedTextAndInlineContent, + hasMultilineTextContent, + hasWhitespaceBetween, + isBlockLevelNode, + isContentPreserving, + isInlineElement, + isNonWhitespaceNode, + isPureWhitespaceNode, + normalizeAndSplitWords, + shouldAppendToLastLine, + shouldPreserveUserSpacing, + shouldWrapToNextLine, + startsWithAlphanumeric, +} from "./format-helpers.js" + +import { + FORMATTABLE_ATTRIBUTES, + INLINE_ELEMENTS, + SPACEABLE_CONTAINERS, + SPACING_THRESHOLD, + TIGHT_GROUP_CHILDREN, + TIGHT_GROUP_PARENTS, + TOKEN_LIST_ATTRIBUTES, +} from "./format-helpers.js" + +import type { + ContentUnit, + ContentUnitWithNode, + ElementFormattingAnalysis, +} from "./format-helpers.js" + import { ParseResult, Node, @@ -56,21 +97,6 @@ import { import type { ERBNode } from "@herb-tools/core" import type { FormatOptions } from "./options.js" -/** - * Analysis result for HTMLElementNode formatting decisions - */ -interface ElementFormattingAnalysis { - openTagInline: boolean - elementContentInline: boolean - closeTagInline: boolean -} - -// TODO: we can probably expand this list with more tags/attributes -const FORMATTABLE_ATTRIBUTES: Record = { - '*': ['class'], - 'img': ['srcset', 'sizes'] -} - /** * Printer traverses the Herb AST using the Visitor pattern * and emits a formatted string with proper indentation, line breaks, and attribute wrapping. @@ -98,34 +124,6 @@ export class FormatPrinter extends Printer { public source: string - // TODO: extract - private static readonly INLINE_ELEMENTS = new Set([ - 'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'cite', 'code', - 'dfn', 'em', 'i', 'img', 'kbd', 'label', 'map', 'object', 'q', - 'samp', 'small', 'span', 'strong', 'sub', 'sup', - 'tt', 'var', 'del', 'ins', 'mark', 's', 'u', 'time', 'wbr' - ]) - - private static readonly CONTENT_PRESERVING_ELEMENTS = new Set([ - 'script', 'style', 'pre', 'textarea' - ]) - - private static readonly SPACEABLE_CONTAINERS = new Set([ - 'div', 'section', 'article', 'main', 'header', 'footer', 'aside', - 'figure', 'details', 'summary', 'dialog', 'fieldset' - ]) - - private static readonly TIGHT_GROUP_PARENTS = new Set([ - 'ul', 'ol', 'nav', 'select', 'datalist', 'optgroup', 'tr', 'thead', - 'tbody', 'tfoot' - ]) - - private static readonly TIGHT_GROUP_CHILDREN = new Set([ - 'li', 'option', 'td', 'th', 'dt', 'dd' - ]) - - private static readonly SPACING_THRESHOLD = 3 - constructor(source: string, options: Required) { super() @@ -320,26 +318,26 @@ export class FormatPrinter extends Printer { return false } - const meaningfulSiblings = siblings.filter(child => this.isNonWhitespaceNode(child)) + const meaningfulSiblings = siblings.filter(child => isNonWhitespaceNode(child)) - if (meaningfulSiblings.length < FormatPrinter.SPACING_THRESHOLD) { + if (meaningfulSiblings.length < SPACING_THRESHOLD) { return false } const parentTagName = parentElement ? getTagName(parentElement) : null - if (parentTagName && FormatPrinter.TIGHT_GROUP_PARENTS.has(parentTagName)) { + if (parentTagName && TIGHT_GROUP_PARENTS.has(parentTagName)) { return false } - const isSpaceableContainer = !parentTagName || (parentTagName && FormatPrinter.SPACEABLE_CONTAINERS.has(parentTagName)) + const isSpaceableContainer = !parentTagName || (parentTagName && SPACEABLE_CONTAINERS.has(parentTagName)) if (!isSpaceableContainer && meaningfulSiblings.length < 5) { return false } const currentNode = siblings[currentIndex] - const previousMeaningfulIndex = this.findPreviousMeaningfulSibling(siblings, currentIndex) + const previousMeaningfulIndex = findPreviousMeaningfulSibling(siblings, currentIndex) const isCurrentComment = isCommentNode(currentNode) if (previousMeaningfulIndex !== -1) { @@ -358,11 +356,11 @@ export class FormatPrinter extends Printer { if (isNode(currentNode, HTMLElementNode)) { const currentTagName = getTagName(currentNode) - if (FormatPrinter.INLINE_ELEMENTS.has(currentTagName)) { + if (INLINE_ELEMENTS.has(currentTagName)) { return false } - if (FormatPrinter.TIGHT_GROUP_CHILDREN.has(currentTagName)) { + if (TIGHT_GROUP_CHILDREN.has(currentTagName)) { return false } @@ -371,56 +369,18 @@ export class FormatPrinter extends Printer { } } - const isBlockElement = this.isBlockLevelNode(currentNode) + const isBlockElement = isBlockLevelNode(currentNode) const isERBBlock = isERBNode(currentNode) && isERBControlFlowNode(currentNode) const isComment = isCommentNode(currentNode) return isBlockElement || isERBBlock || isComment } - /** - * Token list attributes that contain space-separated values and benefit from - * spacing around ERB content for readability - */ - private static readonly TOKEN_LIST_ATTRIBUTES = new Set([ - 'class', 'data-controller', 'data-action' - ]) - /** * Check if we're currently processing a token list attribute that needs spacing */ - private isInTokenListAttribute(): boolean { - return this.currentAttributeName !== null && - FormatPrinter.TOKEN_LIST_ATTRIBUTES.has(this.currentAttributeName) - } - - /** - * Find the previous meaningful (non-whitespace) sibling - */ - private findPreviousMeaningfulSibling(siblings: Node[], currentIndex: number): number { - for (let i = currentIndex - 1; i >= 0; i--) { - if (this.isNonWhitespaceNode(siblings[i])) { - return i - } - } - return -1 - } - - /** - * Check if a node represents a block-level element - */ - private isBlockLevelNode(node: Node): boolean { - if (!isNode(node, HTMLElementNode)) { - return false - } - - const tagName = getTagName(node) - - if (FormatPrinter.INLINE_ELEMENTS.has(tagName)) { - return false - } - - return true + private get isInTokenListAttribute(): boolean { + return this.currentAttributeName !== null && TOKEN_LIST_ATTRIBUTES.has(this.currentAttributeName) } /** @@ -473,16 +433,13 @@ export class FormatPrinter extends Printer { return true } - private getAttributeName(attribute: HTMLAttributeNode): string { - return attribute.name ? getCombinedAttributeName(attribute.name) : "" - } - private wouldClassAttributeBeMultiline(content: string, indentLength: number): boolean { const normalizedContent = content.replace(/\s+/g, ' ').trim() const hasActualNewlines = /\r?\n/.test(content) if (hasActualNewlines && normalizedContent.length > 80) { const lines = content.split(/\r?\n/).map(line => line.trim()).filter(line => line) + if (lines.length > 1) { return true } @@ -504,6 +461,12 @@ export class FormatPrinter extends Printer { return false } + // TOOD: extract to core or reuse function from core + private getAttributeName(attribute: HTMLAttributeNode): string { + return attribute.name ? getCombinedAttributeName(attribute.name) : "" + } + + // TOOD: extract to core or reuse function from core private getAttributeValue(attribute: HTMLAttributeNode): string { if (isNode(attribute.value, HTMLAttributeValueNode)) { return attribute.value.children.map(child => isNode(child, HTMLTextNode) ? child.content : IdentityPrinter.print(child)).join('') @@ -680,31 +643,30 @@ export class FormatPrinter extends Printer { for (let i = 0; i < node.children.length; i++) { const child = node.children[i] - if (isNode(child, HTMLTextNode)) { - const isWhitespaceOnly = child.content.trim() === "" - - if (isWhitespaceOnly) { - const hasPreviousNonWhitespace = i > 0 && this.isNonWhitespaceNode(node.children[i - 1]) - const hasNextNonWhitespace = i < node.children.length - 1 && this.isNonWhitespaceNode(node.children[i + 1]) - - const hasMultipleNewlines = child.content.includes('\n\n') + if (shouldPreserveUserSpacing(child, node.children, i)) { + this.push("") + hasHandledSpacing = true + continue + } - if (hasPreviousNonWhitespace && hasNextNonWhitespace && hasMultipleNewlines) { - this.push("") - hasHandledSpacing = true - } + if (isPureWhitespaceNode(child)) { + continue + } - continue - } + if (shouldAppendToLastLine(child, node.children, i)) { + this.appendChildToLastLine(child, node.children, i) + lastWasMeaningful = true + hasHandledSpacing = false + continue } - if (this.isNonWhitespaceNode(child) && lastWasMeaningful && !hasHandledSpacing) { + if (isNonWhitespaceNode(child) && lastWasMeaningful && !hasHandledSpacing) { this.push("") } this.visit(child) - if (this.isNonWhitespaceNode(child)) { + if (isNonWhitespaceNode(child)) { lastWasMeaningful = true hasHandledSpacing = false } @@ -729,7 +691,7 @@ export class FormatPrinter extends Printer { } visitHTMLElementBody(body: Node[], element: HTMLElementNode) { - if (this.isContentPreserving(element)) { + if (isContentPreserving(element)) { element.body.map(child => { if (isNode(child, HTMLElementNode)) { const wasInlineMode = this.inlineMode @@ -749,7 +711,7 @@ export class FormatPrinter extends Printer { const analysis = this.elementFormattingAnalysis.get(element) const hasTextFlow = this.isInTextFlowContext(null, body) - const children = this.filterSignificantChildren(body, hasTextFlow) + const children = filterSignificantChildren(body) if (analysis?.elementContentInline) { if (children.length === 0) return @@ -771,10 +733,13 @@ export class FormatPrinter extends Printer { this.push(' ') } } else { - const normalizedContent = child.content.replace(/\s+/g, ' ').trim() + const normalizedContent = child.content.replace(/\s+/g, ' ') + const trimmedContent = normalizedContent.trim() - if (normalizedContent) { - this.push(normalizedContent) + if (trimmedContent) { + this.push(trimmedContent) + } else if (normalizedContent === ' ') { + this.push(' ') } } } else if (isNode(child, WhitespaceNode)) { @@ -822,8 +787,8 @@ export class FormatPrinter extends Printer { const isWhitespaceOnly = child.content.trim() === "" if (isWhitespaceOnly) { - const hasPreviousNonWhitespace = i > 0 && this.isNonWhitespaceNode(body[i - 1]) - const hasNextNonWhitespace = i < body.length - 1 && this.isNonWhitespaceNode(body[i + 1]) + const hasPreviousNonWhitespace = i > 0 && isNonWhitespaceNode(body[i - 1]) + const hasNextNonWhitespace = i < body.length - 1 && isNonWhitespaceNode(body[i + 1]) const hasMultipleNewlines = child.content.includes('\n\n') @@ -836,7 +801,7 @@ export class FormatPrinter extends Printer { } } - if (this.isNonWhitespaceNode(child) && lastWasMeaningful && !hasHandledSpacing) { + if (isNonWhitespaceNode(child) && lastWasMeaningful && !hasHandledSpacing) { const element = body[i - 1] const hasExistingSpacing = i > 0 && isNode(element, HTMLTextNode) && element.content.trim() === "" && (element.content.includes('\n\n') || element.content.split('\n').length > 2) @@ -854,7 +819,7 @@ export class FormatPrinter extends Printer { this.visit(child) - if (this.isNonWhitespaceNode(child)) { + if (isNonWhitespaceNode(child)) { lastWasMeaningful = true hasHandledSpacing = false } @@ -1101,7 +1066,17 @@ export class FormatPrinter extends Printer { visitERBBlockNode(node: ERBBlockNode) { this.printERBNode(node) - this.withIndent(() => this.visitElementChildren(node.body, null)) + + this.withIndent(() => { + const hasTextFlow = this.isInTextFlowContext(null, node.body) + + if (hasTextFlow) { + const children = filterSignificantChildren(node.body) + this.visitTextFlowChildren(children) + } else { + this.visitElementChildren(node.body, null) + } + }) if (node.end_node) this.visit(node.end_node) } @@ -1115,7 +1090,7 @@ export class FormatPrinter extends Printer { this.lines.push(" ") this.lines.push(this.renderAttribute(child)) } else { - const shouldAddSpaces = this.isInTokenListAttribute() + const shouldAddSpaces = this.isInTokenListAttribute if (shouldAddSpaces) { this.lines.push(" ") @@ -1130,7 +1105,7 @@ export class FormatPrinter extends Printer { }) const hasHTMLAttributes = node.statements.some(child => isNode(child, HTMLAttributeNode)) - const isTokenList = this.isInTokenListAttribute() + const isTokenList = this.isInTokenListAttribute if ((hasHTMLAttributes || isTokenList) && node.end_node) { this.lines.push(" ") @@ -1242,7 +1217,7 @@ export class FormatPrinter extends Printer { const attributes = filterNodes(children, HTMLAttributeNode) const inlineNodes = this.extractInlineNodes(children) const hasERBControlFlow = inlineNodes.some(node => isERBControlFlowNode(node)) || children.some(node => isERBControlFlowNode(node)) - const hasComplexERB = hasERBControlFlow && this.hasComplexERBControlFlow(inlineNodes) + const hasComplexERB = hasERBControlFlow && hasComplexERBControlFlow(inlineNodes) if (hasComplexERB) return false @@ -1275,15 +1250,14 @@ export class FormatPrinter extends Printer { */ private shouldRenderElementContentInline(node: HTMLElementNode): boolean { const tagName = getTagName(node) - const children = this.filterSignificantChildren(node.body, this.isInTextFlowContext(null, node.body)) - const isInlineElement = this.isInlineElement(tagName) + const children = filterSignificantChildren(node.body) const openTagInline = this.shouldRenderOpenTagInline(node) if (!openTagInline) return false if (children.length === 0) return true - if (isInlineElement) { - const fullInlineResult = this.tryRenderInlineFull(node, tagName, filterNodes(node.open_tag?.children, HTMLAttributeNode), children) + if (isInlineElement(tagName)) { + const fullInlineResult = this.tryRenderInlineFull(node, tagName, filterNodes(node.open_tag?.children, HTMLAttributeNode), node.body) if (fullInlineResult) { const totalLength = this.indent.length + fullInlineResult.length @@ -1293,12 +1267,12 @@ export class FormatPrinter extends Printer { return false } - const allNestedAreInline = this.areAllNestedElementsInline(children) - const hasMultilineText = this.hasMultilineTextContent(children) - const hasMixedContent = this.hasMixedTextAndInlineContent(children) + const allNestedAreInline = areAllNestedElementsInline(children) + const hasMultilineText = hasMultilineTextContent(children) + const hasMixedContent = hasMixedTextAndInlineContent(children) if (allNestedAreInline && (!hasMultilineText || hasMixedContent)) { - const fullInlineResult = this.tryRenderInlineFull(node, tagName, filterNodes(node.open_tag?.children, HTMLAttributeNode), children) + const fullInlineResult = this.tryRenderInlineFull(node, tagName, filterNodes(node.open_tag?.children, HTMLAttributeNode), node.body) if (fullInlineResult) { const totalLength = this.indent.length + fullInlineResult.length @@ -1337,9 +1311,9 @@ export class FormatPrinter extends Printer { private shouldRenderCloseTagInline(node: HTMLElementNode, elementContentInline: boolean): boolean { if (node.is_void) return true if (node.open_tag?.tag_closing?.value === "/>") return true - if (this.isContentPreserving(node)) return true + if (isContentPreserving(node)) return true - const children = this.filterSignificantChildren(node.body, this.isInTextFlowContext(null, node.body)) + const children = filterSignificantChildren(node.body) if (children.length === 0) return true @@ -1349,130 +1323,416 @@ export class FormatPrinter extends Printer { // --- Utility methods --- - private isNonWhitespaceNode(node: Node): boolean { - if (isNode(node, WhitespaceNode)) return false - if (isNode(node, HTMLTextNode)) return node.content.trim() !== "" + /** + * Append a child node to the last output line + */ + private appendChildToLastLine(child: Node, siblings?: Node[], index?: number): void { + if (isNode(child, HTMLTextNode)) { + this.pushToLastLine(child.content.trim()) + } else { + let hasSpaceBefore = false + + if (siblings && index !== undefined && index > 0) { + const prevSibling = siblings[index - 1] - return true + if (isPureWhitespaceNode(prevSibling) || isNode(prevSibling, WhitespaceNode)) { + hasSpaceBefore = true + } + } + + const oldInlineMode = this.inlineMode + this.inlineMode = true + const inlineContent = this.capture(() => this.visit(child)).join("") + this.inlineMode = oldInlineMode + this.pushToLastLine((hasSpaceBefore ? " " : "") + inlineContent) + } } /** - * Check if an element should be treated as inline based on its tag name + * Visit children in a text flow context (mixed text and inline elements) + * Handles word wrapping and keeps adjacent inline elements together */ - private isInlineElement(tagName: string): boolean { - return FormatPrinter.INLINE_ELEMENTS.has(tagName.toLowerCase()) + private visitTextFlowChildren(children: Node[]) { + const adjacentInlineCount = countAdjacentInlineElements(children) + + if (adjacentInlineCount >= 2) { + this.renderAdjacentInlineElements(children, adjacentInlineCount) + this.visitRemainingChildren(children, adjacentInlineCount) + + return + } + + this.buildAndWrapTextFlow(children) } /** - * Check if we're in a text flow context (parent contains mixed text and inline elements) + * Render adjacent inline elements together on one line */ - private visitTextFlowChildren(children: Node[]) { - let currentLineContent = "" + private renderAdjacentInlineElements(children: Node[], count: number): void { + let inlineContent = "" + let processedCount = 0 - for (const child of children) { - if (isNode(child, HTMLTextNode)) { - const content = child.content + for (let i = 0; i < children.length && processedCount < count; i++) { + const child = children[i] - let processedContent = content.replace(/\s+/g, ' ').trim() + if (isPureWhitespaceNode(child) || isNode(child, WhitespaceNode)) { + continue + } - if (processedContent) { - const hasLeadingSpace = /^\s/.test(content) + if (isNode(child, HTMLElementNode) && isInlineElement(getTagName(child))) { + inlineContent += this.renderInlineElementAsString(child) + processedCount++ + } else if (isNode(child, ERBContentNode)) { + inlineContent += this.renderERBAsString(child) + processedCount++ + } + } - if (currentLineContent && hasLeadingSpace && !currentLineContent.endsWith(' ')) { - currentLineContent += ' ' - } + if (inlineContent) { + this.pushWithIndent(inlineContent) + } + } - currentLineContent += processedContent + /** + * Render an inline element as a string + */ + private renderInlineElementAsString(element: HTMLElementNode): string { + const tagName = getTagName(element) - const hasTrailingSpace = /\s$/.test(content) + if (element.is_void || element.open_tag?.tag_closing?.value === "/>") { + const attributes = filterNodes(element.open_tag?.children, HTMLAttributeNode) + const attributesString = this.renderAttributesString(attributes) + const isSelfClosing = element.open_tag?.tag_closing?.value === "/>" - if (hasTrailingSpace && !currentLineContent.endsWith(' ')) { - currentLineContent += ' ' - } + return `<${tagName}${attributesString}${isSelfClosing ? " />" : ">"}` + } - if ((this.indent.length + currentLineContent.length) > Math.max(this.maxLineLength, 120)) { - children.forEach(child => this.visit(child)) + const childInline = this.tryRenderInlineFull(element, tagName, + filterNodes(element.open_tag?.children, HTMLAttributeNode), + filterEmptyNodes(element.body) + ) - return - } + return childInline !== null ? childInline : "" + } + + /** + * Render an ERB node as a string + */ + private renderERBAsString(node: ERBContentNode): string { + return this.capture(() => { + this.inlineMode = true + this.visit(node) + }).join("") + } + + /** + * Visit remaining children after processing adjacent inline elements + */ + private visitRemainingChildren(children: Node[], skipCount: number): void { + let skipped = 0 + + for (const child of children) { + if (isPureWhitespaceNode(child) || isNode(child, WhitespaceNode)) { + continue + } + + if (skipped < skipCount) { + skipped++ + continue + } + + this.visit(child) + } + } + + /** + * Build words array from text/inline/ERB and wrap them + */ + private buildAndWrapTextFlow(children: Node[]): void { + const unitsWithNodes: ContentUnitWithNode[] = this.buildContentUnitsWithNodes(children) + const words: string[] = [] + + for (const { unit, node } of unitsWithNodes) { + if (unit.breaksFlow) { + this.flushWords(words) + + if (node) { + this.visit(node) } - } else if (isNode(child, HTMLElementNode)) { - const childTagName = getTagName(child) + } else if (unit.isAtomic) { + words.push(unit.content) + } else { + const text = unit.content.replace(/\s+/g, ' ').trim() - if (this.isInlineElement(childTagName)) { - const childInline = this.tryRenderInlineFull(child, childTagName, - filterNodes(child.open_tag?.children, HTMLAttributeNode), - this.filterEmptyNodes(child.body) - ) + if (text) { + words.push(...text.split(' ')) + } + } + } - if (childInline) { - currentLineContent += childInline + this.flushWords(words) + } - if ((this.indent.length + currentLineContent.length) > this.maxLineLength) { - children.forEach(child => this.visit(child)) + /** + * Try to merge text that follows an atomic unit (ERB/inline) with no whitespace + * Returns true if merge was performed + */ + private tryMergeTextAfterAtomic(result: ContentUnitWithNode[], textNode: HTMLTextNode): boolean { + if (result.length === 0) return false - return - } - } else { - if (currentLineContent.trim()) { - this.pushWithIndent(currentLineContent.trim()) - currentLineContent = "" - } + const lastUnit = result[result.length - 1] - this.visit(child) + if (!lastUnit.unit.isAtomic || (lastUnit.unit.type !== 'erb' && lastUnit.unit.type !== 'inline')) { + return false + } + + const words = normalizeAndSplitWords(textNode.content) + if (words.length === 0 || !words[0]) return false + if (!startsWithAlphanumeric(words[0])) return false + + lastUnit.unit.content += words[0] + + if (words.length > 1) { + const remainingText = words.slice(1).join(' ') + + result.push({ + unit: { content: remainingText, type: 'text', isAtomic: false, breaksFlow: false }, + node: textNode + }) + } + + return true + } + + /** + * Try to merge an atomic unit (ERB/inline) with preceding text that has no whitespace + * Returns true if merge was performed + */ + private tryMergeAtomicAfterText(result: ContentUnitWithNode[], children: Node[], lastProcessedIndex: number, atomicContent: string, atomicType: 'erb' | 'inline', atomicNode: Node): boolean { + if (result.length === 0) return false + + const lastUnit = result[result.length - 1] + + if (lastUnit.unit.type !== 'text' || lastUnit.unit.isAtomic) return false + + const words = normalizeAndSplitWords(lastUnit.unit.content) + const lastWord = words[words.length - 1] + + if (!lastWord) return false + + result.pop() + + if (words.length > 1) { + const remainingText = words.slice(0, -1).join(' ') + + result.push({ + unit: { content: remainingText, type: 'text', isAtomic: false, breaksFlow: false }, + node: children[lastProcessedIndex] + }) + } + + result.push({ + unit: { content: lastWord + atomicContent, type: atomicType, isAtomic: true, breaksFlow: false }, + node: atomicNode + }) + + return true + } + + /** + * Check if there's whitespace between current node and last processed node + */ + private hasWhitespaceBeforeNode(children: Node[], lastProcessedIndex: number, currentIndex: number, currentNode: Node): boolean { + if (hasWhitespaceBetween(children, lastProcessedIndex, currentIndex)) { + return true + } + + if (isNode(currentNode, HTMLTextNode) && /^\s/.test(currentNode.content)) { + return true + } + + return false + } + + /** + * Check if last unit in result ends with whitespace + */ + private lastUnitEndsWithWhitespace(result: ContentUnitWithNode[]): boolean { + if (result.length === 0) return false + + const lastUnit = result[result.length - 1] + + return lastUnit.unit.type === 'text' && endsWithWhitespace(lastUnit.unit.content) + } + + /** + * Process a text node and add it to results (with potential merging) + */ + private processTextNode(result: ContentUnitWithNode[], children: Node[], child: HTMLTextNode, index: number, lastProcessedIndex: number): void { + const isAtomic = child.content === ' ' + + if (!isAtomic && lastProcessedIndex >= 0) { + const hasWhitespace = this.hasWhitespaceBeforeNode(children, lastProcessedIndex, index, child) + + if (!hasWhitespace && this.tryMergeTextAfterAtomic(result, child)) { + return + } + } + + result.push({ + unit: { content: child.content, type: 'text', isAtomic, breaksFlow: false }, + node: child + }) + } + + /** + * Process an inline element and add it to results (with potential merging) + */ + private processInlineElement(result: ContentUnitWithNode[], children: Node[], child: HTMLElementNode, index: number, lastProcessedIndex: number): boolean { + const tagName = getTagName(child) + const inlineContent = this.tryRenderInlineFull(child, tagName, filterNodes(child.open_tag?.children, HTMLAttributeNode), filterEmptyNodes(child.body)) + + if (inlineContent === null) { + result.push({ + unit: { content: '', type: 'block', isAtomic: false, breaksFlow: true }, + node: child + }) + + return false + } + + if (lastProcessedIndex >= 0) { + const hasWhitespace = hasWhitespaceBetween(children, lastProcessedIndex, index) || this.lastUnitEndsWithWhitespace(result) + + if (!hasWhitespace && this.tryMergeAtomicAfterText(result, children, lastProcessedIndex, inlineContent, 'inline', child)) { + return true + } + } + + result.push({ + unit: { content: inlineContent, type: 'inline', isAtomic: true, breaksFlow: false }, + node: child + }) + + return false + } + + /** + * Process an ERB content node and add it to results (with potential merging) + */ + private processERBContentNode(result: ContentUnitWithNode[], children: Node[], child: ERBContentNode, index: number, lastProcessedIndex: number): boolean { + const erbContent = this.renderERBAsString(child) + + if (lastProcessedIndex >= 0) { + const hasWhitespace = hasWhitespaceBetween(children, lastProcessedIndex, index) || this.lastUnitEndsWithWhitespace(result) + + if (!hasWhitespace && this.tryMergeAtomicAfterText(result, children, lastProcessedIndex, erbContent, 'erb', child)) { + return true + } + } + + result.push({ + unit: { content: erbContent, type: 'erb', isAtomic: true, breaksFlow: false }, + node: child + }) + + return false + } + + /** + * Convert AST nodes to content units with node references + */ + private buildContentUnitsWithNodes(children: Node[]): ContentUnitWithNode[] { + const result: ContentUnitWithNode[] = [] + let lastProcessedIndex = -1 + + for (let i = 0; i < children.length; i++) { + const child = children[i] + + if (isNode(child, WhitespaceNode)) continue + if (isPureWhitespaceNode(child) && !(isNode(child, HTMLTextNode) && child.content === ' ')) continue + + if (isNode(child, HTMLTextNode)) { + this.processTextNode(result, children, child, i, lastProcessedIndex) + + lastProcessedIndex = i + } else if (isNode(child, HTMLElementNode)) { + const tagName = getTagName(child) + + if (isInlineElement(tagName)) { + const merged = this.processInlineElement(result, children, child, i, lastProcessedIndex) + + if (merged) { + lastProcessedIndex = i + + continue } } else { - if (currentLineContent.trim()) { - this.pushWithIndent(currentLineContent.trim()) - currentLineContent = "" - } - - this.visit(child) + result.push({ + unit: { content: '', type: 'block', isAtomic: false, breaksFlow: true }, + node: child + }) } - } else if (isNode(child, ERBContentNode)) { - const oldLines = this.lines - const oldInlineMode = this.inlineMode - // TODO: use this.capture - try { - this.lines = [] - this.inlineMode = true - this.visit(child) - const erbContent = this.lines.join("") - currentLineContent += erbContent + lastProcessedIndex = i + } else if (isNode(child, ERBContentNode)) { + const merged = this.processERBContentNode(result, children, child, i, lastProcessedIndex) - if ((this.indent.length + currentLineContent.length) > Math.max(this.maxLineLength, 120)) { - this.lines = oldLines - this.inlineMode = oldInlineMode - children.forEach(child => this.visit(child)) + if (merged) { + lastProcessedIndex = i - return - } - } finally { - this.lines = oldLines - this.inlineMode = oldInlineMode + continue } + + lastProcessedIndex = i } else { - if (currentLineContent.trim()) { - this.pushWithIndent(currentLineContent.trim()) - currentLineContent = "" - } + result.push({ + unit: { content: '', type: 'block', isAtomic: false, breaksFlow: true }, + node: child + }) - this.visit(child) + lastProcessedIndex = i } } - if (currentLineContent.trim()) { - const finalLine = this.indent + currentLineContent.trim() + return result + } - if (finalLine.length > Math.max(this.maxLineLength, 120)) { - this.visitAll(children) + /** + * Flush accumulated words to output with wrapping + */ + private flushWords(words: string[]): void { + if (words.length > 0) { + this.wrapAndPushWords(words) + words.length = 0 + } + } - return + /** + * Wrap words to fit within line length and push to output + * Handles punctuation spacing intelligently + */ + private wrapAndPushWords(words: string[]): void { + const wrapWidth = this.maxLineLength - this.indent.length + const lines: string[] = [] + let currentLine = "" + + for (const word of words) { + const nextLine = buildLineWithWord(currentLine, word) + + if (shouldWrapToNextLine(nextLine, currentLine, word, wrapWidth)) { + lines.push(this.indent + currentLine) + currentLine = word + } else { + currentLine = nextLine } + } - this.push(finalLine) + if (currentLine) { + lines.push(this.indent + currentLine) } + + lines.forEach(line => this.push(line)) } private isInTextFlowContext(_parent: Node | null, children: Node[]): boolean { @@ -1486,7 +1746,7 @@ export class FormatPrinter extends Printer { if (isNode(child, ERBContentNode)) return true if (isNode(child, HTMLElementNode)) { - return this.isInlineElement(getTagName(child)) + return isInlineElement(getTagName(child)) } return false @@ -1576,7 +1836,7 @@ export class FormatPrinter extends Printer { } else { const printed = IdentityPrinter.print(child) - if (this.currentAttributeName && FormatPrinter.TOKEN_LIST_ATTRIBUTES.has(this.currentAttributeName)) { + if (this.isInTokenListAttribute) { return printed.replace(/%>([^<\s])/g, '%> $1').replace(/([^>\s])<%/g, '$1 <%') } @@ -1632,6 +1892,7 @@ export class FormatPrinter extends Printer { */ private tryRenderChildrenInline(children: Node[]): string | null { let result = "" + let hasInternalWhitespace = false for (const child of children) { if (isNode(child, HTMLTextNode)) { @@ -1655,19 +1916,25 @@ export class FormatPrinter extends Printer { } else if (hasLeadingSpace || hasTrailingSpace) { if (result && !result.endsWith(' ')) { result += ' ' + hasInternalWhitespace = true } } + } else if (isNode(child, WhitespaceNode)) { + if (result && !result.endsWith(' ')) { + result += ' ' + hasInternalWhitespace = true + } } else if (isNode(child, HTMLElementNode)) { const tagName = getTagName(child) - if (!this.isInlineElement(tagName)) { + if (!isInlineElement(tagName)) { return null } const childInline = this.tryRenderInlineFull(child, tagName, filterNodes(child.open_tag?.children, HTMLAttributeNode), - this.filterEmptyNodes(child.body) + filterEmptyNodes(child.body) ) if (!childInline) { @@ -1676,11 +1943,18 @@ export class FormatPrinter extends Printer { result += childInline } else { - result += this.capture(() => this.visit(child)).join("") + const wasInlineMode = this.inlineMode + this.inlineMode = true + + const captured = this.capture(() => this.visit(child)).join("") + + this.inlineMode = wasInlineMode + + result += captured } } - return result.trim() + return hasInternalWhitespace ? result : result.trim() } /** @@ -1694,9 +1968,7 @@ export class FormatPrinter extends Printer { return null } } else if (isNode(child, HTMLElementNode)) { - const isInlineElement = this.isInlineElement(getTagName(child)) - - if (!isInlineElement) { + if (!isInlineElement(getTagName(child))) { return null } } else if (isNode(child, ERBContentNode)) { @@ -1715,122 +1987,8 @@ export class FormatPrinter extends Printer { return `<${tagName}>${content}` } - /** - * Check if children contain mixed text and inline elements (like "textinlinetext") - * or mixed ERB output and text (like "<%= value %> text") - * This indicates content that should be formatted inline even with structural newlines - */ - private hasMixedTextAndInlineContent(children: Node[]): boolean { - let hasText = false - let hasInlineElements = false - - for (const child of children) { - if (isNode(child, HTMLTextNode)) { - if (child.content.trim() !== "") { - hasText = true - } - } else if (isNode(child, HTMLElementNode)) { - if (this.isInlineElement(getTagName(child))) { - hasInlineElements = true - } - } - } - - return (hasText && hasInlineElements) || (hasERBOutput(children) && hasText) - } - - /** - * Check if children contain any text content with newlines - */ - private hasMultilineTextContent(children: Node[]): boolean { - for (const child of children) { - if (isNode(child, HTMLTextNode)) { - return child.content.includes('\n') - } - - if (isNode(child, HTMLElementNode)) { - const nestedChildren = this.filterEmptyNodes(child.body) - - if (this.hasMultilineTextContent(nestedChildren)) { - return true - } - } - } - - return false - } - - /** - * Check if all nested elements in the children are inline elements - */ - private areAllNestedElementsInline(children: Node[]): boolean { - for (const child of children) { - if (isNode(child, HTMLElementNode)) { - if (!this.isInlineElement(getTagName(child))) { - return false - } - - const nestedChildren = this.filterEmptyNodes(child.body) - - if (!this.areAllNestedElementsInline(nestedChildren)) { - return false - } - } else if (isAnyOf(child, HTMLDoctypeNode, HTMLCommentNode, isERBControlFlowNode)) { - return false - } - } - - return true - } - - /** - * Check if element has complex ERB control flow - */ - private hasComplexERBControlFlow(inlineNodes: Node[]): boolean { - return inlineNodes.some(node => { - if (isNode(node, ERBIfNode)) { - if (node.statements.length > 0 && node.location) { - const startLine = node.location.start.line - const endLine = node.location.end.line - - return startLine !== endLine - } - - return false - } - - return false - }) - } - - /** - * Filter children to remove insignificant whitespace - */ - private filterSignificantChildren(body: Node[], hasTextFlow: boolean): Node[] { - return body.filter(child => { - if (isNode(child, WhitespaceNode)) return false - - if (isNode(child, HTMLTextNode)) { - if (hasTextFlow && child.content === " ") return true - - return child.content.trim() !== "" - } - - return true - }) - } - - /** - * Filter out empty text nodes and whitespace nodes - */ - private filterEmptyNodes(nodes: Node[]): Node[] { - return nodes.filter(child => - !isNode(child, WhitespaceNode) && !(isNode(child, HTMLTextNode) && child.content.trim() === "") - ) - } - private renderElementInline(element: HTMLElementNode): string { - const children = this.filterEmptyNodes(element.body) + const children = filterEmptyNodes(element.body) return this.renderChildrenInline(children) } @@ -1855,10 +2013,4 @@ export class FormatPrinter extends Printer { return content.replace(/\s+/g, ' ').trim() } - - private isContentPreserving(element: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): boolean { - const tagName = getTagName(element) - - return FormatPrinter.CONTENT_PRESERVING_ELEMENTS.has(tagName) - } } diff --git a/javascript/packages/formatter/test/cli.test.ts b/javascript/packages/formatter/test/cli.test.ts index 5d4d3ed46..bbb0b302d 100644 --- a/javascript/packages/formatter/test/cli.test.ts +++ b/javascript/packages/formatter/test/cli.test.ts @@ -155,20 +155,41 @@ describe("CLI Binary", () => { } }) - it("should show experimental preview message", async () => { + it("should show experimental preview message on stderr", async () => { const result = await execBinary([]) - expect(result.stdout).toContain("⚠️ Experimental Preview") - expect(result.stdout).toContain("early development") - expect(result.stdout).toContain("github.com/marcoroth/herb/issues") + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stderr).toContain("early development") + expect(result.stderr).toContain("github.com/marcoroth/herb/issues") + }) + + it("should not include experimental preview message in stdout", async () => { + const input = '
test
' + const result = await execBinary([], input) + + expectExitCode(result, 0) + expect(result.stdout).not.toContain("⚠️ Experimental Preview") + expect(result.stdout).toContain('
test
') + }) + + it("stdout should contain only formatted output without warnings", async () => { + const input = '

Hello

' + const result = await execBinary([], input) + + expectExitCode(result, 0) + + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stdout).not.toContain("⚠️") + expect(result.stdout).not.toContain("Experimental") + expect(result.stdout).toBe('
\n

Hello

\n
\n') }) it("should format empty input from stdin when no args provided", async () => { const result = await execBinary([], "") expectExitCode(result, 0) - expect(result.stdout).toContain("⚠️ Experimental Preview") - expect(result.stdout).toContain("\n\n") + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stdout).toBe("\n") }) it("should handle no files found in empty directory", async () => { @@ -178,9 +199,9 @@ describe("CLI Binary", () => { const result = await execBinary(["test-empty-dir"]) expectExitCode(result, 0) - expect(result.stdout).toContain("⚠️ Experimental Preview") + expect(result.stderr).toContain("⚠️ Experimental Preview") expect(result.stdout).toContain("No files found matching pattern:") - expect(result.stdout).toContain("test-empty-dir/**/*.html.erb") + expect(result.stdout).toContain("test-empty-dir/**/*.{html,rhtml,html.erb,html+*.erb,turbo_stream.erb") } finally { await rm("test-empty-dir", { recursive: true }).catch(() => {}) } @@ -232,7 +253,7 @@ describe("CLI Binary", () => { expectExitCode(result, 0) expect(result.stdout).toContain("No files found matching pattern:") - expect(result.stdout).toContain("test-dir/**/*.html.erb") + expect(result.stdout).toContain("test-dir/**/*.{html,rhtml,html.erb,html+*.erb,turbo_stream.erb") } finally { await rm("test-dir", { recursive: true }).catch(() => {}) } @@ -365,12 +386,8 @@ describe("CLI Binary", () => { expectExitCode(result, 0) expect(result.stdout.endsWith('\n')).toBe(true) - expect(result.stdout).toContain("⚠️ Experimental Preview") - expect(result.stdout).toContain('
Hello
') - - const lines = result.stdout.split('\n') - const formattedLines = lines.slice(2) // Skip experimental preview lines - expect(formattedLines.join('\n')).toBe('
Hello
\n') + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stdout).toBe('
Hello
\n') }) it("CLI should preserve existing trailing newline", async () => { @@ -379,12 +396,8 @@ describe("CLI Binary", () => { expectExitCode(result, 0) expect(result.stdout.endsWith('\n')).toBe(true) - expect(result.stdout).toContain("⚠️ Experimental Preview") - expect(result.stdout).toContain('
Hello
') - - const lines = result.stdout.split('\n') - const formattedLines = lines.slice(2) // Skip experimental preview lines - expect(formattedLines.join('\n')).toBe('
Hello
\n') + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stdout).toBe('
Hello
\n') }) it("CLI should add trailing newline to empty input", async () => { @@ -392,11 +405,8 @@ describe("CLI Binary", () => { const result = await execBinary([], input) expectExitCode(result, 0) - expect(result.stdout).toContain("⚠️ Experimental Preview") - - const lines = result.stdout.split('\n') - const formattedLines = lines.slice(2) // Skip experimental preview lines - expect(formattedLines.join('\n')).toBe('\n') + expect(result.stderr).toContain("⚠️ Experimental Preview") + expect(result.stdout).toBe('\n') }) it("should show --indent-width option in help", async () => { @@ -632,7 +642,7 @@ describe("CLI Binary", () => { expectExitCode(result, 0) expect(result.stdout).toContain("No files found matching pattern") - expect(result.stdout).toContain("test-advanced/**/*.html.erb") + expect(result.stdout).toContain("test-advanced/**/*.{html,rhtml,html.erb,html+*.erb,turbo_stream.erb") }) it("should handle mixed file arguments", async () => { diff --git a/javascript/packages/formatter/test/document-formatting.test.ts b/javascript/packages/formatter/test/document-formatting.test.ts index 60a3e1e5f..1f7d4c633 100644 --- a/javascript/packages/formatter/test/document-formatting.test.ts +++ b/javascript/packages/formatter/test/document-formatting.test.ts @@ -692,7 +692,8 @@ describe("Document-level formatting", () => {

- <%= event.static_metadata.location %> • <%= event.formatted_dates %> + <%= event.static_metadata.location %> • + <%= event.formatted_dates %>