From 3dddec177b47bb15c7eb0817e3ff93820561f9bb Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Mon, 21 Aug 2023 19:53:50 -0700 Subject: [PATCH 01/13] Add `jsonian-format-region' --- jsonian.el | 135 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 123 insertions(+), 12 deletions(-) diff --git a/jsonian.el b/jsonian.el index b7a491c..b2a04e2 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1628,6 +1628,16 @@ If END is non-nil, inspect only before it." (current-column)))) (and (< 0 indent) indent)))))) +(defun jsonian--indentation-spaces () + "The number of spaces per indentation level. +Either set or inferred." + (or + jsonian-indentation + (if-let* ((indent (jsonian--infer-indentation)) + (not-zero (> indent 0))) + indent + jsonian-default-indentation))) + ;;;###autoload (defun jsonian-indent-line () "Indent a single line. @@ -1635,12 +1645,7 @@ The indent is determined by examining the previous line. The number of spaces is determined by `jsonian-indentation' if it is set, otherwise it is inferred from the document." (interactive) - (let* ((indent (or - jsonian-indentation - (if-let* ((indent (jsonian--infer-indentation)) - (not-zero (> indent 0))) - indent - jsonian-default-indentation))) + (let* ((indent (jsonian--indentation-spaces)) (indent-level (jsonian--get-indent-level indent)) (current-indent (save-excursion (back-to-indentation) (current-column)))) @@ -1768,12 +1773,7 @@ containing array/object." "Indent the region from START to END." (interactive "r") (save-excursion - (let ((indent (or - jsonian-indentation - (if-let* ((indent (jsonian--infer-indentation)) - (not-zero (> indent 0))) - indent - jsonian-default-indentation))) + (let ((indent (jsonian--indentation-spaces)) ;; Indent levels of siblings, parent, grand parent, and so on. (levels '()) progress @@ -1855,6 +1855,117 @@ containing array/object." 
(progress-reporter-done progress)) (set-marker end nil nil))) +(defmacro jsonian--huge-edit (start end &rest body) + "Evaluate form BODY with optimizations for huge edits. +Run the change hooks just once like `combine-change-calls'. +Create undo entries as if the contents from START to END are replaced at once. +BODY must not modify buffer outside the region (START END), nor move any markers +out of the region." + (declare (debug (form form def-body)) (indent 2)) + (let ((start-value (make-symbol "start")) + (end-value (make-symbol "end"))) + `(let ((,start-value ,start) + (,end-value ,end)) + ;; WORKAROUND: If buffer-undo-list is nil, combine-change-calls shows + ;; unnecessary message. + ;; https://git.savannah.gnu.org/cgit/emacs.git/commit/?id=977630b5285809a57e50ff5f38d9c34247b549a7 + (unless buffer-undo-list + (push (point) buffer-undo-list)) + (,(if (fboundp 'combine-change-calls) + 'combine-change-calls + 'combine-after-change-calls) + ,start-value + ,end-value + (jsonian--huge-edit-1 ,start-value ,end-value (lambda () ,@body)))))) + +(defun jsonian--huge-edit-1 (start end body) + "Evaluate a function BODY with optimizations for huge edits. +Create undo entries as if the contents from START to END are replaced at once. +BODY must not modify buffer outside the region (START END), nor move any markers +out of the region." + (let ((old-undo-list buffer-undo-list) + (undo-inhibit-record-point t) + deletion-undo-list) + ;; Clear the undo list. + (buffer-disable-undo) + (buffer-enable-undo) + (unwind-protect + (atomic-change-group + (delete-region start end) + ;; This contains restoring the region and markers inside it. + (setq deletion-undo-list buffer-undo-list) + (primitive-undo (length deletion-undo-list) deletion-undo-list)) + (setq buffer-undo-list old-undo-list)) + (setq start (copy-marker start)) + (setq end (copy-marker end)) + (buffer-disable-undo) + (unwind-protect + (funcall body) + ;; Note that setting `buffer-undo-list' enables undo again.
+ (setq buffer-undo-list + (append (cons + (cons (jsonian--free-marker start) + (jsonian--free-marker end)) + deletion-undo-list) + old-undo-list))))) + +(defun jsonian--free-marker (marker) + "Make MARKER pointing nowhere and return the old position." + (prog1 (marker-position marker) + (set-marker marker nil nil))) + +;;;###autoload +(defun jsonian-format-region (start end) + "Format the region (START . END)." + (interactive "r") + (jsonian--huge-edit start end + (save-excursion + (goto-char start) + (jsonian--snap-to-token) + (let* ((indent (jsonian--indentation-spaces)) + (indent-level (jsonian--get-indent-level indent)) + (undo-inhibit-record-point t) + next-token) + (while (and + (<= (point) end) + (jsonian--forward-token)) + (let ((removed (* -1 (- (point) jsonian--last-token-end)))) + (delete-char removed) + (cl-decf end removed)) + (setq next-token (point)) + (cond + ;; A space separates : from the next token + ;; + ;; "foo": bar + ;; ^space + ((eq (char-before jsonian--last-token-end) ?:) + (goto-char jsonian--last-token-end) + (insert " ") + (cl-incf end 1) + (goto-char (1+ next-token))) + ;; If the second of the abutting tokens is a ",", then we don't make any + ;; adjustments. + ((memq (char-after) '(?, ?:)) + (ignore)) + + ;; TODO empty arrays and objects should be printed together. + + ;; All other items are separated by a new line, then the appropriate indentation. + (t + (cond + ((memq (char-after) '(?\] ?\})) + (cl-decf indent-level)) + ((memq (char-before jsonian--last-token-end) '(?\[ ?\{)) + (cl-incf indent-level))) + + (insert "\n") + (insert-char ?\s (* indent indent-level)) + (cl-incf end (+ 1 (* indent indent-level))) + (goto-char (+ next-token + 1 ;; The newline + (* indent indent-level) ;; The inserted spaces + ))))))))) + (defun jsonian-beginning-of-defun (&optional arg) "Move to the beginning of the smallest object/array enclosing `POS'. ARG is currently ignored." 
From eea7c5efdadeb6c1940bb91c34ffe857243ead2e Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Tue, 22 Aug 2023 07:58:17 -0700 Subject: [PATCH 02/13] Performance optimizations --- jsonian.el | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/jsonian.el b/jsonian.el index b2a04e2..6b933ad 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1919,20 +1919,23 @@ out of the region." "Format the region (START . END)." (interactive "r") (jsonian--huge-edit start end - (save-excursion + (let ((end (progn (goto-char end) (point-marker)))) (goto-char start) (jsonian--snap-to-token) (let* ((indent (jsonian--indentation-spaces)) (indent-level (jsonian--get-indent-level indent)) (undo-inhibit-record-point t) - next-token) + (next-token (make-marker)) + ;; Don't allocate a new string each time you add indentation. + ;; + ;; In effect, this is where we intern strings on behalf of elisp. + (indent-strings '("\n"))) + (set-marker-insertion-type next-token t) (while (and (<= (point) end) (jsonian--forward-token)) - (let ((removed (* -1 (- (point) jsonian--last-token-end)))) - (delete-char removed) - (cl-decf end removed)) - (setq next-token (point)) + (set-marker next-token (point)) + (delete-region jsonian--last-token-end (point)) (cond ;; A space separates : from the next token ;; @@ -1941,30 +1944,29 @@ out of the region." ((eq (char-before jsonian--last-token-end) ?:) (goto-char jsonian--last-token-end) (insert " ") - (cl-incf end 1) - (goto-char (1+ next-token))) + (goto-char next-token)) ;; If the second of the abutting tokens is a ",", then we don't make any ;; adjustments. - ((memq (char-after) '(?, ?:)) - (ignore)) + ((memq (char-after) '(?, ?:))) ;; TODO empty arrays and objects should be printed together. ;; All other items are separated by a new line, then the appropriate indentation. 
(t - (cond - ((memq (char-after) '(?\] ?\})) + (when (memq (char-after) '(?\] ?\})) (cl-decf indent-level)) - ((memq (char-before jsonian--last-token-end) '(?\[ ?\{)) - (cl-incf indent-level))) - - (insert "\n") - (insert-char ?\s (* indent indent-level)) - (cl-incf end (+ 1 (* indent indent-level))) - (goto-char (+ next-token - 1 ;; The newline - (* indent indent-level) ;; The inserted spaces - ))))))))) + (when (memq (char-before jsonian--last-token-end) '(?\[ ?\{)) + (cl-incf indent-level)) + (while (<= (length indent-strings) indent-level) + (setq indent-strings + (append indent-strings + (list (concat + "\n" + (make-string + (* indent (length indent-strings)) + ?\s)))))) + (insert (nth indent-level indent-strings)) + (goto-char next-token)))))))) (defun jsonian-beginning-of-defun (&optional arg) "Move to the beginning of the smallest object/array enclosing `POS'. From 4d27a2a3800bccd4796253456a39a8fee60d1a5d Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Tue, 22 Aug 2023 09:21:55 -0700 Subject: [PATCH 03/13] Increase string traversal speed --- jsonian.el | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/jsonian.el b/jsonian.el index 6b933ad..637dee4 100644 --- a/jsonian.el +++ b/jsonian.el @@ -933,18 +933,19 @@ If AT-BEGINNING is non-nil, `jsonian--string-scan-forward' assumes it is at the beginning of the string. Otherwise it scans backwards to ensure that the end of a string is not escaped." (let ((start (if at-beginning (point) (jsonian--pos-in-stringp))) - escaped done) (when start (goto-char (1+ start)) (while (not (or done (eolp))) (cond ((= (char-after) ?\\) - (setq escaped (not escaped))) - ((and (= (char-after) ?\") (not escaped)) - (setq done (point))) - (t (setq escaped nil))) - (forward-char)) + (forward-char 2)) + ((= (char-after) ?\") + (setq done (point)) + (forward-char)) + ;; We are in the string, and not looking at a significant character. Scan forward + ;; (in C) for an interesting character. 
+ (t (skip-chars-forward "^\"\\\\\n")))) (and done (>= done start) done)))) (defun jsonian--pos-in-stringp () From 060e65b64f7122583a4809fcedfa4a22fb74a616 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Wed, 23 Aug 2023 23:55:59 -0700 Subject: [PATCH 04/13] Fix indentation bug --- jsonian.el | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/jsonian.el b/jsonian.el index 637dee4..01924a7 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1955,17 +1955,16 @@ out of the region." ;; All other items are separated by a new line, then the appropriate indentation. (t (when (memq (char-after) '(?\] ?\})) - (cl-decf indent-level)) + (cl-decf indent-level indent)) (when (memq (char-before jsonian--last-token-end) '(?\[ ?\{)) - (cl-incf indent-level)) + (cl-incf indent-level indent)) (while (<= (length indent-strings) indent-level) (setq indent-strings (append indent-strings (list (concat "\n" - (make-string - (* indent (length indent-strings)) - ?\s)))))) + (make-string (length indent-strings) + ?\s)))))) (insert (nth indent-level indent-strings)) (goto-char next-token)))))))) From 625f94752050c6c4bcb226a86cbb8ece85a14bcf Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Thu, 24 Aug 2023 00:03:05 -0700 Subject: [PATCH 05/13] Indent comments correctly --- jsonian.el | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/jsonian.el b/jsonian.el index 01924a7..84be2f4 100644 --- a/jsonian.el +++ b/jsonian.el @@ -356,11 +356,17 @@ It will set the value of `jsonian--last-token-end' to If `jsonian--forward-token' returned nil, the value of `jsonian--last-token-end' is undefined.") -(defun jsonian--forward-token () +(defun jsonian--forward-token (&optional stop-at-comments) "Move `point' to the next JSON token. `jsonian--forward-token' will skip over any whitespace it finds. +By default, `jsonian--forward-token' skips over comments when in +`jsonian-c-mode' or errors on comments in plain `jsonian-mode'. 
+If STOP-AT-COMMENTS is non-nil and a comment is encountered in +`jsonian-c-mode', then comments are treated like tokens by +`jsonian--forward-token'. + It is assumed that `point' starts at a JSON token. t is returned if `jsonian--forward-token' successfully traversed @@ -381,13 +387,18 @@ a token, otherwise nil is returned." (?t (jsonian--forward-true)) (?f (jsonian--forward-false)) (?n (jsonian--forward-null)) + ((pred (lambda (c) (and stop-at-comments + (derived-mode-p 'jsonian-c-mode) + (eq c ?/) + (memq (char-after (1+ (point))) '(?/ ?*))))) + (forward-comment 1)) ((pred (lambda (c) (or (and (<= c ?9) (>= c ?0)) (eq c ?-)))) (jsonian--forward-number)) ;; This is the set of chars that can start a token (_ (jsonian--unexpected-char :forward "one of ':,[]{}\"tfn0123456789-'"))) (setq jsonian--last-token-end (point)) ;; Skip forward over whitespace and comments - (when (and (= (jsonian--skip-chars-forward "\s\n\t") 0) + (when (and (= (jsonian--skip-chars-forward "\s\n\t" stop-at-comments) 0) needs-seperator (not (memq (char-after) '(nil ?: ?, ?\[ ?\] ?\{ ?\} ?\s ?\t ?\n)))) (jsonian--unexpected-char :forward "one of ':,[]{}\\s\\t\\n' or EOF"))) @@ -429,12 +440,16 @@ before a node." (jsonian--backward-comment))) (- start (point)))) -(defun jsonian--skip-chars-forward (chars) - "Skip CHARS forward in a comment aware way." +(defun jsonian--skip-chars-forward (chars &optional stop-at-comments) + "Skip CHARS forward in a comment aware way. + +If STOP-AT-COMMENTS is non-nil, then (comment . traveled) is +returned when a comment is encountered." (let ((start (point))) (while (or (> (skip-chars-forward chars) 0) - (jsonian--forward-comment))) + (and (not stop-at-comments) + (jsonian--forward-comment)))) (- (point) start))) (defun jsonian--snap-to-token () @@ -1934,7 +1949,7 @@ out of the region." 
(set-marker-insertion-type next-token t) (while (and (<= (point) end) - (jsonian--forward-token)) + (jsonian--forward-token t)) (set-marker next-token (point)) (delete-region jsonian--last-token-end (point)) (cond From d3528619cf8459fa8d3d2b5300e933c819591a96 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Thu, 24 Aug 2023 21:08:23 -0700 Subject: [PATCH 06/13] Add progress reporter --- jsonian.el | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/jsonian.el b/jsonian.el index 84be2f4..b5dc1a2 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1945,11 +1945,13 @@ out of the region." ;; Don't allocate a new string each time you add indentation. ;; ;; In effect, this is where we intern strings on behalf of elisp. - (indent-strings '("\n"))) + (indent-strings '("\n")) + (progress (make-progress-reporter "Formatting region..." start (* (- end start) 1.5)))) (set-marker-insertion-type next-token t) (while (and (<= (point) end) (jsonian--forward-token t)) + (progress-reporter-update progress (point)) (set-marker next-token (point)) (delete-region jsonian--last-token-end (point)) (cond @@ -1981,7 +1983,8 @@ out of the region." (make-string (length indent-strings) ?\s)))))) (insert (nth indent-level indent-strings)) - (goto-char next-token)))))))) + (goto-char next-token)))) + (progress-reporter-done progress))))) (defun jsonian-beginning-of-defun (&optional arg) "Move to the beginning of the smallest object/array enclosing `POS'. 
From 150867fd5d03c3ea16ed1bc628610c958a708692 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Thu, 24 Aug 2023 21:45:02 -0700 Subject: [PATCH 07/13] Add tests for `jsonian-format-region` --- jsonian-tests.el | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ jsonian.el | 4 +++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/jsonian-tests.el b/jsonian-tests.el index f2cbac9..d206bd0 100644 --- a/jsonian-tests.el +++ b/jsonian-tests.el @@ -714,5 +714,57 @@ Specifically, we need to comply with what `completion-boundaries' describes." (face 'font-lock-keyword-face "{ \"fo$o\" // bar\n:null }") (face 'font-lock-string-face "[ \"\\\"f$oo\" ]"))) +(defun jsonian--format-string (s) + "Call `jsonian-format-region' S. To be used in testing." + (with-temp-buffer + (insert s) + (jsonian-format-region (point-min) (point-max)) + (buffer-string))) + +(defun jsonian--test-format (input expected) + "Check that calling `jsonian-format-region' on INPUT yields EXPECTED." + (let ((inhibit-message t)) + ;; Validate that we get the expected result. + (should (string= (jsonian--format-string input) + expected)) + ;; Validate that once formatted, calling format again is a no-op. + (should (string= (jsonian--format-string expected) + expected)) + ;; Validate that `jsonian--format-string' matches the behavior of `json-pretty-print'. + ;; Because that `json-pretty-print-buffer' defaults to an indentation of 2, we set + ;; that for ourselves. + (let ((jsonian-indentation 2)) + (should (string= (jsonian--format-string input) + (with-temp-buffer + (insert input) + (json-pretty-print-buffer) + (buffer-string))))))) + +(ert-deftest jsonian-format-region () + "Test `jsonian-format-region'." 
+ (jsonian--test-format + "[false,null,true,\"abc\",-3.14]" + "[ + false, + null, + true, + \"abc\", + -3.14 +]") + (jsonian--test-format "[{\"null\":null}, [ [ { +} ], [ ] ] ] +" "[ + { + \"null\": null + }, + [ + [ + {} + ], + [] + ] +] +")) + (provide 'jsonian-tests) ;;; jsonian-tests.el ends here diff --git a/jsonian.el b/jsonian.el index b5dc1a2..d89ab83 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1967,7 +1967,9 @@ out of the region." ;; adjustments. ((memq (char-after) '(?, ?:))) - ;; TODO empty arrays and objects should be printed together. + ;; Empty objects and arrays are formatted as {} and [], respectively. + ((and (eq (char-before) ?\[) (eq (char-after) ?\]))) + ((and (eq (char-before) ?\{) (eq (char-after) ?\}))) ;; All other items are separated by a new line, then the appropriate indentation. (t From 52b3017aa0030dc175409b68c6cf23c4ba1b546b Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Thu, 24 Aug 2023 23:50:34 -0700 Subject: [PATCH 08/13] Improve benchmark quality --- Makefile | 48 ++++++++++++----------------------- README.md | 63 +++++++++++++++++++++++----------------------- bench/font-lock.md | 7 ++++++ bench/font-lock.sh | 30 ++++++++++++++++++++++ bench/markdown.md | 33 ++++++++++++++++++++++++ bench/markdown.sh | 35 ++++++++++++++++++++++++++ 6 files changed, 153 insertions(+), 63 deletions(-) create mode 100644 bench/font-lock.md create mode 100755 bench/font-lock.sh create mode 100644 bench/markdown.md create mode 100755 bench/markdown.sh diff --git a/Makefile b/Makefile index 6d3a3e1..870d5ce 100644 --- a/Makefile +++ b/Makefile @@ -45,39 +45,23 @@ checkdoc: --eval "(setq byte-compile-error-on-warn t)" \ -f batch-byte-compile $< -bench = time $(EMACS) -Q -nw $(3) \ ---eval '(setq enable-local-variables nil)' \ ---eval '(setq large-file-warning-threshold nil)' \ ---eval '(switch-to-buffer (find-file-literally "$(1)"))' \ ---eval $(2) \ ---eval '(condition-case err \ -(with-current-buffer (current-buffer) \ -(setq font-lock-major-mode nil) 
\ -(syntax-ppss-flush-cache -1) \ -(font-lock-set-defaults) \ -(save-excursion \ -(font-lock-fontify-region (point-min) (point-max)))) \ -((debug error) (kill-emacs (error-message-string err))))' \ ---eval '(goto-char (point-max))' \ ---eval '(kill-emacs)' - LARGE_JSON_FILE := test-assets/large-json-file.json ${LARGE_JSON_FILE}: curl 'https://raw.githubusercontent.com/pulumi/pulumi-azure-native/master/provider/cmd/pulumi-resource-azure-native/schema.json' > ${LARGE_JSON_FILE} -bench-base: ${LARGE_JSON_FILE} jsonian.elc - -bench-jsonian: bench-base - $(call bench,${LARGE_JSON_FILE}, "(progn (require 'jsonian) (jsonian-mode))", -L .) - -bench-json-mode: bench-base - $(call bench,${LARGE_JSON_FILE}, "(progn (require 'json-mode) (json-mode))", -L ../json-mode -L ../json-snatcher -L ../json-reformat) - -bench-javascript: bench-base - $(call bench,${LARGE_JSON_FILE}, "(javascript-mode)",) - -bench-fundamental: bench-base - $(call bench,${LARGE_JSON_FILE},"(fundamental-mode)",) - -bench-prog: bench-base - $(call bench,${LARGE_JSON_FILE},"(prog-mode)",) +BENCHMARK_START= +BENCHMARK_END= +README.md: bench/markdown.md + @echo "Splicing bench/markdown.md into README.md" + cp $@ $@.backup + rg -U '(?s)${BENCHMARK_START}.*${BENCHMARK_END}' \ + --replace "${BENCHMARK_START}$$(cat bench/markdown.md)\n${BENCHMARK_END}" \ + --passthru < $@ > $@.new + mv $@.new $@ + +bench/markdown.md: bench/font-lock.md bench/markdown.sh + EXPORT="$@" ./bench/markdown.sh + +bench/font-lock.md: ${LARGE_JSON_FILE} jsonian.elc bench/font-lock.sh + hyperfine --version + EMACS="'${EMACS}'" FILE="${LARGE_JSON_FILE}" EXPORT="$@" ./bench/font-lock.sh diff --git a/README.md b/README.md index b2accc4..a836c6c 100644 --- a/README.md +++ b/README.md @@ -178,40 +178,41 @@ buffer. When the element is selected, jump to that point in the buffer. Enable `jsonian-mode` for all checkers where `json-mode` is enabled. 
-## Speed comparison against other modes - -Part of the promise of `jsonian` is that it will be performant on large files. A -primitive benchmark is included in the `Makefile`. It opens a very very large -(42M) JSON file, and then forces emacs to fontify it. It finally moves point to -the end of the file and exits. Here is a comparison of the time it takes to -fontify the whole buffer on a file: - -| Package | Time | comparison | -| ------------------ | -------------------------------------------------------------- | ---------- | -| `fundamental-mode` | 8 seconds | 0.66 | -| `prog-mode` | 8 seconds | 0.66 | -| `jsonian` | 12 seconds | 1 | -| `javascript-mode` | 31 seconds | 2.58 | -| `json-mode` | Fails after 43 seconds with "Stack overflow in regexp matcher" | 3.58 | - -Here is what we can take away from this benchmark: - -- Emacs spends 8 seconds traversing the buffer and parse matching delimiters. We - see that from the unfontified time of both `fundamental-mode` and `prog-mode`. -- `jsonian-mode` adds 4 seconds in fontification. I assume that this time is - spent in additional regex searches and function calls. -- `javascript-mode` spends 19 seconds longer the `jsonian-mode` to achieve the - same effect, presumably because the mode is more general. JavaScript is a much - more complicated spec then JSON. This will result in more complicated regexes - and functions. -- `json-mode` Spends 12 _additional_ seconds, presumably with an additional set - of font lock regexes. + +## Benchmarks + +The original reason I wrote jsonian is that I needed to read and navigate very large JSON +files, and Emacs was slowing me down. To keep jsonian fast, I maintain benchmarks of +jsonian doing real world tasks. + +### `font-lock`ing a large buffer + +This benchmark opens a very large (42M) JSON file, then forces Emacs to fontify it. It +finally moves point to the end of the file and exits.
+ +| Package | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | +| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | +| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | +| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | +| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | + +We can use this benchmark to derive how long different parts of the proces take. + +- Fundamental mode is the lower limit. This is the time Emacs spends processing the + buffer, parsing sexps, etc. + +- We see that `prog-mode` doesn\'t do much more than `fundamental-mode`, which makes + sense. + +- Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. Notes: -1. Both `jsonian` and `json-mode` were byte-compiled for this benchmark. Byte - compiling `jsonian` shaves 6 seconds off of this benchmark. -2. These benchmarks were taken on a 2.6 GHz 6-Core Intel i7 running macOS Monterey. +- Both `jsonian` and `json-mode` were byte-compiled for the benchmark. +- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. + ## Contributing diff --git a/bench/font-lock.md b/bench/font-lock.md new file mode 100644 index 0000000..4ef5acb --- /dev/null +++ b/bench/font-lock.md @@ -0,0 +1,7 @@ +| Command | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | +| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | +| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | +| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | +| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | diff --git a/bench/font-lock.sh b/bench/font-lock.sh new file mode 100755 index 0000000..253bec8 --- /dev/null +++ b/bench/font-lock.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env sh + +# EMACS should be set to the binary to invoke.
+# FILE should be set to the file to test against. + +bench() { + echo "$EMACS -Q --batch -nw \ +-L . -L ../json-mode -L ../json-snatcher -L ../json-reformat \ +--eval '(setq enable-local-variables nil)' \ +--eval '(setq large-file-warning-threshold nil)' \ +--eval '(switch-to-buffer (find-file-literally \"$FILE\"))' \ +--eval \"$1\" \ +--eval '(condition-case err \ +(with-current-buffer (current-buffer) \ +(setq font-lock-major-mode nil) \ +(syntax-ppss-flush-cache -1) \ +(font-lock-set-defaults) \ +(save-excursion \ +(font-lock-fontify-region (point-min) (point-max)))) \ +((debug error) (kill-emacs (error-message-string err))))' \ +--eval '(goto-char (point-max))' \ +--eval '(kill-emacs)'" +} + +hyperfine --export-markdown "$EXPORT" --show-output \ + --command-name "fundamental-mode" "$(bench "(fundamental-mode)")" \ + --command-name "prog-mode" "$(bench "(prog-mode)")" \ + --command-name "jsonian-mode" "$(bench "(progn (require 'jsonian) (jsonian-mode))")" \ + --command-name "json-mode" "$(bench "(progn (require 'json-mode) (json-mode))")" \ + --command-name "javascript-mode" "$(bench "(javascript-mode)")" diff --git a/bench/markdown.md b/bench/markdown.md new file mode 100644 index 0000000..4e7892d --- /dev/null +++ b/bench/markdown.md @@ -0,0 +1,33 @@ + +## Benchmarks + +The original reason I wrote jsonian is that I needed to read and navigate very large JSON +files, and Emacs was slowing me down. To keep jsonian fast, I maintain benchmarks of +jsonian doing real world tasks. + +### `font-lock`ing a large buffer + +This benchmark opens a very large (42M) JSON file, then forces Emacs to fontify it. It +finally moves point to the end of the file and exits.
+ +| Package | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | +| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | +| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | +| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | +| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | + +We can use this benchmark to derive how long different parts of the proces take. + +- Fundamental mode is the lower limit. This is the time Emacs spends processing the + buffer, parsing sexps, etc. + +- We see that `prog-mode` doesn\'t do much more than `fundamental-mode`, which makes + sense. + +- Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. + +Notes: + +- Both `jsonian` and `json-mode` were byte-compiled for the benchmark. diff --git a/bench/markdown.sh b/bench/markdown.sh new file mode 100755 index 0000000..e381681 --- /dev/null +++ b/bench/markdown.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env sh + +MESSAGE=$(cat <<-EOF + +## Benchmarks + +The original reason I wrote jsonian is that I needed to read and navigate very large JSON +files, and Emacs was slowing me down. To keep jsonian fast, I maintain benchmarks of +jsonian doing real world tasks. + +### \`font-lock\`ing a large buffer + +This benchmark opens a very large (42M) JSON file, then forces Emacs to fontify it. It +finally moves point to the end of the file and exits. + +$(sed 's/Command/Package/g' < bench/font-lock.md) + +We can use this benchmark to derive how long different parts of the proces take. + +- Fundamental mode is the lower limit. This is the time Emacs spends processing the + buffer, parsing sexps, etc. + +- We see that \`prog-mode\` doesn\'t do much more than \`fundamental-mode\`, which makes + sense. + +- Applying JSON formatting take at most \`jsonian-mode\` - \`prog-mode\`. + +Notes: + +- Both \`jsonian\` and \`json-mode\` were byte-compiled for the benchmark.
+- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. +EOF + ) + +echo "$MESSAGE" > "$EXPORT" From 234836930934ae78652ee341b4ca8b0bbe0ff9e4 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Fri, 25 Aug 2023 10:09:57 -0700 Subject: [PATCH 09/13] Add `jsonian-format-region` benchmark --- Makefile | 15 +++++++++------ README.md | 29 +++++++++++++++++++++-------- bench/font-lock.md | 10 +++++----- bench/format.md | 4 ++++ bench/format.sh | 22 ++++++++++++++++++++++ bench/markdown.md | 27 +++++++++++++++++++++------ bench/markdown.sh | 13 ++++++++++++- 7 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 bench/format.md create mode 100755 bench/format.sh diff --git a/Makefile b/Makefile index 870d5ce..c295f3b 100644 --- a/Makefile +++ b/Makefile @@ -55,13 +55,16 @@ README.md: bench/markdown.md @echo "Splicing bench/markdown.md into README.md" cp $@ $@.backup rg -U '(?s)${BENCHMARK_START}.*${BENCHMARK_END}' \ - --replace "${BENCHMARK_START}$$(cat bench/markdown.md)\n${BENCHMARK_END}" \ + --replace '${BENCHMARK_START}'"$$(cat bench/markdown.md)"'${BENCHMARK_END}' \ --passthru < $@ > $@.new mv $@.new $@ -bench/markdown.md: bench/font-lock.md bench/markdown.sh - EXPORT="$@" ./bench/markdown.sh +bench/markdown.md: bench/format.md bench/font-lock.md bench/markdown.sh + EMACS="'${EMACS}'" EXPORT="$@" ./bench/markdown.sh -bench/font-lock.md: ${LARGE_JSON_FILE} jsonian.elc bench/font-lock.sh - hyperfine --version - EMACS="'${EMACS}'" FILE="${LARGE_JSON_FILE}" EXPORT="$@" ./bench/font-lock.sh +PHONY: bench-base +bench-base: ${LARGE_JSON_FILE} jsonian.elc + hyperfine --version # Ensure hyperfine is installed + +bench/%.md: bench/%.sh bench-base + EMACS="'${EMACS}'" FILE="${LARGE_JSON_FILE}" EXPORT="$@" $< diff --git a/README.md b/README.md index a836c6c..17efb78 100644 --- a/README.md +++ b/README.md @@ -192,11 +192,11 @@ finally moves point to the end of the file and exits. 
| Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | -| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | -| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | -| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | -| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | +| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | +| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | +| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | +| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | +| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | We can use this benchmark to derive how long different parts of the proces take. @@ -208,11 +208,24 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. +### Formatting a large buffer + +This tests applying formatting to a very large (42M) JSON file that is compressed to +remove all whitespace. The formatted files are largely identical. + +| Package | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | +| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | + +We see that the built-in `json-pretty-print-buffer` takes significantly longer then our +implementation. + Notes: -- Both `jsonian` and `json-mode` were byte-compiled for the benchmark. -- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. - +- Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. +- Tests were run against GNU Emacs 30.0.50. +- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. 
## Contributing diff --git a/bench/font-lock.md b/bench/font-lock.md index 4ef5acb..24de696 100644 --- a/bench/font-lock.md +++ b/bench/font-lock.md @@ -1,7 +1,7 @@ | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | -| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | -| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | -| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | -| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | +| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | +| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | +| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | +| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | +| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | diff --git a/bench/format.md b/bench/format.md new file mode 100644 index 0000000..d437f30 --- /dev/null +++ b/bench/format.md @@ -0,0 +1,4 @@ +| Command | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | +| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | diff --git a/bench/format.sh b/bench/format.sh new file mode 100755 index 0000000..75f5af5 --- /dev/null +++ b/bench/format.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env sh + + +COMPRESSED="$FILE.compressed.json" +jq -c . "$FILE" > "$COMPRESSED" + +bench () { + echo "$EMACS -Q --batch -nw \ +-L . 
\ +--eval '(setq large-file-warning-threshold nil)' \ +--eval '(switch-to-buffer (find-file-literally \"$COMPRESSED\"))' \ +--eval \"(require 'jsonian)\" \ +--eval \"$1\" \ +--eval '(kill-emacs)'" +} + +hyperfine --export-markdown "$EXPORT" --show-output \ + --command-name "jsonian-format-region" "$(bench "(let ((inhibit-message t))\ + (jsonian-format-region (point-min) (point-max)))")" \ + --command-name "json-pretty-print-buffer" "$(bench "(json-pretty-print-buffer)")" + +rm "$COMPRESSED" diff --git a/bench/markdown.md b/bench/markdown.md index 4e7892d..b1a7f59 100644 --- a/bench/markdown.md +++ b/bench/markdown.md @@ -12,11 +12,11 @@ finally moves point to the end of the file and exits. | Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.351 ± 0.003 | 1.345 | 1.355 | 1.00 | -| `prog-mode` | 1.437 ± 0.030 | 1.422 | 1.520 | 1.06 ± 0.02 | -| `jsonian-mode` | 2.349 ± 0.032 | 2.316 | 2.414 | 1.74 ± 0.02 | -| `json-mode` | 3.838 ± 0.041 | 3.808 | 3.947 | 2.84 ± 0.03 | -| `javascript-mode` | 13.547 ± 0.086 | 13.498 | 13.789 | 10.03 ± 0.07 | +| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | +| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | +| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | +| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | +| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | We can use this benchmark to derive how long different parts of the proces take. @@ -28,6 +28,21 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. +### Formatting a large buffer + +This tests applying formatting to a very large (42M) JSON file that is compressed to +remove all whitespace. The formatted files are largely identical. 
+ +| Package | Mean [s] | Min [s] | Max [s] | Relative | +|:---|---:|---:|---:|---:| +| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | +| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | + +We see that the built-in `json-pretty-print-buffer` takes significantly longer then our +implementation. + Notes: -- Both `jsonian` and `json-mode` were byte-compiled for the benchmark. +- Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. +- Tests were run against GNU Emacs 30.0.50. +- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. diff --git a/bench/markdown.sh b/bench/markdown.sh index e381681..2a30c6d 100755 --- a/bench/markdown.sh +++ b/bench/markdown.sh @@ -25,9 +25,20 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most \`jsonian-mode\` - \`prog-mode\`. +### Formatting a large buffer + +This tests applying formatting to a very large (42M) JSON file that is compressed to +remove all whitespace. The formatted files are largely identical. + +$(sed 's/Command/Package/g' < bench/format.md) + +We see that the built-in \`json-pretty-print-buffer\` takes significantly longer then our +implementation. + Notes: -- Both \`jsonian\` and \`json-mode\` were byte-compiled for the benchmark. +- Both \`jsonian\` and \`json-mode\` were byte-compiled for the \`font-lock\` benchmark. +- Tests were run against $($EMACS --version | head -1). - These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. 
EOF ) From dc3192ffc25bb28312c4b8777ef3e475ced12aa9 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Fri, 25 Aug 2023 18:31:53 -0700 Subject: [PATCH 10/13] Fix test on older Emacs versions --- .github/workflows/test.yaml | 10 ++++++++-- jsonian-tests.el | 17 +++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fa95e18..8ff9b24 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -6,12 +6,18 @@ on: env: PR_COMMIT_SHA: ${{ github.event.client_payload.pull_request.head.sha }} jobs: - build_and_test: + test_and_lint: + name: Test and Lint + strategy: + matrix: + # We need to make sure we are testing at least the earliest and latest versions of + # Emacs. + version: ["27.1", "29.1"] runs-on: ubuntu-latest steps: - uses: purcell/setup-emacs@master with: - version: "27.1" + version: ${{ matrix.version }} - uses: actions/checkout@v2 with: ref: ${{ env.PR_COMMIT_SHA }} diff --git a/jsonian-tests.el b/jsonian-tests.el index d206bd0..f28dc92 100644 --- a/jsonian-tests.el +++ b/jsonian-tests.el @@ -733,12 +733,17 @@ Specifically, we need to comply with what `completion-boundaries' describes." ;; Validate that `jsonian--format-string' matches the behavior of `json-pretty-print'. ;; Because that `json-pretty-print-buffer' defaults to an indentation of 2, we set ;; that for ourselves. - (let ((jsonian-indentation 2)) - (should (string= (jsonian--format-string input) - (with-temp-buffer - (insert input) - (json-pretty-print-buffer) - (buffer-string))))))) + ;; + ;; Emacs major versions before 28 indent { } as {\n} instead of {}. This makes us + ;; unable to verify our formatting against `json-pretty-print' since we target + ;; different results. 
+ (when (> emacs-major-version 27) + (let ((jsonian-indentation 2)) + (should (string= (jsonian--format-string input) + (with-temp-buffer + (insert input) + (json-pretty-print-buffer) + (buffer-string)))))))) (ert-deftest jsonian-format-region () "Test `jsonian-format-region'." From eaeed8433aaa2cf6c6b6c10a53a707d190c4be86 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Fri, 25 Aug 2023 18:52:31 -0700 Subject: [PATCH 11/13] Restore benchmark info --- Makefile | 4 ++-- README.md | 3 +++ bench/font-lock.md | 10 +++++----- bench/format.md | 4 ++-- bench/markdown.md | 23 +++++++++++++---------- bench/markdown.sh | 9 ++++++--- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index c295f3b..c903873 100644 --- a/Makefile +++ b/Makefile @@ -60,11 +60,11 @@ README.md: bench/markdown.md mv $@.new $@ bench/markdown.md: bench/format.md bench/font-lock.md bench/markdown.sh - EMACS="'${EMACS}'" EXPORT="$@" ./bench/markdown.sh + EMACS="${EMACS}" EXPORT="$@" ./bench/markdown.sh PHONY: bench-base bench-base: ${LARGE_JSON_FILE} jsonian.elc hyperfine --version # Ensure hyperfine is installed bench/%.md: bench/%.sh bench-base - EMACS="'${EMACS}'" FILE="${LARGE_JSON_FILE}" EXPORT="$@" $< + EMACS="${EMACS}" FILE="${LARGE_JSON_FILE}" EXPORT="$@" $< diff --git a/README.md b/README.md index 17efb78..7ad79e3 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,9 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. +- Parsing a javascript file is much more complicated (and thus expensive) then parsing a + JSON file. 
+ ### Formatting a large buffer This tests applying formatting to a very large (42M) JSON file that is compressed to diff --git a/bench/font-lock.md b/bench/font-lock.md index 24de696..b85278b 100644 --- a/bench/font-lock.md +++ b/bench/font-lock.md @@ -1,7 +1,7 @@ | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | -| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | -| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | -| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | -| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | +| `fundamental-mode` | 1.493 ± 0.111 | 1.313 | 1.593 | 1.05 ± 0.08 | +| `prog-mode` | 1.423 ± 0.016 | 1.406 | 1.452 | 1.00 | +| `jsonian-mode` | 2.443 ± 0.129 | 2.294 | 2.613 | 1.72 ± 0.09 | +| `json-mode` | 3.801 ± 0.145 | 3.748 | 4.213 | 2.67 ± 0.11 | +| `javascript-mode` | 13.441 ± 0.102 | 13.305 | 13.681 | 9.44 ± 0.13 | diff --git a/bench/format.md b/bench/format.md index d437f30..2b73d1a 100644 --- a/bench/format.md +++ b/bench/format.md @@ -1,4 +1,4 @@ | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | -| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | +| `jsonian-format-region` | 1.711 ± 0.104 | 1.626 | 1.850 | 1.00 | +| `json-pretty-print-buffer` | 4.594 ± 0.013 | 4.578 | 4.620 | 2.69 ± 0.16 | diff --git a/bench/markdown.md b/bench/markdown.md index b1a7f59..2b11061 100644 --- a/bench/markdown.md +++ b/bench/markdown.md @@ -12,11 +12,11 @@ finally moves point to the end of the file and exits. 
| Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | -| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | -| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | -| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | -| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | +| `fundamental-mode` | 1.493 ± 0.111 | 1.313 | 1.593 | 1.05 ± 0.08 | +| `prog-mode` | 1.423 ± 0.016 | 1.406 | 1.452 | 1.00 | +| `jsonian-mode` | 2.443 ± 0.129 | 2.294 | 2.613 | 1.72 ± 0.09 | +| `json-mode` | 3.801 ± 0.145 | 3.748 | 4.213 | 2.67 ± 0.11 | +| `javascript-mode` | 13.441 ± 0.102 | 13.305 | 13.681 | 9.44 ± 0.13 | We can use this benchmark to derive how long different parts of the proces take. @@ -28,6 +28,9 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. +- Parsing a javascript file is much more complicated (and thus expensive) then parsing a + JSON file. + ### Formatting a large buffer This tests applying formatting to a very large (42M) JSON file that is compressed to @@ -35,14 +38,14 @@ remove all whitespace. The formatted files are largely identical. | Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | -| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | +| `jsonian-format-region` | 1.711 ± 0.104 | 1.626 | 1.850 | 1.00 | +| `json-pretty-print-buffer` | 4.594 ± 0.013 | 4.578 | 4.620 | 2.69 ± 0.16 | We see that the built-in `json-pretty-print-buffer` takes significantly longer then our implementation. Notes: -- Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. -- Tests were run against GNU Emacs 30.0.50. -- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. +1. 
Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. +1. Tests were run against . +1. These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. diff --git a/bench/markdown.sh b/bench/markdown.sh index 2a30c6d..931e0aa 100755 --- a/bench/markdown.sh +++ b/bench/markdown.sh @@ -25,6 +25,9 @@ We can use this benchmark to derive how long different parts of the proces take. - Applying JSON formatting take at most \`jsonian-mode\` - \`prog-mode\`. +- Parsing a javascript file is much more complicated (and thus expensive) then parsing a + JSON file. + ### Formatting a large buffer This tests applying formatting to a very large (42M) JSON file that is compressed to @@ -37,9 +40,9 @@ implementation. Notes: -- Both \`jsonian\` and \`json-mode\` were byte-compiled for the \`font-lock\` benchmark. -- Tests were run against $($EMACS --version | head -1). -- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. +1. Both \`jsonian\` and \`json-mode\` were byte-compiled for the \`font-lock\` benchmark. +1. Tests were run against $($EMACS --version | head -1). +1. These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. EOF ) From e9bf58e9569b52892db76a9a455e43e2807cc8d7 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Fri, 25 Aug 2023 19:06:10 -0700 Subject: [PATCH 12/13] Add keybinding --- README.md | 38 +++++++++++++++++++++++++++++++++----- jsonian.el | 1 + 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7ad79e3..45d3456 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,39 @@ When you return from the buffer, the string is collapsed back into its escaped f By default, this command is bound to `C-c C-s`. +#### jsonian-find + +Provide an interactive completion interface for selecting an element in the +buffer. When the element is selected, jump to that point in the buffer. + +By default, this command is bound to `C-c C-f`. 
+ +#### jsonian-format-region + +Pretty-print (expand) the JSON contents of the region. This is equivalent to the built-in function +`json-pretty-print`, but much faster (see "\#\# Benchmarks"). For example: + +``` json +{"key":["simple",null,{"cpx": true},[]]} +``` + +Calling `jsonian-format-region` on the above will transform it into: + +``` json +{ + "key": [ + "simple", + null, + { + "cpx": true + }, + [] + ] +} +``` + +By default, this command is bound to `C-c C-w`. + #### jsonian-enclosing-item Move point to the enclosing node. For example: @@ -169,11 +202,6 @@ the point to the opening `[`. By default, this function is bound to `C-c C-e`. -#### jsonian-find - -Provide an interactive completion interface for selecting an element in the -buffer. When the element is selected, jump to that point in the buffer. - #### jsonian-enable-flycheck Enable `jsonian-mode` for all checkers where `json-mode` is enabled. diff --git a/jsonian.el b/jsonian.el index d89ab83..c307efa 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1548,6 +1548,7 @@ nil is returned if the object at point is not a collection."
(define-key km (kbd "C-c C-s") #'jsonian-edit-string) (define-key km (kbd "C-c C-e") #'jsonian-enclosing-item) (define-key km (kbd "C-c C-f") #'jsonian-find) + (define-key km (kbd "C-c C-w") #'jsonian-format-region) km) "The mode-map for `jsonian-mode'.") From ec1a21e5f5a348a4fbabaea327c840aed6c24f37 Mon Sep 17 00:00:00 2001 From: Ian Wahbe Date: Fri, 25 Aug 2023 19:38:00 -0700 Subject: [PATCH 13/13] Let `jsonian-format-region` minimize with a prefix argument --- README.md | 32 ++++++------ bench/font-lock.md | 10 ++-- bench/format.md | 6 ++- bench/format.sh | 19 ++++++-- bench/markdown.md | 18 ++++--- bench/markdown.sh | 8 +-- jsonian.el | 119 ++++++++++++++++++++++++--------------------- 7 files changed, 120 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 45d3456..8d16db0 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,8 @@ Calling `jsonian-format-region` on the above will transform it into: } ``` +If a prefix argument is supplied, `jsonian-format-region` minimizes instead of expanding. + By default, this command is bound to `C-c C-w`. #### jsonian-enclosing-item @@ -220,19 +222,19 @@ finally moves point to the end of the file and exits. 
| Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.331 ± 0.003 | 1.327 | 1.337 | 1.00 | -| `prog-mode` | 1.407 ± 0.010 | 1.398 | 1.429 | 1.06 ± 0.01 | -| `jsonian-mode` | 2.280 ± 0.006 | 2.272 | 2.291 | 1.71 ± 0.01 | -| `json-mode` | 3.787 ± 0.013 | 3.766 | 3.816 | 2.84 ± 0.01 | -| `javascript-mode` | 13.466 ± 0.071 | 13.325 | 13.516 | 10.11 ± 0.06 | +| `fundamental-mode` | 1.444 ± 0.174 | 1.301 | 1.734 | 1.00 ± 0.12 | +| `prog-mode` | 1.442 ± 0.039 | 1.402 | 1.488 | 1.00 | +| `jsonian-mode` | 2.296 ± 0.013 | 2.289 | 2.332 | 1.59 ± 0.04 | +| `json-mode` | 3.775 ± 0.033 | 3.762 | 3.867 | 2.62 ± 0.07 | +| `javascript-mode` | 13.599 ± 0.288 | 13.341 | 14.145 | 9.43 ± 0.32 | We can use this benchmark to derive how long different parts of the proces take. - Fundamental mode is the lower limit. This is the time Emacs spends processing the buffer, parsing sexps, etc. -- We see that `prog-mode` doesn\'t do much more then `fundamental-mode`, which makes - sense. +- `prog-mode` doesn\'t do much more than `fundamental-mode`, which makes sense, since it + takes about the same amount of time. - Applying JSON formatting take at most `jsonian-mode` - `prog-mode`. @@ -246,17 +248,19 @@ remove all whitespace. The formatted files are largely identical. | Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `jsonian-format-region` | 1.668 ± 0.014 | 1.649 | 1.701 | 1.00 | -| `json-pretty-print-buffer` | 4.637 ± 0.023 | 4.597 | 4.679 | 2.78 ± 0.03 | +| `jsonian-format-region` | 1.709 ± 0.091 | 1.633 | 1.877 | 1.12 ± 0.06 | +| `jsonian-format-region (minimize)` | 1.524 ± 0.010 | 1.516 | 1.549 | 1.00 | +| `json-pretty-print-buffer` | 4.582 ± 0.006 | 4.576 | 4.593 | 3.01 ± 0.02 | +| `json-pretty-print-buffer (minimize)` | 4.440 ± 0.114 | 4.384 | 4.753 | 2.91 ± 0.08 | -We see that the built-in `json-pretty-print-buffer` takes significantly longer then our -implementation.
+We see that the built-in `json-pretty-print-buffer` takes significantly longer than +`jsonian-format-region`, regardless of whether we are pretty printing or minimizing. Notes: -- Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. -- Tests were run against GNU Emacs 30.0.50. -- These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. +1. Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. +1. Tests were run against GNU Emacs 30.0.50. +1. These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. ## Contributing diff --git a/bench/font-lock.md b/bench/font-lock.md index b85278b..1ef08d8 100644 --- a/bench/font-lock.md +++ b/bench/font-lock.md @@ -1,7 +1,7 @@ | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.493 ± 0.111 | 1.313 | 1.593 | 1.05 ± 0.08 | -| `prog-mode` | 1.423 ± 0.016 | 1.406 | 1.452 | 1.00 | -| `jsonian-mode` | 2.443 ± 0.129 | 2.294 | 2.613 | 1.72 ± 0.09 | -| `json-mode` | 3.801 ± 0.145 | 3.748 | 4.213 | 2.67 ± 0.11 | -| `javascript-mode` | 13.441 ± 0.102 | 13.305 | 13.681 | 9.44 ± 0.13 | +| `fundamental-mode` | 1.444 ± 0.174 | 1.301 | 1.734 | 1.00 ± 0.12 | +| `prog-mode` | 1.442 ± 0.039 | 1.402 | 1.488 | 1.00 | +| `jsonian-mode` | 2.296 ± 0.013 | 2.289 | 2.332 | 1.59 ± 0.04 | +| `json-mode` | 3.775 ± 0.033 | 3.762 | 3.867 | 2.62 ± 0.07 | +| `javascript-mode` | 13.599 ± 0.288 | 13.341 | 14.145 | 9.43 ± 0.32 | diff --git a/bench/format.md b/bench/format.md index 2b73d1a..6fd0045 100644 --- a/bench/format.md +++ b/bench/format.md @@ -1,4 +1,6 @@ | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `jsonian-format-region` | 1.711 ± 0.104 | 1.626 | 1.850 | 1.00 | -| `json-pretty-print-buffer` | 4.594 ± 0.013 | 4.578 | 4.620 | 2.69 ± 0.16 | +| `jsonian-format-region` | 1.709 ± 0.091 | 1.633 | 1.877 | 1.12 ± 0.06 | +| `jsonian-format-region (minimize)` | 1.524 ± 0.010 |
1.516 | 1.549 | 1.00 | +| `json-pretty-print-buffer` | 4.582 ± 0.006 | 4.576 | 4.593 | 3.01 ± 0.02 | +| `json-pretty-print-buffer (minimize)` | 4.440 ± 0.114 | 4.384 | 4.753 | 2.91 ± 0.08 | diff --git a/bench/format.sh b/bench/format.sh index 75f5af5..d8517f3 100755 --- a/bench/format.sh +++ b/bench/format.sh @@ -8,15 +8,28 @@ bench () { echo "$EMACS -Q --batch -nw \ -L . \ --eval '(setq large-file-warning-threshold nil)' \ ---eval '(switch-to-buffer (find-file-literally \"$COMPRESSED\"))' \ +--eval '(switch-to-buffer (find-file-literally \"$2\"))' \ --eval \"(require 'jsonian)\" \ --eval \"$1\" \ --eval '(kill-emacs)'" } +# Run the benchmark on the full file. +full () { + bench "$1" "$FILE" +} + +# Run the benchmark on the compressed file +cmpr () { + bench "$1" "$COMPRESSED" +} + hyperfine --export-markdown "$EXPORT" --show-output \ - --command-name "jsonian-format-region" "$(bench "(let ((inhibit-message t))\ + --command-name "jsonian-format-region" "$(cmpr "(let ((inhibit-message t))\ (jsonian-format-region (point-min) (point-max)))")" \ - --command-name "json-pretty-print-buffer" "$(bench "(json-pretty-print-buffer)")" + --command-name "jsonian-format-region (minimize)" "$(full "(let ((inhibit-message t))\ + (jsonian-format-region (point-min) (point-max) t))")" \ + --command-name "json-pretty-print-buffer" "$(cmpr "(json-pretty-print-buffer)")" \ + --command-name "json-pretty-print-buffer (minimize)" "$(full "(json-pretty-print-buffer t)")" rm "$COMPRESSED" diff --git a/bench/markdown.md b/bench/markdown.md index 2b11061..4a48749 100644 --- a/bench/markdown.md +++ b/bench/markdown.md @@ -12,11 +12,11 @@ finally moves point to the end of the file and exits. 
| Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `fundamental-mode` | 1.493 ± 0.111 | 1.313 | 1.593 | 1.05 ± 0.08 | -| `prog-mode` | 1.423 ± 0.016 | 1.406 | 1.452 | 1.00 | -| `jsonian-mode` | 2.443 ± 0.129 | 2.294 | 2.613 | 1.72 ± 0.09 | -| `json-mode` | 3.801 ± 0.145 | 3.748 | 4.213 | 2.67 ± 0.11 | -| `javascript-mode` | 13.441 ± 0.102 | 13.305 | 13.681 | 9.44 ± 0.13 | +| `fundamental-mode` | 1.444 ± 0.174 | 1.301 | 1.734 | 1.00 ± 0.12 | +| `prog-mode` | 1.442 ± 0.039 | 1.402 | 1.488 | 1.00 | +| `jsonian-mode` | 2.296 ± 0.013 | 2.289 | 2.332 | 1.59 ± 0.04 | +| `json-mode` | 3.775 ± 0.033 | 3.762 | 3.867 | 2.62 ± 0.07 | +| `javascript-mode` | 13.599 ± 0.288 | 13.341 | 14.145 | 9.43 ± 0.32 | We can use this benchmark to derive how long different parts of the proces take. @@ -38,8 +38,10 @@ remove all whitespace. The formatted files are largely identical. | Package | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| -| `jsonian-format-region` | 1.711 ± 0.104 | 1.626 | 1.850 | 1.00 | -| `json-pretty-print-buffer` | 4.594 ± 0.013 | 4.578 | 4.620 | 2.69 ± 0.16 | +| `jsonian-format-region` | 1.709 ± 0.091 | 1.633 | 1.877 | 1.12 ± 0.06 | +| `jsonian-format-region (minimize)` | 1.524 ± 0.010 | 1.516 | 1.549 | 1.00 | +| `json-pretty-print-buffer` | 4.582 ± 0.006 | 4.576 | 4.593 | 3.01 ± 0.02 | +| `json-pretty-print-buffer (minimize)` | 4.440 ± 0.114 | 4.384 | 4.753 | 2.91 ± 0.08 | We see that the built-in `json-pretty-print-buffer` takes significantly longer then our implementation. @@ -47,5 +49,5 @@ implementation. Notes: 1. Both `jsonian` and `json-mode` were byte-compiled for the `font-lock` benchmark. -1. Tests were run against . +1. Tests were run against GNU Emacs 30.0.50. 1. These benchmarks were taken on an Apple M2 Max with 64GB running macOS Ventura. 
diff --git a/bench/markdown.sh b/bench/markdown.sh index 931e0aa..0a1d5ed 100755 --- a/bench/markdown.sh +++ b/bench/markdown.sh @@ -20,8 +20,8 @@ We can use this benchmark to derive how long different parts of the proces take. - Fundamental mode is the lower limit. This is the time Emacs spends processing the buffer, parsing sexps, etc. -- We see that \`prog-mode\` doesn\'t do much more then \`fundamental-mode\`, which makes - sense. +- \`prog-mode\` doesn\'t do much more than \`fundamental-mode\`, which makes sense, since it + takes about the same amount of time. - Applying JSON formatting take at most \`jsonian-mode\` - \`prog-mode\`. @@ -35,8 +35,8 @@ remove all whitespace. The formatted files are largely identical. $(sed 's/Command/Package/g' < bench/format.md) -We see that the built-in \`json-pretty-print-buffer\` takes significantly longer then our -implementation. +We see that the built-in \`json-pretty-print-buffer\` takes significantly longer than +\`jsonian-format-region\`, regardless of whether we are pretty printing or minimizing. Notes: diff --git a/jsonian.el b/jsonian.el index c307efa..230d8c5 100644 --- a/jsonian.el +++ b/jsonian.el @@ -1932,62 +1932,69 @@ out of the region." (set-marker marker nil nil))) ;;;###autoload -(defun jsonian-format-region (start end) - "Format the region (START . END)." - (interactive "r") - (jsonian--huge-edit start end - (let ((end (progn (goto-char end) (point-marker)))) - (goto-char start) - (jsonian--snap-to-token) - (let* ((indent (jsonian--indentation-spaces)) - (indent-level (jsonian--get-indent-level indent)) - (undo-inhibit-record-point t) - (next-token (make-marker)) - ;; Don't allocate a new string each time you add indentation. - ;; - ;; In effect, this is where we intern strings on behalf of elisp. - (indent-strings '("\n")) - (progress (make-progress-reporter "Formatting region..."
start (* (- end start) 1.5)))) - (set-marker-insertion-type next-token t) - (while (and - (<= (point) end) - (jsonian--forward-token t)) - (progress-reporter-update progress (point)) - (set-marker next-token (point)) - (delete-region jsonian--last-token-end (point)) - (cond - ;; A space separates : from the next token - ;; - ;; "foo": bar - ;; ^space - ((eq (char-before jsonian--last-token-end) ?:) - (goto-char jsonian--last-token-end) - (insert " ") - (goto-char next-token)) - ;; If the second of the abutting tokens is a ",", then we don't make any - ;; adjustments. - ((memq (char-after) '(?, ?:))) - - ;; Empty objects and arrays are formatted as {} and [], respectively. - ((and (eq (char-before) ?\[) (eq (char-after) ?\]))) - ((and (eq (char-before) ?\{) (eq (char-after) ?\}))) - - ;; All other items are separated by a new line, then the appropriate indentation. - (t - (when (memq (char-after) '(?\] ?\})) - (cl-decf indent-level indent)) - (when (memq (char-before jsonian--last-token-end) '(?\[ ?\{)) - (cl-incf indent-level indent)) - (while (<= (length indent-strings) indent-level) - (setq indent-strings - (append indent-strings - (list (concat - "\n" - (make-string (length indent-strings) - ?\s)))))) - (insert (nth indent-level indent-strings)) - (goto-char next-token)))) - (progress-reporter-done progress))))) +(defun jsonian-format-region (start end &optional minimize) + "Format the region (START . END). + +If MINIMIZE is non-nil, minimize the region instead of expanding it." + (interactive "*r\nP") + (let ((current-point (point-marker))) + (jsonian--huge-edit start end + (let ((end (progn (goto-char end) (point-marker)))) + (goto-char start) + (jsonian--snap-to-token) + (let* ((indent (jsonian--indentation-spaces)) + (indent-level (jsonian--get-indent-level indent)) + (undo-inhibit-record-point t) + (next-token (make-marker)) + ;; Don't allocate a new string each time you add indentation. 
+ ;; + ;; In effect, this is where we intern strings on behalf of elisp. + (indent-strings '("\n")) + (progress (make-progress-reporter "Formatting region..." start (* (- end start) 1.5)))) + (set-marker-insertion-type next-token t) + (while (and + (<= (point) end) + (jsonian--forward-token t)) + (progress-reporter-update progress (point)) + ;; Delete the whitespace between the old token and the next token. + (set-marker next-token (point)) + (delete-region jsonian--last-token-end (point)) + (unless minimize + ;; Unless we are minimizing, insert the appropriate whitespace. + (cond + ;; A space separates : from the next token + ;; + ;; "foo": bar + ;; ^space + ((eq (char-before jsonian--last-token-end) ?:) + (goto-char jsonian--last-token-end) + (insert " ") + (goto-char next-token)) + ;; If the second of the abutting tokens is a ",", then we don't make any + ;; adjustments. + ((memq (char-after) '(?, ?:))) + + ;; Empty objects and arrays are formatted as {} and [], respectively. + ((and (eq (char-before) ?\[) (eq (char-after) ?\]))) + ((and (eq (char-before) ?\{) (eq (char-after) ?\}))) + + ;; All other items are separated by a new line, then the appropriate indentation. + (t + (when (memq (char-after) '(?\] ?\})) + (cl-decf indent-level indent)) + (when (memq (char-before jsonian--last-token-end) '(?\[ ?\{)) + (cl-incf indent-level indent)) + (while (<= (length indent-strings) indent-level) + (setq indent-strings + (append indent-strings + (list (concat + "\n" + (make-string (length indent-strings) + ?\s)))))) + (insert (nth indent-level indent-strings)) + (goto-char next-token))))) + (progress-reporter-done progress)))) + (goto-char current-point))) (defun jsonian-beginning-of-defun (&optional arg) "Move to the beginning of the smallest object/array enclosing `POS'.