Skip to content

Commit 013ec3e

Browse files
committed
Add --compression (-c) option to specify output data compression
1 parent a945a93 commit 013ec3e

File tree

9 files changed

+88
-25
lines changed

9 files changed

+88
-25
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
## next
22

33
- Added `gzip` and `deflate` compression support for input data
4+
- Added `--compression` (`-c`) option to specify output data compression (`gzip` or `deflate`). When omitted, `gzip` is used by default. For example, `jora -c -e jsonxl` will output gzipped JSONXL (the most compact size combination).
45
- Updated jsonxl
56
- Fixed an edge case for signed numbers in the range ±[MAX_SAFE_INTEGER/2 … MAX_SAFE_INTEGER].
67
- Removed the limitation on total encoded string length exceeding the maximum string length (~500 MB for V8)

fixtures/data-no-format.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"name":"jora-cli","version":"2.0.0","description":"Command line interface for Jora","keywords":["cli","jora"],"maintainers":[{"name":"Roman Dvornov","email":"rdvornov@gmail.com","github-username":"lahmatiy"}],"license":"MIT","repository":"discoveryjs/jora-cli","bin":{"jora":"./bin/jora"},"type":"module","main":"./src/index.js","scripts":{"test":"mocha --reporter progress","lint":"eslint src/*.js test/*.js","lint-and-test":"npm run lint && npm test","coverage":"c8 --reporter=lcovonly npm test","prepublishOnly":"npm run lint-and-test"},"dependencies":{"@discoveryjs/json-ext":"^0.6.2","ansi-styles":"^6.2.1","clap":"^3.1.1","jora":"1.0.0-beta.13","jora-sandbox":"^1.3.0","open":"^10.1.0","supports-color":"^9.4.0","tempfile":"^5.0.0"},"devDependencies":{"c8":"^10.1.2","eslint":"^8.57.1","mocha":"^10.7.3"},"engines":{"node":">=18.0.0"},"files":["bin","utils","index.js"]}

fixtures/data-no-format.json.gz

508 Bytes
Binary file not shown.

fixtures/data.json.gz

549 Bytes
Binary file not shown.

fixtures/data.jsonxl.gz

729 Bytes
Binary file not shown.

src/index.js

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,38 @@ function normFormat(value) {
6767
throw new cli.Error(`Bad value "${value}" for ${this.long} option, supported values: ${encodings.join(', ')}`);
6868
}
6969

70+
/**
 * Normalizes the raw value of the compression option.
 *
 * Expected to be invoked with the option definition as `this`, since the
 * error message reads the option's long name from `this.long`.
 *
 * @param {string|false|undefined} value Raw option value.
 * @returns {string|false} `false` when `value` is `false`, `'gzip'` when
 *   `value` is `undefined`, otherwise `value` itself when it is listed
 *   in `compressions`.
 * @throws {cli.Error} When `value` is not one of the supported compressions.
 */
function normCompression(value) {
    switch (value) {
        case false:
            return false;

        case undefined:
            return 'gzip';
    }

    if (!compressions.includes(value)) {
        throw new cli.Error(`Bad value "${value}" for ${this.long} option, supported values: ${compressions.join(', ')}`);
    }

    return value;
}
85+
86+
/**
 * Normalizes the raw value of the pretty-print option into an indentation size.
 *
 * @param {string|number|undefined} value Raw option value.
 * @returns {number|false} `4` when `value` is `undefined`, the numeric value
 *   when it converts to a positive number, otherwise `false`.
 */
function normPretty(value) {
    if (value === undefined) {
        return 4;
    }

    const indent = Number(value);

    // Zero, NaN and negative numbers cannot serve as an indentation size,
    // so all of them are reported the same way
    return indent > 0 ? indent : false;
}
89+
7090
const encodings = ['json', 'jsonxl'];
91+
const compressions = ['gzip', 'deflate'];
7192
const command = cli.command('jora [query]')
7293
.version('', '', '', outputVersion)
7394
.option('-q, --query <query>', 'Jora query or path to a query file with extension .jora', normFilepath)
7495
.option('-i, --input <filename>', 'Input file', normFilepath)
7596
.option('-o, --output <filename>', 'Output file (outputs to stdout if not set)')
7697
.option('-e, --encoding <encoding>', 'Output encoding: json (default), jsonxl (snapshot9)', normFormat, 'json')
98+
.option('-c, --compression [compression]', 'Output compression: gzip (default when [compression] is omitted), deflate', normCompression, false)
7799
.option('--dry-run', 'Don\'t output result, only report what it would have done')
78100
.option('-f, --force', 'Force overwriting output file')
79-
.option('-p, --pretty [indent]', 'Pretty print with optionally specified indentation (4 spaces by default)', value =>
80-
value === undefined ? 4 : Number(value) || false
81-
, false)
101+
.option('-p, --pretty [indent]', 'Pretty-prints the output using the specified indentation (defaults to 4 spaces if omitted)', normPretty, false)
82102
.option('--no-color', 'Suppress color output')
83103
// .option('-s, --sandbox', 'Output data and query in sandbox')
84104
.option('--verbose', 'Output debug info about actions')
@@ -114,10 +134,11 @@ const command = cli.command('jora [query]')
114134
const query = extractQuery(options.query || args[0]);
115135
const queryFn = prepareQuery(query);
116136
const resultData = performQuery(queryFn, input.data, undefined);
117-
const encoding = options.encoding;
137+
const { compression, encoding } = options;
118138

119139
writer.log();
120140
await writeToDestination(resultData, {
141+
compression,
121142
encoding,
122143
displayInfo,
123144
color: options.color && colorsSupported,
@@ -128,16 +149,6 @@ const command = cli.command('jora [query]')
128149
inputPath: options.input
129150
}, setStageProgress);
130151

131-
// if (options.output) {
132-
// pipeline(
133-
// stringifyChunked(result, null, options.pretty),
134-
// fs.createWriteStream(options.outputFile)
135-
// );
136-
// } else {
137-
// const serializedResult = JSON.stringify(result ?? null, null, options.pretty);
138-
// console.log(options.color ? colorize(serializedResult) : serializedResult);
139-
// }
140-
141152
setStageProgress('done', { time: Date.now() - startTime });
142153
});
143154

src/reporter.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,18 @@ export function createDefaultReporter({ log, logTemp }) {
9797

9898
case 'input-encoding': {
9999
if (params.compression) {
100-
log(` Compression: ${params.compression} (auto detected)`);
100+
log(` Compression: ${params.compression}`);
101101
}
102102

103-
log(` Encoding: ${params.encoding || '<unknown>'} (auto detected)`);
103+
log(` Encoding: ${params.encoding || '<unknown>'}`);
104104
break;
105105
}
106106

107107
case 'output-encoding':
108+
if (params.compression) {
109+
log(` Compression: ${params.compression}`);
110+
}
111+
108112
log(` Encoding: ${params.encoding || '<unknown>'}${params.autoEncoding ? ' (auto selected)' : ''}`);
109113
break;
110114

src/write.js

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import fs from 'node:fs';
22
import { pipeline } from 'node:stream/promises';
3+
import { createGzip, createDeflate } from 'node:zlib';
34
import { encode } from './tmp/jsonxl-snapshot9.js';
45
import { colorize } from './colorize.js';
56
import { stringifyChunked, stringifyInfo } from '@discoveryjs/json-ext';
@@ -9,19 +10,24 @@ const now = typeof performace !== 'undefined' && typeof performance.now === 'fun
910
const stringBytes = typeof Buffer === 'function' && typeof Buffer.byteLength === 'function'
1011
? Buffer.byteLength
1112
: (str) => str.length; // incorrect but fast fallback
13+
// Maps a compression name to the zlib factory creating the transform stream for it.
// The map has no prototype, so looking up an arbitrary name (e.g. "toString" or
// "constructor") yields undefined rather than a member inherited from Object.prototype.
const compressionTransforms = {
    __proto__: null,
    gzip: createGzip,
    deflate: createDeflate
};
1217

1318
/**
 * Splits binary data into consecutive chunks of at most `chunkSize` elements.
 *
 * @param {Uint8Array} data Source data; assumed to be a Uint8Array or Buffer
 *   (only `length` and `subarray()` are used) — TODO confirm against callers.
 * @param {number} [chunkSize=65536] Maximum size of a single chunk.
 * @yields {Buffer} A Buffer created from the next slice of `data`.
 * @throws {RangeError} When `chunkSize` is not a positive number.
 */
function* createChunkIterator(data, chunkSize = 64 * 1024) {
    // A zero chunk size would never advance the offset and loop forever
    if (!(chunkSize > 0)) {
        throw new RangeError(`Chunk size must be a positive number, got ${chunkSize}`);
    }

    for (let offset = 0; offset < data.length; offset += chunkSize) {
        yield Buffer.from(data.subarray(offset, offset + chunkSize));
    }
}
1823

1924
async function writeIntoStream(stream, data, options, setStageProgress = () => {}) {
20-
const { autoEncoding, encoding } = options;
25+
const { autoEncoding, encoding, compression } = options;
26+
const compressionTransform = compressionTransforms[compression];
2127
let payload;
2228
let totalSize;
2329

24-
setStageProgress('output-encoding', { autoEncoding, encoding });
30+
setStageProgress('output-encoding', { autoEncoding, encoding, compression });
2531

2632
switch (encoding) {
2733
case 'jsonxl': {
@@ -58,6 +64,9 @@ async function writeIntoStream(stream, data, options, setStageProgress = () => {
5864
const endNewline = encoding !== 'jsonxl';
5965
const applyColorize = encoding === 'json' && options.color;
6066
const buffer = [];
67+
const pipelineDest = compressionTransform
68+
? [compressionTransform(), stream]
69+
: [stream];
6170

6271
await pipeline(async function* () {
6372
if (isStdStream) {
@@ -89,15 +98,15 @@ async function writeIntoStream(stream, data, options, setStageProgress = () => {
8998
if (applyColorize) {
9099
yield colorize(buffer.join(''));
91100
}
101+
}, ...pipelineDest, { end: !isStdStream });
92102

93-
if (isStdStream && endNewline) {
94-
yield'\n';
103+
if (isStdStream) {
104+
if (endNewline) {
105+
stream.write('\n');
95106
}
96107

97-
if (isStdStream) {
98-
setStageProgress('finish-stdout', { newline: !endNewline });
99-
}
100-
}, stream, { end: !isStdStream });
108+
setStageProgress('finish-stdout', { newline: !endNewline });
109+
}
101110
} else {
102111
// dry run
103112
switch (encoding) {

test/test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ const fixtureDir = path.join(__dirname, '../fixtures');
3232
const queryFilename = path.join(fixtureDir, 'query.jora');
3333
const packageJson = fixtureFile('../package.json');
3434
const fixtureJson = fixtureFile('data.json');
35+
const fixtureJsonGz = fixtureFile('data.json.gz');
36+
const fixtureJsonNoFormatGz = fixtureFile('data-no-format.json.gz');
3537
const fixtureJsonxl = fixtureFile('data.jsonxl');
38+
const fixtureJsonxlGz = fixtureFile('data.jsonxl.gz');
3639
const colorFixture = fixtureFile('color-output.json');
3740
const colorFixtureExpected = fixtureFile('color-output.expected').text.trim();
3841
const colorFixtureExpectedCompact = fixtureFile('color-output.compact.expected').text.trim();
@@ -237,6 +240,40 @@ describe('jsonxl', () => {
237240
);
238241
});
239242

243+
// Checks that gzipped input is accepted without extra options and that
// the `-c` option produces gzipped output for both JSON and JSONXL encodings.
// NOTE(review): output is compared byte-for-byte with stored .gz fixtures;
// presumably this relies on zlib producing identical bytes on every platform — confirm.
describe('compression', function() {
    describe('gzip', () => {
        it('gzipped JSON input', () =>
            run()
                .input(fixtureJsonGz.raw)
                .output(fixtureJson.string)
        );

        it('gzipped JSON output', () =>
            run('-c')
                .input(fixtureJson.text)
                .output(fixtureJsonNoFormatGz.raw)
        );

        it('gzipped formatted JSON output', () =>
            run('-c', '-p')
                .input(fixtureJson.text)
                .output(fixtureJsonGz.raw)
        );

        it('gzipped JSONXL input', () =>
            run()
                .input(fixtureJsonxlGz.raw)
                .output(fixtureJsonxl.string)
        );

        it('gzipped JSONXL output', () =>
            run('-c', '-e', 'jsonxl')
                .input(fixtureJsonxl.raw)
                .output(fixtureJsonxlGz.raw)
        );
    });
});
276+
240277
describe('pretty print', function() {
241278
it('indentation should be 4 spaces by default', () =>
242279
run('dependencies.keys()', '-p')

0 commit comments

Comments
 (0)