From 3a66a2cd6ecc90c8dce85a1a06aba20e6dfc6456 Mon Sep 17 00:00:00 2001 From: David Bau Date: Wed, 24 Jun 2015 17:20:54 -0400 Subject: [PATCH] Draft fix for utf-8 issues. --- content/src/filetype.js | 6 ++++++ server/filemeta.js | 22 +++++++++++++++++----- server/load.js | 9 +++++---- server/save.js | 3 ++- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/content/src/filetype.js b/content/src/filetype.js index 8a2c50a9..1b794ead 100644 --- a/content/src/filetype.js +++ b/content/src/filetype.js @@ -9,6 +9,11 @@ function inferScriptType(filename) { return mime.replace(/;.*$/, ''); } +function binaryTypeFilename(filename) { + var mime = mimeForFilename(filename); + return !(/^text\//.test(mime)); +} + // Scans for HTML HEAD content at the top, remembering the positions // after any start-tags seen and before any legal end-tags. // Returns { @@ -319,6 +324,7 @@ function isDefaultMeta(meta) { var impl = { mimeForFilename: mimeForFilename, + binaryTypeFilename: binaryTypeFilename, modifyForPreview: modifyForPreview, effectiveMeta: effectiveMeta, isDefaultMeta: isDefaultMeta, diff --git a/server/filemeta.js b/server/filemeta.js index b2e465d4..40d16f01 100644 --- a/server/filemeta.js +++ b/server/filemeta.js @@ -21,7 +21,7 @@ var delimiter = [ } ]; -function parseMetaString(buf) { +function parseMetaString(buf, assumeBinary) { var j, d, limit, start, end, meta, enc, str = buf.toString('binary'); for (j = 0; j < delimiter.length; ++j) { d = delimiter[j]; @@ -39,10 +39,22 @@ function parseMetaString(buf) { if (meta && meta.encoding && Buffer.isEncoding(meta.encoding)) { enc = meta.encoding; } else { - enc = 'binary'; + enc = assumeBinary ? 'binary' : 'utf8'; + } + var decoded = buf.toString(enc, 0, limit); + // Deal with legacy data that might have been encoded as binary + // when the new default for text files is utf8. + if (enc == 'utf8' && decoded.indexOf('\ufffd') != -1) { + var recoded = new Buffer(decoded, enc); + // The utf8 is malformed if reencoding it results in different bytes. + if (recoded.length != limit || !recoded.equals(buf.slice(0, limit))) { + // In this case, treat the encoding as binary instead. + enc = 'binary'; + decoded = buf.toString(enc, 0, limit); + } } return { - data: buf.toString(enc, 0, limit), + data: decoded, meta: meta } } catch (e) { } @@ -50,7 +62,7 @@ function parseMetaString(buf) { return { data: str, meta: null }; } -function printMetaString(data, meta) { +function printMetaString(data, meta, assumeBinary) { if (meta == null && (data.lastIndexOf('META@') == -1 || data.lastIndexOf('@META') == -1) && /^[\0-\xff]*$/.test(data)) { @@ -62,7 +74,7 @@ function printMetaString(data, meta) { if (delimiter[j].type.test(meta.type)) { d = delimiter[j]; break; } } } - var enc = 'binary'; + var enc = assumeBinary ? 'binary' : 'utf8'; if (meta && meta.encoding && Buffer.isEncoding(meta.encoding)) { enc = meta.encoding; } else if (!/^[\0-\xff]*$/.test(data)) { diff --git a/server/load.js b/server/load.js index 3d594a7e..65c9c82e 100644 --- a/server/load.js +++ b/server/load.js @@ -96,6 +96,7 @@ exports.handleLoad = function(req, res, app, format) { utils.errorExit('Bad filename: ' + filename); } } + var assumeBinary = filetype.binaryTypeFilename(filename); var absfile = utils.makeAbsolute(filename, app); @@ -105,7 +106,7 @@ exports.handleLoad = function(req, res, app, format) { // Handle the case of a file that's present if (utils.isPresent(absfile, 'file')) { var m = filemeta.parseMetaString( - fs.readFileSync(absfile)), + fs.readFileSync(absfile), assumeBinary), data = m.data, meta = m.meta; @@ -161,7 +162,7 @@ exports.handleLoad = function(req, res, app, format) { else if (format == 'code') { // For loading the code only var mt = filetype.mimeForFilename(filename), m = filemeta.parseMetaString( - fs.readFileSync(absfile)), + fs.readFileSync(absfile), assumeBinary), data = m.data, meta = m.meta; res.set('Cache-Control', 'must-revalidate'); @@ -173,7 +174,7 @@ exports.handleLoad = function(req, res, app, format) { else if (format == 'print') { // For printing the code var mt = filetype.mimeForFilename(filename), m = filemeta.parseMetaString( - fs.readFileSync(absfile)), + fs.readFileSync(absfile), assumeBinary), data = m.data, meta = m.meta, needline = false, @@ -224,7 +225,7 @@ exports.handleLoad = function(req, res, app, format) { if (utils.isPresent(absfile, 'file')) { var mt = filetype.mimeForFilename(filename), m = filemeta.parseMetaString( - fs.readFileSync(absfile)); + fs.readFileSync(absfile), assumeBinary); // For turtle bits, assume it's coffeescript if (mt.indexOf('text/x-pencilcode') == 0) { diff --git a/server/save.js b/server/save.js index 3561b11b..71874fcd 100644 --- a/server/save.js +++ b/server/save.js @@ -300,7 +300,8 @@ exports.handleSave = function(req, res, app) { var statObj; try { - var content = filemeta.printMetaString(data, meta); + var assumeBinary = filetype.binaryTypeFilename(filename); + var content = filemeta.printMetaString(data, meta, assumeBinary); fd = fs.writeFileSync(absfile, content); var statObj = fs.statSync(absfile); touchUserDir(userdir);