From 3ff660392ee7d365acb5b0de1a9969daac753f37 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Fri, 30 Sep 2016 14:13:44 +0200 Subject: [PATCH 01/13] TSII-1535: Also do not un-escape \uffff --- lib/output.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/output.js b/lib/output.js index a3c6b4ab..62dbf49b 100644 --- a/lib/output.js +++ b/lib/output.js @@ -134,7 +134,7 @@ function OutputStream(options) { function make_string(str, quote) { var dq = 0, sq = 0; - str = str.replace(/[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff]/g, + str = str.replace(/[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g, function(s, i){ switch (s) { case '"': ++dq; return '"'; @@ -149,6 +149,7 @@ function OutputStream(options) { case "\u2028": return "\\u2028"; case "\u2029": return "\\u2029"; case "\ufeff": return "\\ufeff"; + case "\uffff": return "\\uffff"; case "\0": return /[0-7]/.test(str.charAt(i+1)) ? "\\x00" : "\\0"; } @@ -1282,6 +1283,7 @@ function OutputStream(options) { 0x0d , // \r 0x00 , // \0 0xfeff , // Unicode BOM + 0xffff , // Unicode noncharacter 0x2028 , // unicode "line separator" 0x2029 , // unicode "paragraph separator" ].indexOf(code) < 0; From 947aef405cae91e3ea49e58875576c4d0ebb5e4a Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Fri, 30 Sep 2016 14:54:35 +0200 Subject: [PATCH 02/13] Add more characters that should not be un-escaped in regexps. --- lib/output.js | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/output.js b/lib/output.js index 62dbf49b..05f8b5d1 100644 --- a/lib/output.js +++ b/lib/output.js @@ -1282,6 +1282,7 @@ function OutputStream(options) { 0x0a , // \n 0x0d , // \r 0x00 , // \0 + 0x1f , 0xfeff , // Unicode BOM 0xffff , // Unicode noncharacter 0x2028 , // unicode "line separator" From b4f8e7dc7b257da2ad1dbbc53788fabd105b6bce Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Fri, 30 Sep 2016 15:10:23 +0200 Subject: [PATCH 03/13] Add more characters that should not be un-escaped in regexps. 
--- lib/output.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/output.js b/lib/output.js index 05f8b5d1..59556f5e 100644 --- a/lib/output.js +++ b/lib/output.js @@ -134,7 +134,7 @@ function OutputStream(options) { function make_string(str, quote) { var dq = 0, sq = 0; - str = str.replace(/[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g, + str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\uffff]/g, function(s, i){ switch (s) { case '"': ++dq; return '"'; @@ -146,6 +146,7 @@ function OutputStream(options) { case "\b": return "\\b"; case "\f": return "\\f"; case "\x0B": return options.screw_ie8 ? "\\v" : "\\x0B"; + case "\x1f": return "\\x1f"; case "\u2028": return "\\u2028"; case "\u2029": return "\\u2029"; case "\ufeff": return "\\ufeff"; From 75e5f6edf9425f52d199fb56417802239d42c7b1 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Fri, 30 Sep 2016 15:17:05 +0200 Subject: [PATCH 04/13] Add more characters that should not be un-escaped in regexps. --- lib/output.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/output.js b/lib/output.js index 59556f5e..53afdbf9 100644 --- a/lib/output.js +++ b/lib/output.js @@ -134,7 +134,7 @@ function OutputStream(options) { function make_string(str, quote) { var dq = 0, sq = 0; - str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\uffff]/g, + str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\ufffe\uffff]/g, function(s, i){ switch (s) { case '"': ++dq; return '"'; @@ -150,6 +150,7 @@ function OutputStream(options) { case "\u2028": return "\\u2028"; case "\u2029": return "\\u2029"; case "\ufeff": return "\\ufeff"; + case "\ufffe": return "\\ufffe"; case "\uffff": return "\\uffff"; case "\0": return /[0-7]/.test(str.charAt(i+1)) ? 
"\\x00" : "\\0"; @@ -1285,6 +1286,7 @@ function OutputStream(options) { 0x00 , // \0 0x1f , 0xfeff , // Unicode BOM + 0xfffe , // Unicode noncharacter 0xffff , // Unicode noncharacter 0x2028 , // unicode "line separator" 0x2029 , // unicode "paragraph separator" From 4e98b220da7e7d3ac32127a1f5d249dd1a9995dc Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Sat, 1 Oct 2016 13:29:11 +0200 Subject: [PATCH 05/13] Fix test case for added un-escaped characters --- test/compress/ascii.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 2232d263..2b019829 100644 --- a/test/compress/ascii.js +++ b/test/compress/ascii.js @@ -31,6 +31,5 @@ ascii_only_false: { "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\uffff\'}' } - From 9ba2bbed46453c2241df61f6f812685130b1d482 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Sat, 1 Oct 2016 17:50:25 +0200 Subject: [PATCH 06/13] Add \uFFFE to tests. --- test/compress/ascii.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 2b019829..4b629c8f 100644 --- a/test/compress/ascii.js +++ b/test/compress/ascii.js @@ -10,10 +10,10 @@ ascii_only_true: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... 
\xfe\xff\u0fff\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}' } ascii_only_false: { @@ -28,8 +28,8 @@ ascii_only_false: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... 
\xfe\xff\u0fff\\ufffe\\uffff\'}' } From 9c6036bd71bf2c40f80c0dc3299b7205813386dc Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Tue, 4 Oct 2016 11:38:35 +0200 Subject: [PATCH 07/13] Factor out the escaping of characters that are not XML-safe into its own output option xml_safe --- lib/output.js | 36 ++++++++++++++++++++--------- test/compress/ascii.js | 24 ++++++++++++++++++-- test/compress/regexp.js | 50 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 13 deletions(-) create mode 100644 test/compress/regexp.js diff --git a/lib/output.js b/lib/output.js index 53afdbf9..2dc0694a 100644 --- a/lib/output.js +++ b/lib/output.js @@ -53,6 +53,7 @@ function OutputStream(options) { quote_keys : false, space_colon : true, ascii_only : false, + xml_safe : false, unescape_regexps : false, inline_script : false, width : 80, @@ -134,7 +135,10 @@ function OutputStream(options) { function make_string(str, quote) { var dq = 0, sq = 0; - str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\ufffe\uffff]/g, + var re = options.xml_safe ? + /[\x00-\x1f\x22\x27\u2028\u2029\0\ud800-\udfff\ufeff\ufffe\uffff]/g : + /[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g; + str = str.replace(re, function(s, i){ switch (s) { case '"': ++dq; return '"'; @@ -146,14 +150,10 @@ function OutputStream(options) { case "\b": return "\\b"; case "\f": return "\\f"; case "\x0B": return options.screw_ie8 ? "\\v" : "\\x0B"; - case "\x1f": return "\\x1f"; - case "\u2028": return "\\u2028"; - case "\u2029": return "\\u2029"; - case "\ufeff": return "\\ufeff"; - case "\ufffe": return "\\ufffe"; - case "\uffff": return "\\uffff"; case "\0": return /[0-7]/.test(str.charAt(i+1)) ? 
"\\x00" : "\\0"; + default: + return to_ascii(s); } return s; }); @@ -1262,7 +1262,22 @@ function OutputStream(options) { } }); - function regexp_safe_literal(code) { + function regexp_safe_literal(code, xml_safe) { + if (xml_safe) { + if ([ + // all C0 characters except tab, newline and return are invalid + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x0c, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0xfffe + ].indexOf(code) >= 0) + return false; + // C1 (except 0x85) and surrogate pair code points are invalid + if ((code >= 0x7f && code != 0x85 && code <= 0x9f) + || (code >= 0xd800 && code <= 0xdfff)) + return false; + } return [ 0x5c , // \ 0x2f , // / @@ -1284,9 +1299,7 @@ function OutputStream(options) { 0x0a , // \n 0x0d , // \r 0x00 , // \0 - 0x1f , 0xfeff , // Unicode BOM - 0xfffe , // Unicode noncharacter 0xffff , // Unicode noncharacter 0x2028 , // unicode "line separator" 0x2029 , // unicode "paragraph separator" @@ -1298,10 +1311,11 @@ function OutputStream(options) { if (output.option("ascii_only")) { str = output.to_ascii(str); } else if (output.option("unescape_regexps")) { + var xml_safe = output.option("xml_safe"); str = str.split("\\\\").map(function(str){ return str.replace(/\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}/g, function(s){ var code = parseInt(s.substr(2), 16); - return regexp_safe_literal(code) ? String.fromCharCode(code) : s; + return regexp_safe_literal(code, xml_safe) ? 
String.fromCharCode(code) : s; }); }).join("\\\\"); } diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 4b629c8f..16c31d14 100644 --- a/test/compress/ascii.js +++ b/test/compress/ascii.js @@ -1,3 +1,22 @@ +ascii_only_false_xml_safe_false: { + options = {} + beautify = { + ascii_only : false, + xml_safe : false, + screw_ie8 : true, + beautify : false, + } + input: { + function f() { + return "\x000\x001\x007\x008\x00" + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; + } + } + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\\uffff\'}' +} + ascii_only_true: { options = {} beautify = { @@ -16,10 +35,11 @@ ascii_only_true: { expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}' } -ascii_only_false: { +xml_safe_true: { options = {} beautify = { ascii_only : false, + xml_safe : true, screw_ie8 : true, beautify : false, } @@ -31,5 +51,5 @@ ascii_only_false: { "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... 
\xfe\xff\u0fff\\ufffe\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}' } diff --git a/test/compress/regexp.js b/test/compress/regexp.js new file mode 100644 index 00000000..b804dccc --- /dev/null +++ b/test/compress/regexp.js @@ -0,0 +1,50 @@ +regexp: { + options = {} + beautify = { + ascii_only : false, + xml_safe : false, + unescape_regexps : true, + screw_ie8 : true, + beautify : false, + } + input: { + function f() { + return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/; + } + } + expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\\uffff]/}' +} + +regexp_ascii_only: { + options = {} + beautify = { + ascii_only : true, + xml_safe : false, + unescape_regexps : true, + screw_ie8 : true, + beautify : false, + } + input: { + function f() { + return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/; + } + } + expect_exact: 'function f(){return/[\\x00\\x0d\\x1f \\x61\\u0fff\\ud800\\ufffe\\uffff]/}' +} + +regexp_xml_safe_true: { + options = {} + beautify = { + ascii_only : false, + xml_safe : true, + unescape_regexps : true, + screw_ie8 : true, + beautify : false, + } + input: { + function f() { + return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/; + } + } + expect_exact: 'function f(){return/[\\x00\\x0d\\x1f a\u0fff\\ud800\\ufffe\\uffff]/}' +} From 9c0ecd3fa1a5f511ca1cc2ad5f1f9b55503f3964 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Tue, 4 Oct 2016 17:46:21 +0200 Subject: [PATCH 08/13] Rearrange tests to increase readability of diff --- test/compress/ascii.js | 38 +++++++++++++++++++------------------- test/compress/regexp.js | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 16c31d14..671007bb 100644 --- a/test/compress/ascii.js +++ 
b/test/compress/ascii.js @@ -1,22 +1,3 @@ -ascii_only_false_xml_safe_false: { - options = {} - beautify = { - ascii_only : false, - xml_safe : false, - screw_ie8 : true, - beautify : false, - } - input: { - function f() { - return "\x000\x001\x007\x008\x00" + - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; - } - } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\\uffff\'}' -} - ascii_only_true: { options = {} beautify = { @@ -35,6 +16,25 @@ ascii_only_true: { expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}' } +ascii_only_false: { // also tests xml_safe===false + options = {} + beautify = { + ascii_only : false, + xml_safe : false, + screw_ie8 : true, + beautify : false, + } + input: { + function f() { + return "\x000\x001\x007\x008\x00" + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; + } + } + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... 
\xfe\xff\u0fff\ufffe\\uffff\'}' +} + xml_safe_true: { options = {} beautify = { diff --git a/test/compress/regexp.js b/test/compress/regexp.js index b804dccc..27a3c1a2 100644 --- a/test/compress/regexp.js +++ b/test/compress/regexp.js @@ -15,7 +15,7 @@ regexp: { expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\\uffff]/}' } -regexp_ascii_only: { +regexp_ascii_only_true: { options = {} beautify = { ascii_only : true, From c04a343e29432d4d67031ebf832a4748c3139bbc Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Tue, 4 Oct 2016 18:27:33 +0200 Subject: [PATCH 09/13] Fully restore standard functionality; all additional escaping is now properly hidden behind xml_safe --- lib/output.js | 5 ++--- test/compress/ascii.js | 10 +++++----- test/compress/regexp.js | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/output.js b/lib/output.js index 2dc0694a..764b8e36 100644 --- a/lib/output.js +++ b/lib/output.js @@ -137,7 +137,7 @@ function OutputStream(options) { var dq = 0, sq = 0; var re = options.xml_safe ? 
/[\x00-\x1f\x22\x27\u2028\u2029\0\ud800-\udfff\ufeff\ufffe\uffff]/g : - /[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g; + /[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff]/g; str = str.replace(re, function(s, i){ switch (s) { @@ -1270,7 +1270,7 @@ function OutputStream(options) { 0x08, 0x0c, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0xfffe + 0xfffe, 0xffff ].indexOf(code) >= 0) return false; // C1 (except 0x85) and surrogate pair code points are invalid @@ -1300,7 +1300,6 @@ function OutputStream(options) { 0x0d , // \r 0x00 , // \0 0xfeff , // Unicode BOM - 0xffff , // Unicode noncharacter 0x2028 , // unicode "line separator" 0x2029 , // unicode "paragraph separator" ].indexOf(code) < 0; diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 671007bb..7166f506 100644 --- a/test/compress/ascii.js +++ b/test/compress/ascii.js @@ -10,13 +10,13 @@ ascii_only_true: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... 
\\xfe\\xff\\u0fff\\uffff\'}' } -ascii_only_false: { // also tests xml_safe===false +ascii_only_false: { options = {} beautify = { ascii_only : false, @@ -29,10 +29,10 @@ ascii_only_false: { // also tests xml_safe===false return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... 
\xfe\xff\u0fff\uffff\'}' } xml_safe_true: { diff --git a/test/compress/regexp.js b/test/compress/regexp.js index 27a3c1a2..1dc7f7e2 100644 --- a/test/compress/regexp.js +++ b/test/compress/regexp.js @@ -12,7 +12,7 @@ regexp: { return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/; } } - expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\\uffff]/}' + expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\uffff]/}' } regexp_ascii_only_true: { From e1d78410c61596b1f9b3116e21b82699c85cc974 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Tue, 4 Oct 2016 18:47:29 +0200 Subject: [PATCH 10/13] Extract array of unsafe XML chars to variable --- lib/output.js | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/output.js b/lib/output.js index 764b8e36..ddc28820 100644 --- a/lib/output.js +++ b/lib/output.js @@ -1262,16 +1262,18 @@ function OutputStream(options) { } }); + var xml_unsafe = [ + // all C0 characters except tab, newline and return are invalid + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x0c, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0xfffe, 0xffff + ]; + function regexp_safe_literal(code, xml_safe) { if (xml_safe) { - if ([ - // all C0 characters except tab, newline and return are invalid - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x0c, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0xfffe, 0xffff - ].indexOf(code) >= 0) + if (xml_unsafe.indexOf(code) >= 0) return false; // C1 (except 0x85) and surrogate pair code points are invalid if ((code >= 0x7f && code != 0x85 && code <= 0x9f) From 41ad14e6fac98828a2c2e0cdacd364417413a193 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Tue, 4 Oct 2016 18:56:08 +0200 Subject: [PATCH 11/13] Add Unicode line break, paragraph break and byte order mark to tests. 
--- test/compress/ascii.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/compress/ascii.js b/test/compress/ascii.js index 7166f506..4d0f8a21 100644 --- a/test/compress/ascii.js +++ b/test/compress/ascii.js @@ -10,10 +10,11 @@ ascii_only_true: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff" + + "\u2028\u2029\ufeff\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\'+"\\u2028\\u2029\\ufeff\\uffff"}' } ascii_only_false: { @@ -29,10 +30,11 @@ ascii_only_false: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff" + + "\u2028\u2029\ufeff\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... 
\xfe\xff\u0fff\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\'+"\\u2028\\u2029\\ufeff\uffff"}' } xml_safe_true: { @@ -48,8 +50,9 @@ xml_safe_true: { return "\x000\x001\x007\x008\x00" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff"; + "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff" + + "\u2028\u2029\ufeff\ufffe\uffff"; } } - expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}' + expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\'+"\\u2028\\u2029\\ufeff\\ufffe\\uffff"}' } From 63a83a65c3e7b6ea8d1e3dd7f8594c73ebfb4c64 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Wed, 5 Oct 2016 16:49:53 +0200 Subject: [PATCH 12/13] Remove superfluous code. --- lib/output.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/output.js b/lib/output.js index ddc28820..dd94fea5 100644 --- a/lib/output.js +++ b/lib/output.js @@ -152,10 +152,8 @@ function OutputStream(options) { case "\x0B": return options.screw_ie8 ? "\\v" : "\\x0B"; case "\0": return /[0-7]/.test(str.charAt(i+1)) ? 
"\\x00" : "\\0"; - default: - return to_ascii(s); } - return s; + return to_ascii(s); }); function quote_single() { return "'" + str.replace(/\x27/g, "\\'") + "'"; From 0b6b8b3da3bd1974f26e6b65fa9bb0f3cb001200 Mon Sep 17 00:00:00 2001 From: Stefan Bethke Date: Wed, 5 Oct 2016 18:15:05 +0200 Subject: [PATCH 13/13] Improve readability --- lib/output.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/output.js b/lib/output.js index dd94fea5..ad2ccecb 100644 --- a/lib/output.js +++ b/lib/output.js @@ -138,8 +138,7 @@ function OutputStream(options) { var re = options.xml_safe ? /[\x00-\x1f\x22\x27\u2028\u2029\0\ud800-\udfff\ufeff\ufffe\uffff]/g : /[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff]/g; - str = str.replace(re, - function(s, i){ + str = str.replace(re, function(s, i){ switch (s) { case '"': ++dq; return '"'; case "'": ++sq; return "'";