Add an `xml_safe` output option (default: false).

With `xml_safe` enabled:
  * make_string() additionally escapes every C0 control character
    (\x00-\x1f), every surrogate code unit (\ud800-\udfff) and \ufffe in
    string literals; matched characters without a dedicated `case` are
    handed to to_ascii().
  * regexp_safe_literal() refuses to turn an escaped code point back into a
    literal character when it is a C0 control other than tab/newline/return,
    DEL, a C1 control other than 0x85, a surrogate, or 0xfffe.

Without `xml_safe`, \x1f and \ufffe are no longer escaped in strings, and are
no longer kept escaped in regexps.

Changes relative to the first revision of this patch:
  * make_string(): the xml_safe character class now contains the backslash.
    Before, a backslash was never matched by the replace pattern in xml_safe
    mode, so it could not be escaped there.
  * regexp_safe_literal(): 0x0b (vertical tab) is now in the xml_safe reject
    list, matching the comment "all C0 characters except tab, newline and
    return"; the regexp_xml_safe_true test now covers it.
  * regexp_safe_literal(): the range comment mentions DEL, because the range
    check starts at 0x7f.

diff --git a/lib/output.js b/lib/output.js
index 53afdbf9..2dc0694a 100644
--- a/lib/output.js
+++ b/lib/output.js
@@ -53,6 +53,7 @@ function OutputStream(options) {
         quote_keys       : false,
         space_colon      : true,
         ascii_only       : false,
+        xml_safe         : false,
         unescape_regexps : false,
         inline_script    : false,
         width            : 80,
@@ -134,7 +135,10 @@ function OutputStream(options) {
 
     function make_string(str, quote) {
         var dq = 0, sq = 0;
-        str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\ufffe\uffff]/g,
+        var re = options.xml_safe ?
+            /[\\\x00-\x1f\x22\x27\u2028\u2029\0\ud800-\udfff\ufeff\ufffe\uffff]/g :
+            /[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g;
+        str = str.replace(re,
           function(s, i){
             switch (s) {
               case '"': ++dq; return '"';
@@ -146,14 +150,10 @@ function OutputStream(options) {
               case "\b": return "\\b";
               case "\f": return "\\f";
               case "\x0B": return options.screw_ie8 ? "\\v" : "\\x0B";
-              case "\x1f": return "\\x1f";
-              case "\u2028": return "\\u2028";
-              case "\u2029": return "\\u2029";
-              case "\ufeff": return "\\ufeff";
-              case "\ufffe": return "\\ufffe";
-              case "\uffff": return "\\uffff";
               case "\0":
                   return /[0-7]/.test(str.charAt(i+1)) ? "\\x00" : "\\0";
+              default:
+                return to_ascii(s);
             }
             return s;
         });
@@ -1262,7 +1262,22 @@ function OutputStream(options) {
         }
     });
 
-    function regexp_safe_literal(code) {
+    function regexp_safe_literal(code, xml_safe) {
+        if (xml_safe) {
+            if ([
+                // all C0 characters except tab, newline and return are invalid
+                0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                0x08, 0x0b, 0x0c, 0x0e, 0x0f,
+                0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+                0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+                0xfffe
+            ].indexOf(code) >= 0)
+                return false;
+            // DEL, C1 (except 0x85) and surrogate code points are invalid
+            if ((code >= 0x7f && code != 0x85 && code <= 0x9f)
+                || (code >= 0xd800 && code <= 0xdfff))
+                return false;
+        }
         return [
             0x5c   , // \
             0x2f   , // /
@@ -1284,9 +1299,7 @@ function OutputStream(options) {
             0x0a   , // \n
             0x0d   , // \r
             0x00   , // \0
-            0x1f   ,
             0xfeff , // Unicode BOM
-            0xfffe , // Unicode noncharacter
             0xffff , // Unicode noncharacter
             0x2028 , // unicode "line separator"
             0x2029 , // unicode "paragraph separator"
@@ -1298,10 +1311,11 @@ function OutputStream(options) {
         if (output.option("ascii_only")) {
             str = output.to_ascii(str);
         } else if (output.option("unescape_regexps")) {
+            var xml_safe = output.option("xml_safe");
             str = str.split("\\\\").map(function(str){
                 return str.replace(/\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}/g, function(s){
                     var code = parseInt(s.substr(2), 16);
-                    return regexp_safe_literal(code) ? String.fromCharCode(code) : s;
+                    return regexp_safe_literal(code, xml_safe) ? String.fromCharCode(code) : s;
                 });
             }).join("\\\\");
         }
diff --git a/test/compress/ascii.js b/test/compress/ascii.js
index 4b629c8f..16c31d14 100644
--- a/test/compress/ascii.js
+++ b/test/compress/ascii.js
@@ -1,3 +1,22 @@
+ascii_only_false_xml_safe_false: {
+    options = {}
+    beautify = {
+        ascii_only : false,
+        xml_safe   : false,
+        screw_ie8  : true,
+        beautify   : false,
+    }
+    input: {
+        function f() {
+            return "\x000\x001\x007\x008\x00" +
+                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
+                   "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+                   "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff";
+        }
+    }
+    expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\\uffff\'}'
+}
+
 ascii_only_true: {
     options = {}
     beautify = {
@@ -16,10 +35,11 @@ ascii_only_true: {
     expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}'
 }
 
-ascii_only_false: {
+xml_safe_true: {
     options = {}
     beautify = {
         ascii_only : false,
+        xml_safe   : true,
         screw_ie8  : true,
         beautify   : false,
     }
@@ -31,5 +51,5 @@ ascii_only_false: {
                    "\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff";
         }
     }
-    expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}'
+    expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}'
 }
diff --git a/test/compress/regexp.js b/test/compress/regexp.js
new file mode 100644
index 00000000..b804dccc
--- /dev/null
+++ b/test/compress/regexp.js
@@ -0,0 +1,50 @@
+regexp: {
+    options = {}
+    beautify = {
+        ascii_only       : false,
+        xml_safe         : false,
+        unescape_regexps : true,
+        screw_ie8        : true,
+        beautify         : false,
+    }
+    input: {
+        function f() {
+            return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
+        }
+    }
+    expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\\uffff]/}'
+}
+
+regexp_ascii_only: {
+    options = {}
+    beautify = {
+        ascii_only       : true,
+        xml_safe         : false,
+        unescape_regexps : true,
+        screw_ie8        : true,
+        beautify         : false,
+    }
+    input: {
+        function f() {
+            return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
+        }
+    }
+    expect_exact: 'function f(){return/[\\x00\\x0d\\x1f \\x61\\u0fff\\ud800\\ufffe\\uffff]/}'
+}
+
+regexp_xml_safe_true: {
+    options = {}
+    beautify = {
+        ascii_only       : false,
+        xml_safe         : true,
+        unescape_regexps : true,
+        screw_ie8        : true,
+        beautify         : false,
+    }
+    input: {
+        function f() {
+            return /[\x00\x0b\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
+        }
+    }
+    expect_exact: 'function f(){return/[\\x00\\x0b\\x0d\\x1f a\u0fff\\ud800\\ufffe\\uffff]/}'
+}