Factor out the escaping of characters that are not XML-safe into its own output option xml_safe

This commit is contained in:
Stefan Bethke 2016-10-04 11:38:35 +02:00
parent 9ba2bbed46
commit 9c6036bd71
3 changed files with 97 additions and 13 deletions

View File

@ -53,6 +53,7 @@ function OutputStream(options) {
quote_keys : false,
space_colon : true,
ascii_only : false,
xml_safe : false,
unescape_regexps : false,
inline_script : false,
width : 80,
@ -134,7 +135,10 @@ function OutputStream(options) {
function make_string(str, quote) {
var dq = 0, sq = 0;
str = str.replace(/[\\\b\f\n\r\v\t\x1f\x22\x27\u2028\u2029\0\ufeff\ufffe\uffff]/g,
var re = options.xml_safe ?
/[\x00-\x1f\x22\x27\u2028\u2029\0\ud800-\udfff\ufeff\ufffe\uffff]/g :
/[\\\b\f\n\r\v\t\x22\x27\u2028\u2029\0\ufeff\uffff]/g;
str = str.replace(re,
function(s, i){
switch (s) {
case '"': ++dq; return '"';
@ -146,14 +150,10 @@ function OutputStream(options) {
case "\b": return "\\b";
case "\f": return "\\f";
case "\x0B": return options.screw_ie8 ? "\\v" : "\\x0B";
case "\x1f": return "\\x1f";
case "\u2028": return "\\u2028";
case "\u2029": return "\\u2029";
case "\ufeff": return "\\ufeff";
case "\ufffe": return "\\ufffe";
case "\uffff": return "\\uffff";
case "\0":
return /[0-7]/.test(str.charAt(i+1)) ? "\\x00" : "\\0";
default:
return to_ascii(s);
}
return s;
});
@ -1262,7 +1262,22 @@ function OutputStream(options) {
}
});
function regexp_safe_literal(code) {
function regexp_safe_literal(code, xml_safe) {
if (xml_safe) {
if ([
// all C0 characters except tab, newline and return are invalid
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x0c, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0xfffe
].indexOf(code) >= 0)
return false;
// C1 (except 0x85) and surrogate pair code points are invalid
if ((code >= 0x7f && code != 0x85 && code <= 0x9f)
|| (code >= 0xd800 && code <= 0xdfff))
return false;
}
return [
0x5c , // \
0x2f , // /
@ -1284,9 +1299,7 @@ function OutputStream(options) {
0x0a , // \n
0x0d , // \r
0x00 , // \0
0x1f ,
0xfeff , // Unicode BOM
0xfffe , // Unicode noncharacter
0xffff , // Unicode noncharacter
0x2028 , // unicode "line separator"
0x2029 , // unicode "paragraph separator"
@ -1298,10 +1311,11 @@ function OutputStream(options) {
if (output.option("ascii_only")) {
str = output.to_ascii(str);
} else if (output.option("unescape_regexps")) {
var xml_safe = output.option("xml_safe");
str = str.split("\\\\").map(function(str){
return str.replace(/\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}/g, function(s){
var code = parseInt(s.substr(2), 16);
return regexp_safe_literal(code) ? String.fromCharCode(code) : s;
return regexp_safe_literal(code, xml_safe) ? String.fromCharCode(code) : s;
});
}).join("\\\\");
}

View File

@ -1,3 +1,22 @@
// Fixture: string-literal escaping with both ascii_only and xml_safe turned off.
// The input concatenates four literals: NUL followed by the digits 0/1/7/8 and a
// trailing NUL, the code points \x00-\x0f, the code points \x10-\x1f, and a sample
// running from \x20 up to \uffff.
// NOTE(review): `beautify` presumably carries the output options handed to the
// printer, and `expect_exact` the exact text it must produce - confirm against
// the test runner.
ascii_only_false_xml_safe_false: {
options = {}
beautify = {
ascii_only : false,
xml_safe : false,
screw_ie8 : true,
beautify : false,
}
input: {
function f() {
return "\x000\x001\x007\x008\x00" +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff";
}
}
// Expected text, as spelled out in the string below:
//  - NUL is written as \x00 when the next character is 0, 1 or 7, and as \0 otherwise;
//  - \x08-\x0d use the short escapes \b \t \n \v \f \r;
//  - the remaining control characters, \x7f-\xff, \u0fff and \ufffe are emitted raw;
//  - \uffff stays escaped;
//  - the last literal is delimited with single quotes.
expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\\uffff\'}'
}
ascii_only_true: {
options = {}
beautify = {
@ -16,10 +35,11 @@ ascii_only_true: {
expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\ufffe\\uffff\'}'
}
ascii_only_false: {
xml_safe_true: {
options = {}
beautify = {
ascii_only : false,
xml_safe : true,
screw_ie8 : true,
beautify : false,
}
@ -31,5 +51,5 @@ ascii_only_false: {
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\ufffe\uffff";
}
}
expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}'
expect_exact: 'function f(){return"\\x000\\x001\\x007\\08\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\\ufffe\\uffff\'}'
}

50
test/compress/regexp.js Normal file
View File

@ -0,0 +1,50 @@
// Fixture: regexp-literal output with unescape_regexps on, ascii_only off and
// xml_safe off. The input is a character class built entirely from \x / \u escapes
// (plus one literal space).
// NOTE(review): `beautify` presumably carries the output options handed to the
// printer - confirm against the test runner.
regexp: {
options = {}
beautify = {
ascii_only : false,
xml_safe : false,
unescape_regexps : true,
screw_ie8 : true,
beautify : false,
}
input: {
function f() {
return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
}
}
// Expected text: \x00, \x0d and \uffff keep their escape sequences, while
// \x1f, \x61 (as "a"), \u0fff, \ud800 and \ufffe appear as the characters themselves.
expect_exact: 'function f(){return/[\\x00\\x0d\x1f a\u0fff\ud800\ufffe\\uffff]/}'
}
// Fixture: regexp-literal output with ascii_only on (unescape_regexps is also on,
// xml_safe off). Same input character class as the other regexp fixtures in this file.
// NOTE(review): `beautify` presumably carries the output options handed to the
// printer - confirm against the test runner.
regexp_ascii_only: {
options = {}
beautify = {
ascii_only : true,
xml_safe : false,
unescape_regexps : true,
screw_ie8 : true,
beautify : false,
}
input: {
function f() {
return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
}
}
// Expected text: every escape sequence from the input is kept as written,
// including \x61, which is NOT turned into a literal "a" here.
expect_exact: 'function f(){return/[\\x00\\x0d\\x1f \\x61\\u0fff\\ud800\\ufffe\\uffff]/}'
}
// Fixture: regexp-literal output with xml_safe on and unescape_regexps on
// (ascii_only off). Same input character class as the other regexp fixtures in this file.
// NOTE(review): `beautify` presumably carries the output options handed to the
// printer - confirm against the test runner.
regexp_xml_safe_true: {
options = {}
beautify = {
ascii_only : false,
xml_safe : true,
unescape_regexps : true,
screw_ie8 : true,
beautify : false,
}
input: {
function f() {
return /[\x00\x0d\x1f \x61\u0fff\ud800\ufffe\uffff]/;
}
}
// Expected text: only \x61 (as "a") and \u0fff are emitted as literal characters;
// \x00, \x0d, \x1f, \ud800, \ufffe and \uffff all keep their escape sequences.
expect_exact: 'function f(){return/[\\x00\\x0d\\x1f a\u0fff\\ud800\\ufffe\\uffff]/}'
}