diff --git a/bin/js_features.js b/bin/js_features.js index 95a0c7cf..c6a4ba07 100755 --- a/bin/js_features.js +++ b/bin/js_features.js @@ -8,23 +8,37 @@ var sys = require("util"); var yargs = require("yargs"); var fs = require("fs"); var colors = require("colors"); +var http = require('http'); var ARGS = yargs .usage("$0 input1.js \n") - .describe("print_ast", "Prints a dot file describing the internal abstract syntax") - .describe("json_formatting", "Prints the JSON nicelly formatted") + .describe("rename", "Renames variables names with names learnt from large amount of non-obfuscated JavaScript code") + .describe("nice2predict_server", "server URL used in renaming") + .describe("print_ast", "Prints a dot file describing the internal abstract syntax tree") + .describe("nice_formatting", "Prints the results nicely formatted") .describe("skip_minified", "Whether to skip processing minified files") + .describe("extract_features", "extract features into JSON") .describe("features", "Comma separated list of features: \n" + "ASTREL - relations in AST, \n" + "FNAMES - function names to internal calls,\n" + "FSCOPE - add variable scope constraints.") - .demand(1) + //.demand(1) .default('features', 'ASTREL,FNAMES,FSCOPE') + .default('nice2predict_server', 'www.nice2predict.org:5745') + .default('rename', true) + .boolean("rename") + .boolean('extract_features') .boolean("print_ast") .boolean("skip_minified") - .boolean("json_formatting") + .boolean("nice_formatting") .string("features") + .string("nice2predict_server") .wrap(80) + .check(function(argv, options){ + if (argv._.length == 0){ + throw "ERROR: ".red + "Nothing to analyze. 
No input file provided."; + } + }) .argv ; @@ -45,6 +59,16 @@ if (ARGS.features === true) { process.exit(1); } +//http request does not handle http:// and https:// prefixes +ARGS.nice2predict_server = ARGS.nice2predict_server.replace(/^(http:\/\/|https:\/\/)/, ''); +var HOST = ARGS.nice2predict_server.split(":")[0]; +var PORT = parseInt(ARGS.nice2predict_server.split(":")[1], 10); + +//make only one mode active +if (ARGS.extract_features){ + ARGS.rename = false; +} + var features = ARGS.features.split(","); for (var i = 0; i < features.length; i++) { if (features[i] != "FNAMES" && features[i] != "ASTREL" && features[i] != "FSCOPE") { @@ -53,7 +77,7 @@ for (var i = 0; i < features.length; i++) { } for (var i = 0; i < files.length; i++) { - processFile(files[i], ARGS.print_ast, ARGS.features, ARGS.json_formatting, ARGS.skip_minified); + processFile(files[i]); } function stripInterpreter(code){ @@ -64,7 +88,7 @@ function stripInterpreter(code){ return code.slice(code.indexOf('\n') + 1); } -function processFile(file, print_ast, features, json_formatting, skip_minified) { +function processFile(file) { var code; try { code = fs.readFileSync(file, "utf-8"); @@ -78,7 +102,7 @@ function processFile(file, print_ast, features, json_formatting, skip_minified) code = stripInterpreter(code); try { - var output = UglifyJS.extractFeatures(code, file, print_ast, features, skip_minified); + var output = UglifyJS.extractFeatures(code, file, ARGS.print_ast, ARGS.features, ARGS.skip_minified); } catch (ex){ if (ex instanceof UglifyJS.Parse_Error){ sys.error("ERROR: ".red + "cannot parse file '" + file + "': " + ex.message); @@ -95,7 +119,7 @@ function processFile(file, print_ast, features, json_formatting, skip_minified) return; } - if (!json_formatting) { + if (!ARGS.nice_formatting) { output = removeWhitespace(output); } @@ -107,13 +131,74 @@ function processFile(file, print_ast, features, json_formatting, skip_minified) throw e; } - if (removeWhitespace(output) != 
'{"query":[],"assign":[]}') { - console.log(output); - //sys.error("OK: ".green + "'" + file + "'"); - } else { + if (removeWhitespace(output) == '{"query":[],"assign":[]}') { sys.error("WARN: ".yellow + " no features extracted '" + file + "'"); + } else { + //sys.error("OK: ".green + "'" + file + "'"); } - + + if (ARGS.extract_features) { + console.log(output); + } else if (ARGS.rename){ + callServer( + HOST, + PORT, + "infer", + JSON.parse(output), + function(data) { + var result = JSON.parse(data).result; + var inferred_names = {}; + for (var i = 0; i < result.length; i++) { + if (result[i].hasOwnProperty("inf")) { + inferred_names[result[i].v] = result[i].inf.green; + } + } + console.log(UglifyJS.replaceMangled(code, file, inferred_names)); + }, + function(err) { + console.log("ERROR: ".red + "connecting to server '" + HOST + ":" + PORT + "' " + err); + }); + } +} + +var json_rpc_id = 0; + +function callServer(server, port, methodName, params, success_cb, error_cb) { + var req = { + jsonrpc : '2.0', + method : methodName, + id : (++json_rpc_id) + }; + req.params = params; + var post_data = JSON.stringify(req); + + var options = { + host: server, + port: port, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(post_data) + } + }; + + var req = http.request(options, function(res) { + res.setEncoding('utf8'); + var data = ""; + res.on('data', function (chunk) { + data += chunk; + }); + res.on('end', function () { + success_cb(data); + }); + }); + + req.on('error', function(err) { + error_cb(err); + }); + + req.write(post_data); + req.end(); } /* ------------------------ */ diff --git a/lib/feature_extractor.js b/lib/feature_extractor.js index 4174d2b9..93733bb3 100644 --- a/lib/feature_extractor.js +++ b/lib/feature_extractor.js @@ -35,25 +35,39 @@ function isMinified(toplevel, code, file){ numNumberedNames >= NUM_NUMBERED_LOCALS; } -function replaceMangled(code, file) { +function replaceMangled(code, file, 
infered_names) { var toplevel; try { toplevel = parseFile(code, file); - } catch (e) { - console.warn("Cannot parse file: '%s'", file); - return null; + } catch (ex){ + throw new Parse_Error(ex); } extendAst(toplevel); + //feature_outputter.string_map defines what id is assigned to each node in the final output + //therefore to assign same ids, we need to first populate string_map by running feature extraction var feature_outputter = new FeatureJsonOutputter(); generateAstFeatures(toplevel, feature_outputter); generateFnamesFeatures(toplevel, feature_outputter); generateFscopeConstraints(toplevel, feature_outputter); - //feature_outputter.string_map defines what id is assigned to each node in the final output - //therefore to assign same ids, we need to first populate string_map by running feature extraction - var stream = OutputStream({beautify : true, replace_mangled_map : feature_outputter.string_map}); + var stream; + if (typeof infered_names !== 'undefined') { + //replace variables with inferred names + stream = OutputStream({ + beautify: true, replace_mangled: function (node) { + return node.definition() ? infered_names[feature_outputter.string_map.getId("$" + node.definition().id + "-" + node.name)] : node.name; + } + }); + } else { + //replace variables with placeholders. Using in the online demo for interactive renaming. + stream = OutputStream({ + beautify: true, replace_mangled: function (node) { + return node.definition() ? 
"local$$" + feature_outputter.string_map.getId("$" + node.definition().id + "-" + node.name) : node.name; + } + }); + } toplevel.print(stream); return stream.toString(); } @@ -83,8 +97,6 @@ function extractFeatures(code, file, print_ast, features, skip_minified) { if (skip_minified && isMinified(toplevel, code, file)){ throw new Minified_Error("Skipping minified file"); - //console.warn("Skipping minified file: '%s'", file); - //return null; } var feature_outputter = new FeatureJsonOutputter(); diff --git a/lib/output.js b/lib/output.js index 42549ad3..fc786e98 100644 --- a/lib/output.js +++ b/lib/output.js @@ -63,7 +63,7 @@ function OutputStream(options) { preserve_line : false, screw_ie8 : false, preamble : null, - replace_mangled_map : null + replace_mangled : null }, true); var indentation = 0; @@ -309,7 +309,7 @@ function OutputStream(options) { var stack = []; return { - replace_mangled_map : options.replace_mangled_map, + replace_mangled : options.replace_mangled, get : get, toString : get, indent : indent, @@ -1100,11 +1100,13 @@ function OutputStream(options) { DEFPRINT(AST_Symbol, function(self, output){ var def = self.definition(); // output.print_name(def ? def.mangled_name || def.name : self.name); - // replace all the variable names to be renamed with a placeholder - if (output.replace_mangled_map == null || self instanceof AST_This || self.unmangleable()) + + if (output.replace_mangled == null || self instanceof AST_This || self.unmangleable()) + //retain original name for variables that cannot be mangled output.print_name(def ? def.mangled_name || def.name : self.name); else { - output.print_name(def ? "local$$" + output.replace_mangled_map.getId("$" + def.id + "-" + self.name) : self.name); + // rename the rest using the client provided function + output.print_name(output.replace_mangled(self)); } }); DEFPRINT(AST_Undefined, function(self, output){