"use strict"; var INFER = '$'; var GIVEN = '#'; var EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE = 25; var MAX_RATIO_SHORT_NAMES = 0.45; var NUM_NUMBERED_LOCALS = 5; function isMinified(toplevel, code, file){ var numLines = code.split(/\r\n|\r|\n/).length; var numStatements = 0; var numNames = 0; var numShortNames = 0; var numNumberedNames = 0; toplevel.walk(new TreeWalker(function(node, descend){ numStatements++; if (node instanceof AST_Symbol && !(node instanceof AST_This)) { numNames++; if (node.name.length <= 2 && node.name != "el" && node.name != "$") { numShortNames++; } if (node.name.length >= 2 && node.name[0] == '_') { var c2 = node.name[1]; if (c2 >= '0' && c2 <= '9') ++numNumberedNames; } } })); return (EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE * numLines <= numStatements) || (numShortNames > numNames * MAX_RATIO_SHORT_NAMES) || numNumberedNames == numNames || numNumberedNames >= NUM_NUMBERED_LOCALS; } function replaceMangled(code, file) { var toplevel; try { toplevel = parseFile(code, file); } catch (e) { console.warn("Cannot parse file: '%s'", file); return null; } extendAst(toplevel); var feature_outputter = new FeatureJsonOutputter(); generateAstFeatures(toplevel, feature_outputter); generateFnamesFeatures(toplevel, feature_outputter); //feature_outputter.string_map defines what id is assigment to each node in the final output //therefore to assign same ids, we need to first populate by running feature extraction var stream = OutputStream({beautify : true, replace_mangled_map : feature_outputter.string_map}); toplevel.print(stream); return stream.toString(); } function extractFeatures(code, file, print_ast, features, skip_minified) { var toplevel; try { toplevel = parseFile(code, file); } catch (e) { console.warn("Cannot parse file: '%s'", file); return null; } extendAst(toplevel); if (print_ast) { return printAst(toplevel); } if (skip_minified && isMinified(toplevel, code, file)){ console.warn("Skipping minified file: '%s'", file); return null; } var 
feature_outputter = new FeatureJsonOutputter(); feature_outputter.openElem(); feature_outputter.openArray("query"); if (features.indexOf("ASTREL") != -1) { generateAstFeatures(toplevel, feature_outputter); } if (features.indexOf("FNAMES") != -1) { generateFnamesFeatures(toplevel, feature_outputter); } if (features.indexOf("FSCOPE") != -1) { generateFscopeConstraints(toplevel, feature_outputter); } feature_outputter.closeArray(); feature_outputter.dumpSymbols(); feature_outputter.closeElem(); return feature_outputter.output; } /* -----[ functions ]----- */ function nodeToString(node) { if (node == null) return null; if (node instanceof AST_Symbol){ if (node instanceof AST_This ){ return GIVEN + node.name; } // AST_Symbol::unmangleable() returns true if this symbol cannot be renamed (it's either global, undeclared, or defined in scope where eval or with are in use. if (node.unmangleable()){ return GIVEN + node.name; } return INFER + node.definition().id + "-" + node.name; } else if (node instanceof AST_Constant){ return GIVEN + String(node.value).slice(0,64); } else if (node instanceof AST_Sub){ //x[1], x -> expression, 1 -> property return nodeToString(node.expression) + "[]"; } else if (node instanceof AST_PropAccess){ return GIVEN + node.property; } else if (node instanceof AST_Defun) { //function foo(...) { ... } return nodeToString(node.name); } else if (node instanceof AST_VarDef){ // var x = function () { ... } return nodeToString(node.name); } else if (node instanceof AST_Assign){ //x = function () { ... } return nodeToString(node.left); } else if (node instanceof AST_ObjectProperty){ // { "x" : function () { ... } } return GIVEN + node.key; } else if (node instanceof AST_Call){ //x.foo( function () { ... } ) //foo( function () { ... } ) return nodeToString(node.expression); } else if (node instanceof AST_Lambda) { if (node.parent instanceof AST_Call){ //'node.parent.expression != node' as lambda can call itself return (node.parent.expression == node) ? 
null : nodeToString(node.parent.expression) + "(" + node.child_id + ")"; } return nodeToString(node.parent); } return null; } function nodeType(node) { if (node instanceof AST_Binary || node instanceof AST_Unary) { return node.__proto__.TYPE + node.operator; } else if (node instanceof AST_Boolean) { return "Bool"; } else if (node instanceof AST_Atom && !(node instanceof AST_Constant)) { //atoms are special constant values as Nan, Undefined, Infinity,.. return "Atom"; } return node.__proto__.TYPE; } function pathToStringFw(path, start){ var res = ""; for (var i = start; i < path.length - 1; i++) { res += nodeType(path[i]); res += "[" + path[i+1].child_id + "]"; } return res; } function pathToStringBw(path, start){ var res = ""; for (var i = path.length - 2; i >= start; i--) { res += nodeType(path[i]); res += "[" + path[i+1].child_id + "]"; } return res; } function printAst(toplevel){ var output = ""; var walker = new TreeWalker(function(node){ output += string_template(" node{id} [label=\"{label}\"];\n", { id: node.id, label: nodeType(node) }); if (walker.parent() != null) { output += string_template(" node{id1} -> node{id2} [weight=1];\n", { id1: walker.parent().id, id2: node.id }); } }); output += "digraph AST {\n"; toplevel.walk(walker); output += "}\n"; return output; } function generateAstFeatures(toplevel, feature_outputter) { var walker = new TreeWalker(function(node){ // console.log(nodeType(node) + " - " + nodeToString(node)); var paths = this.node_finder.find(node); for (var i = 0; i < paths.length; i++) { var path1 = paths[i]; var node1 = path1[path1.length - 1]; for (var j = i + 1; j < paths.length; j++) { var common_prefix_len = 0; var path2 = paths[j]; var node2 = path2[path2.length - 1]; //determine common prefix to be skipped while(common_prefix_len < path1.length && common_prefix_len < path2.length && path1[common_prefix_len] === path2[common_prefix_len]){ common_prefix_len++; } if (common_prefix_len == 0) { throw "common prefix not greater than 
0!"; } feature_outputter.addFeature( nodeToString(node1), nodeToString(node2), pathToStringBw(path1, common_prefix_len) + ":" + nodeType(path1[common_prefix_len - 1]) + ":" + pathToStringFw(path2, common_prefix_len) ); } } }); walker.node_finder = new NodePathFinder(3, function(node) { return (node instanceof AST_Symbol || node instanceof AST_Constant || node instanceof AST_PropAccess); }); toplevel.walk(walker); } function addFeatures(lhss, lhs_label, rhs, rhs_label, feature_outputter){ var prefix = ""; for (var i = lhss.length - 1; i >= 0; i--) { prefix += lhs_label; feature_outputter.addFeature(lhss[i], rhs, prefix + rhs_label); } } function addScopeConstraints(node, toplevel, feature_outputter){ feature_outputter.beginScope(); var name = nodeToString(node); if (name != null) feature_outputter.addToScope(name); for (var i = 0; i < node.enclosed.length; i++){ feature_outputter.addToScope(nodeToString(node.enclosed[i].orig[0])); } node.variables.each(function(symbol){ feature_outputter.addToScope(nodeToString(symbol.orig[0])); }); toplevel.globals.each(function(symbol){ feature_outputter.addToScope(nodeToString(symbol.orig[0])); }); feature_outputter.endScope(); } function generateFscopeConstraints(toplevel, feature_outputter){ addScopeConstraints(toplevel, toplevel, feature_outputter); toplevel.walk(new TreeWalker(function(node) { if (node instanceof AST_Defun || node instanceof AST_Lambda) { addScopeConstraints(node, toplevel, feature_outputter); } })); } function generateFnamesFeatures(toplevel, feature_outputter){ var outer_funcs = []; toplevel.walk(new TreeWalker(function(node, descend){ if ((node instanceof AST_Defun || node instanceof AST_Lambda) && nodeToString(node) != null) { var name = nodeToString(node); for (var i = 0; i < node.argnames.length; i++) { addFeatures([name], "FN", nodeToString(node.argnames[i]), "PAR", feature_outputter); } outer_funcs.push(name); descend(); //traverse childs outer_funcs.pop(); return true; //do not traverse childs again 
} if (node instanceof AST_New) { addFeatures(outer_funcs, "FN", nodeToString(node), "NEW", feature_outputter); } else if (node instanceof AST_Call) { addFeatures(outer_funcs, "FN", nodeToString(node), "CALL", feature_outputter); } else if (node instanceof AST_Constant){ addFeatures(outer_funcs, "FN", nodeToString(node), nodeType(node).toUpperCase(), feature_outputter); } else if (node instanceof AST_VarDef){ addFeatures(outer_funcs, "FN", nodeToString(node.name), "DECL", feature_outputter); } else if (node instanceof AST_Dot && !(node.parent instanceof AST_Call)) { addFeatures(outer_funcs, "FN", nodeToString(node), "PROP", feature_outputter); } else if (node instanceof AST_Return && nodeToString(node.value) != null) { addFeatures(outer_funcs, "FN", nodeToString(node.value), "RETURN", feature_outputter); } })); } /* -----[ NodePathFinder ]----- */ function NodePathFinder(max_depth, filter) { this.max_depth = max_depth; this.paths = []; this.filter = filter; } NodePathFinder.prototype = new TreeWalker(function(node, descend){ if (this.stack.length > this.max_depth || node instanceof AST_Defun){ return true; } //enforce in-order traversal //otherwise we get for "x.foo()" feature foo - x instead of x - foo as x is a parent of foo in the AST descend(); if (this.filter(node)) { this.paths.push(this.stack.slice(0)); } return true; }); NodePathFinder.prototype.find = function(node) { this.root = node; this.paths = []; node.walk(this); return this.paths; }; /* ---[ JsonOutputter ]--- */ function FeatureJsonOutputter() { this.string_map = new StringMap(false); this.first_element = true; this.output = ""; this.depth = 0; this.pairs = {}; this.cur_scope = {}; } FeatureJsonOutputter.prototype.indent = function() { var res = ""; for (var i = 0; i < this.depth; i++) { res += " "; } return res; }; FeatureJsonOutputter.prototype.openElem = function() { if (!this.first_element) { this.output += ","; } this.output += "\n" + this.indent() + "{"; this.first_element = true; 
this.depth++; }; FeatureJsonOutputter.prototype.closeElem = function() { this.depth--; this.output += "}"; this.first_element = false; }; FeatureJsonOutputter.prototype.openArray = function(name){ if (!this.first_element) { this.output += ","; } this.output += "\n" + this.indent() + "\"" + name + "\":["; this.first_element = true; this.depth++; }; FeatureJsonOutputter.prototype.closeArray = function(){ this.depth--; this.output += "\n" + this.indent() + "]"; this.first_element = false; }; FeatureJsonOutputter.prototype.visitFeature = function(a_id, b_id, name){ if (! (a_id in this.pairs) ) { this.pairs[a_id] = []; } var visited = this.pairs[a_id]; if (visited.indexOf(b_id + "-" + name) >= 0) { return true; } visited.push(b_id + "-" + name); return false; }; FeatureJsonOutputter.prototype.addFeature = function(a, b, name){ if (a == null || b == null){ return; } //do not add features between two fixed nodes if (a[0] == GIVEN && b[0] == GIVEN) { return; } var a_id = this.string_map.getId(a); var b_id = this.string_map.getId(b); if (a_id == b_id || this.visitFeature(a_id, b_id, name)){ return; } this.openElem(); this.output += '"a": ' + a_id + ","; this.output += '\t"b": ' + b_id + ","; this.output += '\t"f2": "' + name + '"'; this.closeElem(); }; FeatureJsonOutputter.prototype.addSymbol = function(key){ this.openElem(); this.output += '"v": ' + this.string_map.getId(key) + ","; if (key[0] == INFER){ //${id}-{name} this.output += '\t"inf": "' + escapeString(key.split("-")[1]) + '"'; } else { //#{name} this.output += '\t"giv": "' + escapeString(key.slice(1)) + '"'; } this.closeElem(); }; FeatureJsonOutputter.prototype.dumpSymbols = function(){ this.openArray("assign"); // var keys = Object.keys( this.string_map.map ); var keys = this.string_map.keys; for( var i = 0,length = keys.length; i < length; i++ ) { this.addSymbol(keys[i]); } this.closeArray(); }; FeatureJsonOutputter.prototype.beginScope = function(){ this.cur_scope = {}; }; 
/** Adds the id of key a to the scope record currently being built. */
FeatureJsonOutputter.prototype.addToScope = function(a){
    var a_id = this.string_map.getId(a);
    this.cur_scope[a_id] = true;
};

/**
 * Emits the current scope record; records with fewer than two entries are
 * dropped.
 */
FeatureJsonOutputter.prototype.endScope = function(){
    //{"cn":"!=","n":[14,366,370,372,108,40,356]}
    var keys = Object.keys(this.cur_scope);
    if (keys.length <= 1) {
        return;
    }

    this.openElem();
    this.output += '"cn":"!=", "n":[';
    this.output += keys.join(',');
    this.output += "]";
    this.closeElem();
};

/* -----[ StringMap ]----- */

/**
 * Assigns consecutive integer ids (starting at 0) to strings.
 *
 * @param nice_names when truthy, getId() returns its input unchanged and
 *        nothing is stored
 */
function StringMap(nice_names) {
    this.map = {};
    this.current_id = 0;
    this.nice_names = nice_names;
    this.keys = []; // inputs in the order in which they received an id
}

/**
 * Returns the id of input, allocating the next id on first use.
 *
 * @throws {Error} when input is null or undefined
 */
StringMap.prototype.getId = function(input){
    if (input == null){
        throw new Error("error null");
    }

    if (this.nice_names) return input;

    //we add a special character in front to allow for keys such as "toString"
    var escaped_input = "#" + input;

    if (!(escaped_input in this.map)) {
        this.map[escaped_input] = this.current_id;

        //keep ordered map of keys for iterating later
        this.keys.push(input);
        this.current_id++;
    }

    return this.map[escaped_input];
};

/* ------------------------ */

/**
 * Percent-encodes input with encodeURIComponent().
 *
 * encodeURIComponent() throws a URIError when the string contains a lone
 * surrogate (for example a source literal such as "\uD800"). Instead of
 * aborting, every lone surrogate is replaced by U+FFFD and the result is
 * encoded. Well-formed input is encoded exactly as before.
 *
 * @param input value to encode (converted to a string)
 * @returns {string} the encoded text
 */
function escapeString(input){
    try {
        return encodeURIComponent(input);
    } catch (e) {
        if (!(e instanceof URIError)) {
            throw e;
        }
    }

    var text = String(input);
    var well_formed = "";
    for (var i = 0; i < text.length; i++) {
        var unit = text.charCodeAt(i);
        if (unit >= 0xD800 && unit <= 0xDBFF) {
            // high surrogate: valid only when a low surrogate follows
            // (charCodeAt past the end yields NaN, which fails both comparisons)
            var next = text.charCodeAt(i + 1);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                well_formed += text.charAt(i) + text.charAt(i + 1);
                i++;
            } else {
                well_formed += "\uFFFD";
            }
        } else if (unit >= 0xDC00 && unit <= 0xDFFF) {
            // low surrogate that was not consumed as part of a pair
            well_formed += "\uFFFD";
        } else {
            well_formed += text.charAt(i);
        }
    }
    return encodeURIComponent(well_formed);
}

/**
 * Parses code (recording file as its file name) and runs figure_out_scope()
 * on the result. Errors raised by either step propagate to the caller.
 *
 * @returns the toplevel AST node
 */
function parseFile(code, file) {
    var toplevel = parse(code, {
        filename : file
    });
    toplevel.figure_out_scope();
    return toplevel;
}

/** Minimal stand-in for a symbol definition: just a name and an id. */
function FakeSymbolDef(name, id) {
    this.name = name;
    this.id = id;
}

/**
 * Annotates every node below root with:
 *  - id:         a number from a running counter (only if the node has no own id),
 *  - parent:     the walker's parent node (only if the node has no own parent),
 *  - num_childs: number of children visited by the walker,
 *  - child_id:   position of the node among its parent's children (0 for the root).
 *
 * For symbols with a definition, the definition's id is overwritten from the
 * same counter. This happens at every occurrence of the symbol, so the
 * definition keeps the value written at the last occurrence visited.
 */
function extendAst(root){
    var current_id = 0;

    var walker = new TreeWalker(function(node){
        var has_own = Object.prototype.hasOwnProperty;
        var parent = walker.parent();

        if (!has_own.call(node, "id")){
            node.id = current_id;
            current_id += 1;
        }

        if (!has_own.call(node, "parent")){
            node.parent = parent;
        }

        node.num_childs = 0;
        node.child_id = 0;
        if (parent !== undefined){
            node.child_id = parent.num_childs;
            parent.num_childs++;
        }

        if (node instanceof AST_Symbol) {
            // Disabled: synthesizing a definition for `this` symbols.
            // if (node.definition() == null && node instanceof AST_This){
            //     var scope = node;
            //     while (!(scope instanceof AST_Lambda) && scope.parent != null){
            //         scope = scope.parent;
            //     }
            //     var name = nodeToString(scope);
            //     node.thedef = new FakeSymbolDef(name + "_this", scope.id);
            // } else {
            var definition = node.definition();
            if (definition != null) {
                definition.id = current_id;
                current_id++;
            }
        }
    });

    root.walk(walker);
}