579 lines
15 KiB
JavaScript
579 lines
15 KiB
JavaScript
|
|
"use strict";
|
|
|
|
// Key prefix for names that are to be inferred (renameable symbols).
var INFER = "$";

// Key prefix for names/values that are given as-is (fixed symbols, constants, properties).
var GIVEN = "#";

// --- thresholds used by isMinified() ---

// A file whose AST node count is at least this many times its line count is
// treated as minified. (The identifier's spelling is kept because other code
// refers to it by this exact name.)
var EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE = 25;

// A file is treated as minified when more than this fraction of its name
// occurrences are short (at most two characters).
var MAX_RATIO_SHORT_NAMES = 0.45;

// A file is treated as minified once it contains at least this many name
// occurrences of the form "_<digit>...".
var NUM_NUMBERED_LOCALS = 5;
|
|
|
|
/**
 * Heuristically decides whether a parsed file looks minified or obfuscated.
 *
 * The file is reported as minified when any of the following holds:
 *   - it has at least EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE AST nodes per source line,
 *   - more than MAX_RATIO_SHORT_NAMES of its name occurrences are at most two
 *     characters long ("el" and "$" are not counted as short),
 *   - every name occurrence has the form "_<digit>..." (note: this also holds
 *     for a tree without any names, since 0 == 0),
 *   - at least NUM_NUMBERED_LOCALS name occurrences have the form "_<digit>...".
 *
 * @param {Object} toplevel - parsed AST root; walked with a TreeWalker.
 * @param {string} code - the source text, used only to count lines.
 * @param {string} file - file name (not used by the heuristic).
 * @returns {boolean} true when the file is considered minified.
 */
function isMinified(toplevel, code, file){
    var lineCount = code.split(/\r\n|\r|\n/).length;
    var counts = { nodes: 0, names: 0, shortNames: 0, numberedNames: 0 };

    toplevel.walk(new TreeWalker(function(node, descend){
        // every visited node counts towards the density check
        counts.nodes += 1;

        // only symbols other than `this` are considered names
        if (!(node instanceof AST_Symbol) || node instanceof AST_This) {
            return;
        }

        var name = node.name;
        counts.names += 1;

        if (name.length <= 2 && name != "el" && name != "$") {
            counts.shortNames += 1;
        }

        if (name.length >= 2 && name[0] == '_') {
            var second = name[1];
            if (second >= '0' && second <= '9') {
                counts.numberedNames += 1;
            }
        }
    }));

    if (EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE * lineCount <= counts.nodes) {
        return true;
    }
    if (counts.shortNames > counts.names * MAX_RATIO_SHORT_NAMES) {
        return true;
    }
    if (counts.numberedNames == counts.names) {
        return true;
    }
    return counts.numberedNames >= NUM_NUMBERED_LOCALS;
}
|
|
|
|
/**
 * Parses `code` and prints it back through a beautifying OutputStream that is
 * handed the string map built during feature extraction.
 *
 * @param {string} code - source text to process.
 * @param {string} file - file name, used for parsing and in the warning message.
 * @returns {?string} the printed source, or null when the file cannot be parsed.
 */
function replaceMangled(code, file) {
    var ast;
    try {
        ast = parseFile(code, file);
    } catch (err) {
        console.warn("Cannot parse file: '%s'", file);
        return null;
    }

    extendAst(ast);

    // The outputter's string_map decides which id each name gets in the final
    // output. To hand out the same ids here, the map has to be populated first
    // by running the same feature extraction; the generated features
    // themselves are not used.
    var names = new FeatureJsonOutputter();
    generateAstFeatures(ast, names);
    generateFnamesFeatures(ast, names);

    var printer = OutputStream({
        beautify : true,
        replace_mangled_map : names.string_map
    });
    ast.print(printer);
    return printer.toString();
}
|
|
|
|
/**
 * Parses `code` and produces either a DOT dump of its AST or the JSON feature
 * description (a "query" array followed by the dumped symbols).
 *
 * @param {string} code - source text to process.
 * @param {string} file - file name, used for parsing and in warning messages.
 * @param {boolean} print_ast - when truthy, return printAst() output instead of features.
 * @param {Array|string} features - searched with indexOf for "ASTREL", "FNAMES" and "FSCOPE".
 * @param {boolean} skip_minified - when truthy, files flagged by isMinified() are skipped.
 * @returns {?string} the generated text, or null when the file cannot be
 *     parsed or is skipped as minified.
 */
function extractFeatures(code, file, print_ast, features, skip_minified) {
    var ast;
    try {
        ast = parseFile(code, file);
    } catch (err) {
        console.warn("Cannot parse file: '%s'", file);
        return null;
    }

    extendAst(ast);

    if (print_ast) {
        return printAst(ast);
    }

    if (skip_minified && isMinified(ast, code, file)) {
        console.warn("Skipping minified file: '%s'", file);
        return null;
    }

    // feature kinds in the order their output is emitted
    var generators = [
        ["ASTREL", generateAstFeatures],
        ["FNAMES", generateFnamesFeatures],
        ["FSCOPE", generateFscopeConstraints]
    ];

    var out = new FeatureJsonOutputter();
    out.openElem();
    out.openArray("query");

    for (var k = 0; k < generators.length; k++) {
        if (features.indexOf(generators[k][0]) != -1) {
            var generate = generators[k][1];
            generate(ast, out);
        }
    }

    out.closeArray();
    out.dumpSymbols();
    out.closeElem();

    return out.output;
}
|
|
|
|
/* -----[ functions ]----- */
|
|
|
|
/**
 * Maps an AST node to the string key used for it in the generated features.
 *
 * Keys start with GIVEN for fixed names/values and with INFER for names that
 * are to be inferred (format: INFER + definition id + "-" + name). Element
 * accesses append "[]" and functions passed as call arguments append
 * "(" + child_id + ")" to the key of the expression they hang off.
 *
 * A node that has no name yields null. This includes compound nodes whose base
 * expression has no name, so that null is never concatenated into a key as the
 * text "null".
 *
 * @param {?Object} node - AST node (expects the `parent` and `child_id`
 *     properties added by extendAst for lambdas), or null/undefined.
 * @returns {?string} the key, or null when the node cannot be named.
 */
function nodeToString(node) {
    if (node == null) return null;

    if (node instanceof AST_Symbol) {
        // AST_Symbol::unmangleable() returns true if this symbol cannot be
        // renamed (it is either global, undeclared, or defined in a scope
        // where eval or with are in use).
        if (node instanceof AST_This || node.unmangleable()) {
            return GIVEN + node.name;
        }
        return INFER + node.definition().id + "-" + node.name;
    }

    if (node instanceof AST_Constant) {
        // only the first 64 characters of the value are kept
        return GIVEN + String(node.value).slice(0, 64);
    }

    if (node instanceof AST_Sub) {
        // x[1]: x -> expression, 1 -> property
        var base = nodeToString(node.expression);
        return base == null ? null : base + "[]";
    }

    if (node instanceof AST_PropAccess) {
        return GIVEN + node.property;
    }

    if (node instanceof AST_Defun) {
        // function foo(...) { ... }
        return nodeToString(node.name);
    }

    if (node instanceof AST_VarDef) {
        // var x = function () { ... }
        return nodeToString(node.name);
    }

    if (node instanceof AST_Assign) {
        // x = function () { ... }
        return nodeToString(node.left);
    }

    if (node instanceof AST_ObjectProperty) {
        // { "x" : function () { ... } }
        return GIVEN + node.key;
    }

    if (node instanceof AST_Call) {
        // x.foo( function () { ... } )
        // foo( function () { ... } )
        return nodeToString(node.expression);
    }

    if (node instanceof AST_Lambda) {
        var parent = node.parent;
        if (parent instanceof AST_Call) {
            // the lambda may itself be the callee (it is being invoked), in
            // which case it has no name
            if (parent.expression == node) return null;
            var callee = nodeToString(parent.expression);
            return callee == null ? null : callee + "(" + node.child_id + ")";
        }
        return nodeToString(parent);
    }

    return null;
}
|
|
|
|
/**
 * Returns the type label of an AST node as used in feature names and AST dumps.
 *
 * The label is the TYPE found on the node's prototype, except that binary and
 * unary expressions get their operator appended and booleans are reported as
 * "Bool".
 *
 * @param {Object} node - AST node.
 * @returns {string} the type label.
 */
function nodeType(node) {
    if (node instanceof AST_Binary || node instanceof AST_Unary) {
        return Object.getPrototypeOf(node).TYPE + node.operator;
    }

    if (node instanceof AST_Boolean) {
        return "Bool";
    }

    // Atoms are special constant values such as NaN, undefined, Infinity, ...
    // NOTE(review): if AST_Atom derives from AST_Constant in the AST library,
    // this condition can never hold and atoms keep their own TYPE - confirm.
    if (node instanceof AST_Atom && !(node instanceof AST_Constant)) {
        return "Atom";
    }

    return Object.getPrototypeOf(node).TYPE;
}
|
|
|
|
/**
 * Renders the part of a node path from index `start` towards its end.
 *
 * For every step from path[i] to path[i + 1] the text
 * nodeType(path[i]) + "[" + path[i + 1].child_id + "]" is appended, walking
 * from `start` forwards. The last node of the path only contributes its
 * child_id.
 *
 * @param {Array} path - nodes from an ancestor down to the target node.
 * @param {number} start - index of the first node to render.
 * @returns {string} the rendered path ("" when there is no step to render).
 */
function pathToStringFw(path, start){
    var text = "";
    var last = path.length - 1;
    var i = start;

    while (i < last) {
        text += nodeType(path[i]) + "[" + path[i + 1].child_id + "]";
        i++;
    }

    return text;
}
|
|
|
|
/**
 * Renders the part of a node path from its end back to index `start`.
 *
 * Produces the same step texts as pathToStringFw
 * (nodeType(path[i]) + "[" + path[i + 1].child_id + "]") but emits them in
 * reverse order, beginning with the step that leads to the last node.
 *
 * @param {Array} path - nodes from an ancestor down to the target node.
 * @param {number} start - index of the last node to render.
 * @returns {string} the rendered path ("" when there is no step to render).
 */
function pathToStringBw(path, start){
    var text = "";
    var i = path.length - 2;

    while (i >= start) {
        text += nodeType(path[i]) + "[" + path[i + 1].child_id + "]";
        i--;
    }

    return text;
}
|
|
|
|
/**
 * Dumps the AST as a Graphviz "digraph AST { ... }" description.
 *
 * Every visited node becomes a `node<id>` entry labelled with its nodeType();
 * every node that has a parent on the walker stack additionally gets an edge
 * from that parent. Relies on the `id` property assigned by extendAst.
 *
 * @param {Object} toplevel - AST root to dump.
 * @returns {string} the DOT source.
 */
function printAst(toplevel){
    var dot = "digraph AST {\n";

    var walker = new TreeWalker(function(node){
        var parent = walker.parent();

        dot += string_template(" node{id} [label=\"{label}\"];\n", {
            id: node.id,
            label: nodeType(node)
        });

        if (parent != null) {
            dot += string_template(" node{id1} -> node{id2} [weight=1];\n", {
                id1: parent.id,
                id2: node.id
            });
        }
    });

    toplevel.walk(walker);

    return dot + "}\n";
}
|
|
|
|
/**
 * Emits the AST relation features: for every visited node, each pair of
 * symbols / constants / property accesses found within its subtree (searched
 * by a NodePathFinder created with depth 3) is related by the AST path that
 * connects them.
 *
 * The feature name has the form
 *   <path from first node up to the common ancestor> ":" <type of the common
 *   ancestor> ":" <path from the common ancestor down to the second node>.
 *
 * @param {Object} toplevel - AST root to walk.
 * @param {Object} feature_outputter - receives addFeature(a, b, name) calls.
 * @throws {Error} when two paths returned by the finder share no common
 *     prefix (an internal invariant violation).
 */
function generateAstFeatures(toplevel, feature_outputter) {
    var walker = new TreeWalker(function(node){
        var paths = this.node_finder.find(node);

        for (var i = 0; i < paths.length; i++) {
            var path1 = paths[i];
            var node1 = path1[path1.length - 1];

            for (var j = i + 1; j < paths.length; j++) {
                var path2 = paths[j];
                var node2 = path2[path2.length - 1];

                // determine the common prefix to be skipped
                var common_prefix_len = 0;
                while (common_prefix_len < path1.length && common_prefix_len < path2.length
                        && path1[common_prefix_len] === path2[common_prefix_len]) {
                    common_prefix_len++;
                }

                if (common_prefix_len == 0) {
                    // thrown as an Error object so that the failure carries a stack trace
                    throw new Error("common prefix not greater than 0!");
                }

                feature_outputter.addFeature(
                    nodeToString(node1),
                    nodeToString(node2),
                    pathToStringBw(path1, common_prefix_len) + ":" + nodeType(path1[common_prefix_len - 1]) + ":" + pathToStringFw(path2, common_prefix_len)
                );
            }
        }
    });

    // the visitor above reads the finder from the walker it is invoked on
    walker.node_finder = new NodePathFinder(3, function(node) {
        return (node instanceof AST_Symbol || node instanceof AST_Constant || node instanceof AST_PropAccess);
    });

    toplevel.walk(walker);
}
|
|
|
|
/**
 * Relates `rhs` to every entry of `lhss`, starting with the last entry.
 *
 * The feature name is `lhs_label` repeated once per step back from the end of
 * `lhss` (once for the last entry, twice for the one before it, ...) followed
 * by `rhs_label`.
 *
 * @param {Array} lhss - left-hand side keys; the last entry is treated as nearest.
 * @param {string} lhs_label - label repeated per step.
 * @param {?string} rhs - right-hand side key.
 * @param {string} rhs_label - label appended after the repeated prefix.
 * @param {Object} feature_outputter - receives addFeature(a, b, name) calls.
 */
function addFeatures(lhss, lhs_label, rhs, rhs_label, feature_outputter){
    var chain = "";
    var idx = lhss.length;

    while (idx-- > 0) {
        chain += lhs_label;
        feature_outputter.addFeature(lhss[idx], rhs, chain + rhs_label);
    }
}
|
|
|
|
/**
 * Emits one scope constraint for `node`: the scope's own name (if it has one),
 * the symbols it encloses, the variables it declares and all globals of the
 * toplevel are collected into a single scope of the outputter.
 *
 * Each symbol definition is represented by the key of its first original
 * symbol (`orig[0]`).
 *
 * @param {Object} node - scope node providing `enclosed` and `variables`.
 * @param {Object} toplevel - AST root providing `globals`.
 * @param {Object} feature_outputter - receives beginScope / addToScope / endScope calls.
 */
function addScopeConstraints(node, toplevel, feature_outputter){
    // registers the key of a symbol definition in the current scope
    function register(symbol_def) {
        feature_outputter.addToScope(nodeToString(symbol_def.orig[0]));
    }

    feature_outputter.beginScope();

    var own_name = nodeToString(node);
    if (own_name != null) {
        feature_outputter.addToScope(own_name);
    }

    var enclosed = node.enclosed;
    for (var k = 0; k < enclosed.length; k++) {
        register(enclosed[k]);
    }

    node.variables.each(register);
    toplevel.globals.each(register);

    feature_outputter.endScope();
}
|
|
|
|
|
|
/**
 * Emits scope constraints for the toplevel scope and for every function
 * (AST_Defun / AST_Lambda) found while walking the tree.
 *
 * @param {Object} toplevel - AST root.
 * @param {Object} feature_outputter - outputter handed on to addScopeConstraints.
 */
function generateFscopeConstraints(toplevel, feature_outputter){
    // the toplevel scope itself comes first
    addScopeConstraints(toplevel, toplevel, feature_outputter);

    var visitor = function(node) {
        var is_function = node instanceof AST_Defun || node instanceof AST_Lambda;
        if (is_function) {
            addScopeConstraints(node, toplevel, feature_outputter);
        }
    };

    toplevel.walk(new TreeWalker(visitor));
}
|
|
|
|
/**
 * Emits features that relate function names to what appears inside them.
 *
 * Every named function is related to its parameters ("PAR"). While inside
 * named functions, each `new` expression ("NEW"), call ("CALL"), constant
 * (upper-cased node type), variable definition ("DECL"), property access that
 * is not directly under a call ("PROP") and returned value with a name
 * ("RETURN") is related to all enclosing named functions via addFeatures.
 *
 * @param {Object} toplevel - AST root to walk.
 * @param {Object} feature_outputter - outputter handed on to addFeatures.
 */
function generateFnamesFeatures(toplevel, feature_outputter){
    // names of the named functions currently being traversed, outermost first
    var enclosing = [];

    // relates `rhs` to all enclosing functions
    function emit(rhs, label) {
        addFeatures(enclosing, "FN", rhs, label, feature_outputter);
    }

    toplevel.walk(new TreeWalker(function(node, descend){
        var fn_name = (node instanceof AST_Defun || node instanceof AST_Lambda)
            ? nodeToString(node)
            : null;

        if (fn_name != null) {
            var params = node.argnames;
            for (var k = 0; k < params.length; k++) {
                addFeatures([fn_name], "FN", nodeToString(params[k]), "PAR", feature_outputter);
            }

            enclosing.push(fn_name);
            descend(); // traverse the children while the function is on the stack
            enclosing.pop();

            return true; // do not traverse the children again
        }

        if (node instanceof AST_New) {
            emit(nodeToString(node), "NEW");
            return;
        }
        if (node instanceof AST_Call) {
            emit(nodeToString(node), "CALL");
            return;
        }
        if (node instanceof AST_Constant) {
            emit(nodeToString(node), nodeType(node).toUpperCase());
            return;
        }
        if (node instanceof AST_VarDef) {
            emit(nodeToString(node.name), "DECL");
            return;
        }
        if (node instanceof AST_Dot && !(node.parent instanceof AST_Call)) {
            emit(nodeToString(node), "PROP");
            return;
        }
        if (node instanceof AST_Return) {
            var returned = nodeToString(node.value);
            if (returned != null) {
                emit(returned, "RETURN");
            }
        }
    }));
}
|
|
|
|
/* -----[ NodePathFinder ]----- */
|
|
|
|
/**
 * Tree walker that collects the paths (copies of the walker stack) leading to
 * all nodes of a subtree that are accepted by `filter`.
 *
 * @param {number} max_depth - nodes are skipped once the walker stack is
 *     longer than this.
 * @param {function(Object): boolean} filter - decides which nodes are recorded.
 */
function NodePathFinder(max_depth, filter) {
    this.max_depth = max_depth;
    this.paths = [];
    this.filter = filter;
}

// NOTE(review): the TreeWalker is instantiated once, as the prototype, and the
// constructor above does not call TreeWalker itself; any state the walker
// keeps per instance (the visitor reads `this.stack`) is therefore presumably
// shared by all NodePathFinder objects - confirm against TreeWalker.
NodePathFinder.prototype = new TreeWalker(function(node, descend){
    var too_deep = this.stack.length > this.max_depth;
    if (too_deep || node instanceof AST_Defun) {
        // neither this node nor its children are searched
        return true;
    }

    // Visit the children before recording the node itself; otherwise "x.foo()"
    // would yield the feature foo - x instead of x - foo, as x is below foo in
    // the AST.
    descend();

    if (this.filter(node)) {
        this.paths.push(this.stack.slice());
    }

    // the children were already visited by descend() above
    return true;
});

/**
 * Searches the subtree rooted at `node`.
 *
 * @param {Object} node - root of the search.
 * @returns {Array<Array>} one path per accepted node; each path is the walker
 *     stack at the time the node was recorded.
 */
NodePathFinder.prototype.find = function(node) {
    this.paths = [];
    this.root = node;
    node.walk(this);
    return this.paths;
};
|
|
|
|
/* ---[ JsonOutputter ]--- */
|
|
|
|
/**
 * Incrementally builds the JSON text describing features, scope constraints
 * and symbols. The text is accumulated in `output`; names are translated to
 * ids through `string_map`.
 */
function FeatureJsonOutputter() {
    this.string_map = new StringMap(false);
    // true while nothing has been written yet at the current nesting level
    this.first_element = true;
    this.output = "";
    this.depth = 0;
    // per first id: the "<second id>-<feature name>" entries already emitted
    this.pairs = {};
    // ids collected for the scope currently being built
    this.cur_scope = {};
}

/**
 * @returns {string} one space per current nesting level.
 */
FeatureJsonOutputter.prototype.indent = function() {
    var pad = "";
    for (var remaining = this.depth; remaining > 0; remaining--) {
        pad += " ";
    }
    return pad;
};

/**
 * Starts a JSON object on a new line, preceded by a comma unless it is the
 * first element at the current level.
 */
FeatureJsonOutputter.prototype.openElem = function() {
    var separator = this.first_element ? "" : ",";
    this.output += separator + "\n" + this.indent() + "{";
    this.first_element = true;
    this.depth++;
};

/**
 * Ends the JSON object started by openElem.
 */
FeatureJsonOutputter.prototype.closeElem = function() {
    this.depth--;
    this.output += "}";
    this.first_element = false;
};

/**
 * Starts a named JSON array on a new line, preceded by a comma unless it is
 * the first element at the current level.
 *
 * @param {string} name - property name of the array (written verbatim).
 */
FeatureJsonOutputter.prototype.openArray = function(name){
    var separator = this.first_element ? "" : ",";
    this.output += separator + "\n" + this.indent() + "\"" + name + "\":[";
    this.first_element = true;
    this.depth++;
};

/**
 * Ends the JSON array started by openArray.
 */
FeatureJsonOutputter.prototype.closeArray = function(){
    this.depth--;
    this.output += "\n" + this.indent() + "]";
    this.first_element = false;
};

/**
 * Records that the feature (a_id, b_id, name) is being emitted.
 *
 * @param {number} a_id - id of the first node.
 * @param {number} b_id - id of the second node.
 * @param {string} name - feature name.
 * @returns {boolean} true when the same feature was recorded before.
 */
FeatureJsonOutputter.prototype.visitFeature = function(a_id, b_id, name){
    var entry = b_id + "-" + name;
    var seen = (a_id in this.pairs) ? this.pairs[a_id] : (this.pairs[a_id] = []);

    if (seen.indexOf(entry) >= 0) {
        return true;
    }
    seen.push(entry);
    return false;
};

/**
 * Emits the feature `name` between the nodes `a` and `b`.
 *
 * Nothing is emitted when either key is missing, when both keys are given
 * (fixed) names, when both keys map to the same id, or when the same feature
 * was emitted before.
 *
 * @param {?string} a - key of the first node.
 * @param {?string} b - key of the second node.
 * @param {string} name - feature name (written verbatim).
 */
FeatureJsonOutputter.prototype.addFeature = function(a, b, name){
    if (a == null || b == null) {
        return;
    }

    // do not add features between two fixed nodes
    if (a[0] == GIVEN && b[0] == GIVEN) {
        return;
    }

    var a_id = this.string_map.getId(a);
    var b_id = this.string_map.getId(b);

    if (a_id == b_id || this.visitFeature(a_id, b_id, name)) {
        return;
    }

    this.openElem();
    this.output += '"a": ' + a_id + ',\t"b": ' + b_id + ',\t"f2": "' + name + '"';
    this.closeElem();
};

/**
 * Emits the symbol entry for `key`: its id plus either the name to infer
 * ("inf") or the given name ("giv"), escaped with escapeString.
 *
 * @param {string} key - key as stored in the string map.
 */
FeatureJsonOutputter.prototype.addSymbol = function(key){
    this.openElem();

    this.output += '"v": ' + this.string_map.getId(key) + ",";

    var inferred = key[0] == INFER;
    // inferred keys look like ${id}-{name}, given keys like #{name}
    var field = inferred ? '\t"inf": "' : '\t"giv": "';
    var text = inferred ? key.split("-")[1] : key.slice(1);
    this.output += field + escapeString(text) + '"';

    this.closeElem();
};

/**
 * Emits the "assign" array with one entry per key of the string map, in the
 * order in which the keys were first seen.
 */
FeatureJsonOutputter.prototype.dumpSymbols = function(){
    this.openArray("assign");
    this.string_map.keys.forEach(function(key) {
        this.addSymbol(key);
    }, this);
    this.closeArray();
};

/**
 * Starts collecting a new scope, discarding any previous collection.
 */
FeatureJsonOutputter.prototype.beginScope = function(){
    this.cur_scope = {};
};

/**
 * Adds the id of key `a` to the scope being collected.
 *
 * @param {string} a - key of the node.
 */
FeatureJsonOutputter.prototype.addToScope = function(a){
    this.cur_scope[this.string_map.getId(a)] = true;
};

/**
 * Emits the collected scope as a "!=" constraint over its ids, e.g.
 * {"cn":"!=","n":[14,366,370,372,108,40,356]}. Scopes with fewer than two ids
 * are not emitted.
 */
FeatureJsonOutputter.prototype.endScope = function(){
    var ids = Object.keys(this.cur_scope);
    if (ids.length <= 1) {
        return;
    }

    this.openElem();
    this.output += '"cn":"!=", "n":[' + ids.join(",") + "]";
    this.closeElem();
};
|
|
|
|
/* -----[ StringMap ]----- */
|
|
|
|
/**
 * Assigns consecutive numeric ids (starting at 0) to strings and remembers
 * the order in which the strings were first seen.
 *
 * @param {boolean} nice_names - when truthy, getId returns its input unchanged
 *     and nothing is recorded.
 */
function StringMap(nice_names) {
    this.map = {};
    this.current_id = 0;
    this.nice_names = nice_names;
    // the mapped strings in order of first appearance
    this.keys = [];
}

/**
 * Returns the id of `input`, allocating the next free id on first use.
 *
 * @param {string} input - the string to look up.
 * @returns {number|string} the id, or `input` itself in nice_names mode.
 * @throws {Error} when `input` is null or undefined.
 */
StringMap.prototype.getId = function(input){
    if (input == null) {
        throw new Error("error null");
    }
    if (this.nice_names) {
        return input;
    }

    // a special character is put in front so that strings such as "toString"
    // cannot clash with properties every object has
    var slot = "#" + input;

    if (slot in this.map) {
        return this.map[slot];
    }

    var id = this.current_id++;
    this.map[slot] = id;
    // keep the ordered list of keys for iterating later
    this.keys.push(input);
    return id;
};
|
|
|
|
/* ------------------------ */
|
|
|
|
/**
 * Escapes a name or value for embedding in the generated JSON text by
 * percent-encoding it with encodeURIComponent.
 *
 * encodeURIComponent throws a URIError for strings containing an unpaired
 * surrogate. Such strings do occur here: string constants may contain lone
 * surrogates, and truncating a constant to a fixed number of characters can
 * cut a surrogate pair in half. In that case every unpaired surrogate is
 * replaced by U+FFFD (the replacement character) before encoding, so a single
 * odd constant cannot abort the whole output. Well-formed input is encoded
 * exactly as before.
 *
 * @param {string} input - text to escape.
 * @returns {string} the percent-encoded text.
 */
function escapeString(input){
    try {
        return encodeURIComponent(input);
    } catch (e) {
        if (!(e instanceof URIError)) {
            throw e;
        }
        // valid pairs are matched first and kept; whatever surrogate remains is unpaired
        var repaired = String(input).replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, function(unit) {
            return unit.length === 2 ? unit : "\uFFFD";
        });
        return encodeURIComponent(repaired);
    }
}
|
|
|
|
/**
 * Parses source text into an AST and resolves its scopes.
 *
 * @param {string} code - source text to parse.
 * @param {string} file - file name passed to the parser as `filename`.
 * @returns {Object} the parsed toplevel node, after figure_out_scope() has run.
 */
function parseFile(code, file) {
    var options = { filename : file };
    var ast = parse(code, options);
    ast.figure_out_scope();
    return ast;
}
|
|
|
|
/**
 * Minimal stand-in for a symbol definition: just a name and an id.
 *
 * @param {string} name - name of the symbol.
 * @param {number} id - id of the definition.
 */
function FakeSymbolDef(name, id) {
    this.name = name;
    this.id = id;
}
|
|
|
|
/**
 * Annotates every node of the tree with the bookkeeping properties used by
 * the feature extraction:
 *   - `id`: number drawn from a counter local to this call (kept if the node
 *     already has its own `id`),
 *   - `parent`: the parent node according to the walker (kept if already set),
 *   - `child_id`: position of the node among the visited children of its parent,
 *   - `num_childs`: number of visited children.
 *
 * In addition, each time a symbol with a definition is visited, that
 * definition's `id` is set to the next value of the same counter, so a
 * definition ends up with the id assigned at its last visited symbol.
 *
 * A disabled variant that synthesized a FakeSymbolDef for `this` symbols
 * without a definition used to live here.
 *
 * @param {Object} root - AST root to annotate (modified in place).
 */
function extendAst(root){
    var next_id = 0;

    var walker = new TreeWalker(function(node){
        var parent = walker.parent();

        if (!node.hasOwnProperty("id")) {
            node.id = next_id++;
        }
        if (!node.hasOwnProperty("parent")) {
            node.parent = parent;
        }

        node.num_childs = 0;
        node.child_id = 0;
        if (parent !== undefined) {
            // the parent was visited before, so its counter is initialised
            node.child_id = parent.num_childs++;
        }

        if (node instanceof AST_Symbol) {
            var definition = node.definition();
            if (definition != null) {
                definition.id = next_id++;
            }
        }
    });

    root.walk(walker);
}