diff --git a/bin/js_features.js b/bin/js_features.js new file mode 100755 index 00000000..50fcbbeb --- /dev/null +++ b/bin/js_features.js @@ -0,0 +1,111 @@ +//#! /usr/bin/env nodejs +// -*- js -*- + +"use strict"; + +var UglifyJS = require("../tools/node"); +var sys = require("util"); +var yargs = require("yargs"); +var fs = require("fs"); + +var ARGS = yargs + .usage("$0 input1.js \n") + .describe("print_ast", "Prints a dot file describing the internal abstract syntax") + .describe("json_formatting", "Prints the JSON nicelly formatted") + .describe("skip_minified", "Whether to skip processing minified files") + .describe("features", "Comma separated list of features: \n" + + "ASTREL - relations in AST, \n" + + "FNAMES - function names to internal calls") + .demand(1) + .default('features', 'ASTREL,FNAMES') + .boolean("print_ast") + .boolean("skip_minified") + .boolean("json_formatting") + .string("features") + .wrap(80) + .argv +; + +normalize(ARGS); + +if (ARGS.h || ARGS.help) { + sys.puts(yargs.help()); + process.exit(0); +} + +var files = ARGS._.slice(); +if (files.length > 1) { + sys.error("WARNING: expected only single input file. Processing file '" + files[0] + "' while the rest is ignored."); +}; + +if (ARGS.features === true) { + sys.error("ERROR: empty set of features."); + process.exit(1); +}; + +var features = ARGS.features.split(","); +for (var i = 0; i < features.length; i++) { + if (features[i] != "FNAMES" && features[i] != "ASTREL") { + sys.error("WARNING: ignoring not suppored feature '" + features[i] + "'."); + }; +}; + +for (var i = 0; i < files.length; i++) { + processFile(files[i], ARGS.print_ast, ARGS.features, ARGS.json_formatting, ARGS.skip_minified); +}; + +function stripInterpreter(code){ + if (code.slice(0,2) != "#!"){ + return code; + } + + return code.slice(code.indexOf('\n') + 1); +} + +function processFile(file, print_ast, features, json_formatting, skip_minified) { + var code; + try { + code = fs.readFileSync(file, "utf-8"); + } + catch (ex) { + sys.error("ERROR: can't read file: " + file); + return; + } + + //if it is a script, the UglifyJS parser will fail to parse it + code = stripInterpreter(code); + + var output = UglifyJS.extractFeatures(code, file, print_ast, features, skip_minified); + if (output == null) { + return; + } + + if (!json_formatting) { + output = removeWhitespace(output); + } + + //validate JSON + try { + JSON.parse(output); + } catch (e) { + throw e; + } + + if (removeWhitespace(output) != '{"query":[],"assign":[]}') { + console.log(output); + } + +} + +/* ------------------------ */ + +function normalize(o) { + for (var i in o) if (o.hasOwnProperty(i) && /-/.test(i)) { + o[i.replace(/-/g, "_")] = o[i]; + delete o[i]; + } +} + +function removeWhitespace(input){ + return input.replace(/\s/g,""); +} \ No newline at end of file diff --git a/extract_features.py b/extract_features.py new file mode 100755 index 00000000..f78a58cd --- /dev/null +++ b/extract_features.py @@ -0,0 +1,61 @@ +#!/usr/bin/python + +import multiprocessing +import os +import sys +import shutil + +def PrintUsage(): + print """ +Usage: + extract_features.py --filelist +OR + extract_features.py --dir +""" + exit(1) + +def GetJSFilesInDir(d): + for root, _, files in os.walk(d): + for f in files: + fname = os.path.join(root, f) + if fname.endswith('.js'): + yield fname + + +TMP_DIR = "" + +def ExtractFeaturesForFile(f): + global TMP_DIR + os.system("nodejs bin/js_features.js --skip_minified '%s' >> %s/%d" % (f, TMP_DIR, os.getpid())) + +def ExtractFeaturesForFileList(files): + global TMP_DIR + TMP_DIR = "/tmp/feature_extractor%d" % (os.getpid()) + if os.path.exists(TMP_DIR): + shutil.rmtree(TMP_DIR) + os.makedirs(TMP_DIR) + try: + p = multiprocessing.Pool(multiprocessing.cpu_count()) + p.map(ExtractFeaturesForFile, files) + output_files = os.listdir(TMP_DIR) + for f in output_files: + os.system("cat %s/%s" % (TMP_DIR, f)) + finally: + shutil.rmtree(TMP_DIR) + + +if __name__ == '__main__': + if (len(sys.argv) <= 1): + PrintUsage() + + # Process command line arguments + if (sys.argv[1] == "--filelist"): + files = open(sys.argv[2], 'r').read().split('\n') + elif (sys.argv[1] == "--dir"): + files = [f for f in GetJSFilesInDir(sys.argv[2])] + else: + PrintUsage() + # Remove files that say they are minified. + files = [f for f in files if not f.endswith('.min.js')] + ExtractFeaturesForFileList(files) + diff --git a/lib/feature_extractor.js b/lib/feature_extractor.js new file mode 100644 index 00000000..cea2db00 --- /dev/null +++ b/lib/feature_extractor.js @@ -0,0 +1,514 @@ + +"use strict"; + +var INFER = '$'; +var GIVEN = '#'; + +var EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE = 25; +var MAX_RATIO_SHORT_NAMES = 0.45; +var NUM_NUMBERED_LOCALS = 5; + +function isMinified(toplevel, code, file){ + var numLines = code.split(/\r\n|\r|\n/).length; + var numStatements = 0; + var numNames = 0; + var numShortNames = 0; + var numNumberedNames = 0; + + toplevel.walk(new TreeWalker(function(node, descend){ + numStatements++; + if (node instanceof AST_Symbol && !(node instanceof AST_This)) { + numNames++; + if (node.name.length <= 2 && node.name != "el" && node.name != "$") { + numShortNames++; + } + if (node.name.length >= 2 && node.name[0] == '_') { + var c2 = node.name[1]; + if (c2 >= '0' && c2 <= '9') ++numNumberedNames; + } + } + })); + + return (EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE * numLines <= numStatements) || + (numShortNames > numNames * MAX_RATIO_SHORT_NAMES) || + numNumberedNames == numNames || + numNumberedNames >= NUM_NUMBERED_LOCALS; +} + +function replaceMangled(code, file) { + var toplevel; + try { + toplevel = parseFile(code, file); + } catch (e) { + console.warn("Cannot parse file: '%s'", file); + return null; + } + + extendAst(toplevel); + + var feature_outputter = new FeatureJsonOutputter(); + generateAstFeatures(toplevel, feature_outputter); + generateFnamesFeatures(toplevel, feature_outputter); + + //feature_outputter.string_map defines what id is assigment to each node in the final output + //therefore to assign same ids, we need to first populate by running feature extraction + var stream = OutputStream({beautify : true, replace_mangled_map : feature_outputter.string_map}); + toplevel.print(stream); + return stream.toString(); +} + +function extractFeatures(code, file, print_ast, features, skip_minified) { + var toplevel; + try { + toplevel = parseFile(code, file); + } catch (e) { + console.warn("Cannot parse file: '%s'", file); + return null; + } + + extendAst(toplevel); + + if (print_ast) { + return printAst(toplevel); + } + + if (skip_minified && isMinified(toplevel, code, file)){ + console.warn("Skipping minified file: '%s'", file); + return null; + } + + var feature_outputter = new FeatureJsonOutputter(); + feature_outputter.openElem(); + feature_outputter.openArray("query"); + + if (features.indexOf("ASTREL") != -1) { + generateAstFeatures(toplevel, feature_outputter); + } + + if (features.indexOf("FNAMES") != -1) { + generateFnamesFeatures(toplevel, feature_outputter); + } + + feature_outputter.closeArray(); + feature_outputter.dumpSymbols(); + feature_outputter.closeElem(); + + return feature_outputter.output; +} + +/* -----[ functions ]----- */ + +function nodeToString(node) { + if (node == null) return null; + + if (node instanceof AST_Symbol){ + if (node instanceof AST_This ){ + return GIVEN + node.name; + } + // AST_Symbol::unmangleable() returns true if this symbol cannot be renamed (it's either global, undeclared, or defined in scope where eval or with are in use. + if (node.unmangleable()){ + return GIVEN + node.name; + } + return INFER + node.definition().id + "-" + node.name; + } else if (node instanceof AST_Constant){ + return GIVEN + String(node.value).slice(0,64); + } else if (node instanceof AST_Sub){ + //x[1], x -> expression, 1 -> property + return nodeToString(node.expression) + "[]"; + } else if (node instanceof AST_PropAccess){ + return GIVEN + node.property; + } else if (node instanceof AST_Defun) { + //function foo(...) { ... } + return nodeToString(node.name); + } else if (node instanceof AST_VarDef){ + // var x = function () { ... } + return nodeToString(node.name); + } else if (node instanceof AST_Assign){ + //x = function () { ... } + return nodeToString(node.left); + } else if (node instanceof AST_ObjectProperty){ + // { "x" : function () { ... } } + return GIVEN + node.key; + } else if (node instanceof AST_Call){ + //x.foo( function () { ... } ) + //foo( function () { ... } ) + return nodeToString(node.expression); + } else if (node instanceof AST_Lambda) { + if (node.parent instanceof AST_Call){ + //'node.parent.expression != node' as lambda can call itself + return (node.parent.expression == node) ? null : nodeToString(node.parent.expression) + "(" + node.child_id + ")"; + } + return nodeToString(node.parent); + } + + return null; +} + +function nodeType(node) { + if (node instanceof AST_Binary || node instanceof AST_Unary) { + return node.__proto__.TYPE + node.operator; + } else if (node instanceof AST_Boolean) { + return "Bool"; + } else if (node instanceof AST_Atom && !(node instanceof AST_Constant)) { + //atoms are special constant values as Nan, Undefined, Infinity,.. + return "Atom"; + } + + return node.__proto__.TYPE; +} + +function pathToStringFw(path, start){ + var res = ""; + for (var i = start; i < path.length - 1; i++) { + res += nodeType(path[i]); + res += "[" + path[i+1].child_id + "]"; + } + + return res; +} + +function pathToStringBw(path, start){ + var res = ""; + for (var i = path.length - 2; i >= start; i--) { + res += nodeType(path[i]); + res += "[" + path[i+1].child_id + "]"; + } + + return res; +} + +function printAst(toplevel){ + var output = ""; + + var walker = new TreeWalker(function(node){ + output += string_template(" node{id} [label=\"{label}\"];\n", { + id: node.id, + label: nodeType(node) + }); + + if (walker.parent() != null) { + output += string_template(" node{id1} -> node{id2} [weight=1];\n", { + id1: walker.parent().id, + id2: node.id + }); + } + }); + + output += "digraph AST {\n"; + toplevel.walk(walker); + output += "}\n"; + return output; +} + +function generateAstFeatures(toplevel, feature_outputter) { + var walker = new TreeWalker(function(node){ + // console.log(nodeType(node) + " - " + nodeToString(node)); + var paths = this.node_finder.find(node); + for (var i = 0; i < paths.length; i++) { + var path1 = paths[i]; + var node1 = path1[path1.length - 1]; + + for (var j = i + 1; j < paths.length; j++) { + var common_prefix_len = 0; + var path2 = paths[j]; + var node2 = path2[path2.length - 1]; + + //determine common prefix to be skipped + while(common_prefix_len < path1.length && common_prefix_len < path2.length + && path1[common_prefix_len] === path2[common_prefix_len]){ + common_prefix_len++; + } + + if (common_prefix_len == 0) { + throw "common prefix not greater than 0!"; + } + + feature_outputter.addFeature( + nodeToString(node1), + nodeToString(node2), + pathToStringBw(path1, common_prefix_len) + ":" + nodeType(path1[common_prefix_len - 1]) + ":" + pathToStringFw(path2, common_prefix_len) + ); + } + } + }); + + walker.node_finder = new NodePathFinder(3, function(node) { + return (node instanceof AST_Symbol || node instanceof AST_Constant || node instanceof AST_PropAccess); + }); + + toplevel.walk(walker); +} + +function addFeatures(lhss, lhs_label, rhs, rhs_label, feature_outputter){ + var prefix = ""; + for (var i = lhss.length - 1; i >= 0; i--) { + prefix += lhs_label; + feature_outputter.addFeature(lhss[i], rhs, prefix + rhs_label); + } +} + +function generateFnamesFeatures(toplevel, feature_outputter){ + var outer_funcs = []; + + toplevel.walk(new TreeWalker(function(node, descend){ + + if ((node instanceof AST_Defun || node instanceof AST_Lambda) && nodeToString(node) != null) { + var name = nodeToString(node); + + for (var i = 0; i < node.argnames.length; i++) { + addFeatures([name], "FN", nodeToString(node.argnames[i]), "PAR", feature_outputter); + } + + outer_funcs.push(name); + descend(); //traverse childs + outer_funcs.pop(); + + return true; //do not traverse childs again + } + + if (node instanceof AST_New) { + addFeatures(outer_funcs, "FN", nodeToString(node), "NEW", feature_outputter); + } else if (node instanceof AST_Call) { + addFeatures(outer_funcs, "FN", nodeToString(node), "CALL", feature_outputter); + } else if (node instanceof AST_Constant){ + addFeatures(outer_funcs, "FN", nodeToString(node), nodeType(node).toUpperCase(), feature_outputter); + } else if (node instanceof AST_VarDef){ + addFeatures(outer_funcs, "FN", nodeToString(node.name), "DECL", feature_outputter); + } else if (node instanceof AST_Dot && !(node.parent instanceof AST_Call)) { + addFeatures(outer_funcs, "FN", nodeToString(node), "PROP", feature_outputter); + } else if (node instanceof AST_Return && nodeToString(node.value) != null) { + addFeatures(outer_funcs, "FN", nodeToString(node.value), "RETURN", feature_outputter); + } + })); +} + +/* -----[ NodePathFinder ]----- */ + +function NodePathFinder(max_depth, filter) { + this.max_depth = max_depth; + this.paths = []; + this.filter = filter; +} + +NodePathFinder.prototype = new TreeWalker(function(node, descend){ + if (this.stack.length > this.max_depth || node instanceof AST_Defun){ + return true; + } + + //enforce in-order traversal + //otherwise we get for "x.foo()" feature foo - x instead of x - foo as x is a parent of foo in the AST + descend(); + + if (this.filter(node)) { + this.paths.push(this.stack.slice(0)); + } + + return true; +}); + +NodePathFinder.prototype.find = function(node) { + this.root = node; + this.paths = []; + node.walk(this); + return this.paths; +}; + +/* ---[ JsonOutputter ]--- */ + +function FeatureJsonOutputter() { + this.string_map = new StringMap(false); + this.first_element = true; + this.output = ""; + this.depth = 0; + this.pairs = {}; +} + +FeatureJsonOutputter.prototype.indent = function() { + var res = ""; + for (var i = 0; i < this.depth; i++) { + res += " "; + } + return res; +}; + +FeatureJsonOutputter.prototype.openElem = function() { + if (!this.first_element) { + this.output += ","; + } + this.output += "\n" + this.indent() + "{"; + this.first_element = true; + this.depth++; +}; + +FeatureJsonOutputter.prototype.closeElem = function() { + this.depth--; + this.output += "}"; + this.first_element = false; +}; + + +FeatureJsonOutputter.prototype.openArray = function(name){ + if (!this.first_element) { + this.output += ","; + } + this.output += "\n" + this.indent() + "\"" + name + "\":["; + this.first_element = true; + this.depth++; +}; + +FeatureJsonOutputter.prototype.closeArray = function(){ + this.depth--; + this.output += "\n" + this.indent() + "]"; + this.first_element = false; +}; + +FeatureJsonOutputter.prototype.visitFeature = function(a_id, b_id, name){ + if (! (a_id in this.pairs) ) { + this.pairs[a_id] = []; + } + var visited = this.pairs[a_id]; + + if (visited.indexOf(b_id + "-" + name) >= 0) { + return true; + } + visited.push(b_id + "-" + name); + return false; +}; + +FeatureJsonOutputter.prototype.addFeature = function(a, b, name){ + if (a == null || b == null){ + return; + } + + //do not add features between two fixed nodes + if (a[0] == GIVEN && b[0] == GIVEN) { + return; + } + + var a_id = this.string_map.getId(a); + var b_id = this.string_map.getId(b); + + if (a_id == b_id || this.visitFeature(a_id, b_id, name)){ + return; + } + + this.openElem(); + + this.output += '"a": ' + a_id + ","; + this.output += '\t"b": ' + b_id + ","; + this.output += '\t"f2": "' + name + '"'; + + this.closeElem(); +}; + +FeatureJsonOutputter.prototype.addSymbol = function(key){ + this.openElem(); + + this.output += '"v": ' + this.string_map.getId(key) + ","; + if (key[0] == INFER){ + //${id}-{name} + this.output += '\t"inf": "' + escapeString(key.split("-")[1]) + '"'; + } else { + //#{name} + this.output += '\t"giv": "' + escapeString(key.slice(1)) + '"'; + } + + this.closeElem(); +}; + +FeatureJsonOutputter.prototype.dumpSymbols = function(){ + this.openArray("assign"); + + // var keys = Object.keys( this.string_map.map ); + var keys = this.string_map.keys; + for( var i = 0,length = keys.length; i < length; i++ ) { + this.addSymbol(keys[i]); + } + + this.closeArray(); +}; + +/* -----[ StringMap ]----- */ + +function StringMap(nice_names) { + this.map = {}; + this.current_id = 0; + this.nice_names = nice_names; + this.keys = []; +} + +StringMap.prototype.getId = function(input){ + if (input == null){ + throw new Error("error null"); + } + if (this.nice_names) return input; + + //we add a special character in from to allow for keys such as "toString" + var escaped_input = "#" + input; + + if (!(escaped_input in this.map)) { + this.map[escaped_input] = this.current_id; + + //keep ordered map of keys for iterating later + this.keys.push(input); + this.current_id++; + } + + return this.map[escaped_input]; +}; + +/* ------------------------ */ + +function escapeString(input){ + return encodeURIComponent(input); +} + +function parseFile(code, file) { + var toplevel = parse(code, { + filename : file + }); + toplevel.figure_out_scope(); + return toplevel; +} + +function FakeSymbolDef(name, id) { + this.name = name; + this.id = id; +}; + +function extendAst(root){ + var current_id = 0; + + var walker = new TreeWalker(function(node){ + + if (!node.hasOwnProperty("id")){ + node.id = current_id; + current_id += 1; + } + if (!node.hasOwnProperty("parent")){ + node.parent = walker.parent(); + } + node.num_childs = 0; + node.child_id = 0; + if (walker.parent() !== undefined){ + node.child_id = walker.parent().num_childs; + walker.parent().num_childs++; + } + + if (node instanceof AST_Symbol) { + // if (node.definition() == null && node instanceof AST_This){ + // var scope = node; + // while (!(scope instanceof AST_Lambda) && scope.parent != null){ + // scope = scope.parent; + // } + // var name = nodeToString(scope); + // node.thedef = new FakeSymbolDef(name + "_this", scope.id); + // } else { + if (node.definition() != null) { + node.definition().id = current_id; + current_id++; + } + } + }); + root.walk(walker); +} \ No newline at end of file diff --git a/lib/output.js b/lib/output.js index 72bcdd5e..42549ad3 100644 --- a/lib/output.js +++ b/lib/output.js @@ -63,6 +63,7 @@ function OutputStream(options) { preserve_line : false, screw_ie8 : false, preamble : null, + replace_mangled_map : null }, true); var indentation = 0; @@ -308,6 +309,7 @@ function OutputStream(options) { var stack = []; return { + replace_mangled_map : options.replace_mangled_map, get : get, toString : get, indent : indent, @@ -1097,7 +1099,13 @@ function OutputStream(options) { }); DEFPRINT(AST_Symbol, function(self, output){ var def = self.definition(); - output.print_name(def ? def.mangled_name || def.name : self.name); + // output.print_name(def ? def.mangled_name || def.name : self.name); + // replace all the variable names to be renamed with a placeholder + if (output.replace_mangled_map == null || self instanceof AST_This || self.unmangleable()) + output.print_name(def ? def.mangled_name || def.name : self.name); + else { + output.print_name(def ? "local$$" + output.replace_mangled_map.getId("$" + def.id + "-" + self.name) : self.name); + } }); DEFPRINT(AST_Undefined, function(self, output){ output.print("void 0"); diff --git a/test/feature_extraction/fnames.js b/test/feature_extraction/fnames.js new file mode 100644 index 00000000..fc7b9be1 --- /dev/null +++ b/test/feature_extraction/fnames.js @@ -0,0 +1,246 @@ +func_decl_params: { + options = { + features : "FNAMES" + }; + input: { + function chunkData(e, t) { } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"}, + {"a": 0, "b": 2, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "e"}, + {"v": 2, "inf": "t"} + ] + }' + } +} + +func_simple_call: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + foo(); + } + } + expect: { + '{ + "query":[ + ], + "assign":[ + ] + } + ' + } +} + +func_args: { + options = { + features : "FNAMES" + }; + input: { + function chunkData(x) { + var n1 = "hello"; + x.foo(b, n1, 42, n1, 42); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"}, + {"a": 0, "b": 2, "f2": "FNDECL"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"}, + {"v": 2, "inf": "n1"} + ] + } + ' + } +} + +inner_lambda_assign_local: { + options = { + features : "FNAMES" + }; + input: { + function chunkData(x) { + var local = function () { + console.log("hello"); + }; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"}, + {"a": 0, "b": 2, "f2": "FNDECL"}, + {"a": 2, "b": 3, "f2": "FNCALL"}, + {"a": 2, "b": 4, "f2": "FNSTRING"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"}, + {"v": 2, "inf": "local"}, + {"v": 3, "giv": "log"}, + {"v": 4, "giv": "hello"} + ] + }' + } +} + +inner_lambda_assign_global: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + global = function (a) { + console.log("hello"); + }; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "global"}, + {"v": 1, "inf": "a"} + ] + }' + } +} + +inner_lambda_assign_sub: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + global[42] = function (a) { + console.log("hello"); + }; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "global%5B%5D"}, + {"v": 1, "inf": "a"} + ] + }' + } +} + +inner_lambda_prop: { + options = { + features : "FNAMES" + }; + input: { + function chunkData(x) { + n17.substring( { + "awesome_key" : function(a) { + console.log("hello"); + } + }); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"}, + {"a": 2, "b": 3, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"}, + {"v": 2, "giv": "awesome_key"}, + {"v": 3, "inf": "a"} + ] + }' + } +} + +inner_lambda_arg: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + foo.substring(a, function(x) { + console.log("hello"); + }); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "substring(2)"}, + {"v": 1, "inf": "x"} + ] + }' + } +} + +inner_lambda_arg2: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + substring(a, function(b) { + console.log("hello"); + }); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "substring(2)"}, + {"v": 1, "inf": "b"} + ] + }' + } +} + +func_return: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + var a = "foo"; + return a; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNDECL"}, + {"a": 0, "b": 1, "f2": "FNRETURN"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "a"} + ] + }' + } +} \ No newline at end of file diff --git a/test/feature_extraction/general.js b/test/feature_extraction/general.js new file mode 100644 index 00000000..3f0733df --- /dev/null +++ b/test/feature_extraction/general.js @@ -0,0 +1,300 @@ +truncate_constants: { + options = { + features : "ASTREL" + }; + input: { + function chunkData() { + var x = "very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string"; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":VarDef:"} + ], + "assign":[ + {"v": 0, "inf": "x"}, + {"v": 1, "giv": "very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20"} + ] + }' + } +} + +escape_constants: { + options = { + features : "ASTREL" + }; + input: { + function chunkData() { + var x = '"quoted text"'; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":VarDef:"} + ], + "assign":[ + {"v": 0, "inf": "x"}, + {"v": 1, "giv": "%22quoted%20text%22"} + ] + }' + } +} + + +var_scope: { + options = { + features : "ASTREL" + }; + input: { + function chunkData() { + var a = 1; + var b = 2; + var x = a + b; + x = a + b; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":VarDef:"}, + {"a": 2, "b": 3, "f2": ":VarDef:"}, + {"a": 4, "b": 0, "f2": ":VarDef:Binary+[0]"}, + {"a": 4, "b": 2, "f2": ":VarDef:Binary+[1]"}, + {"a": 0, "b": 2, "f2": ":Binary+:"}, + {"a": 4, "b": 0, "f2": ":Assign=:Binary+[0]"}, + {"a": 4, "b": 2, "f2": ":Assign=:Binary+[1]"} + ], + "assign":[ + {"v": 0, "inf": "a"}, + {"v": 1, "giv": "1"}, + {"v": 2, "inf": "b"}, + {"v": 3, "giv": "2"}, + {"v": 4, "inf": "x"} + ] + }' + } +} + +this_scope: { + options = { + features : "ASTREL" + }; + input: { + function chunkData(a) { + this.x = a; + } + + function chunkData2(a) { + this.x = a; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":Assign=:"}, + {"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"}, + {"a": 0, "b": 3, "f2": ":Assign=:"}, + {"a": 2, "b": 3, "f2": "Dot[0]:Assign=:"} + ], + "assign":[ + {"v": 0, "giv": "x"}, + {"v": 1, "inf": "a"}, + {"v": 2, "giv": "this"}, + {"v": 3, "inf": "a"} + ] + }' + } +} + +this_is_given: { + options = { + features : "ASTREL" + }; + input: { + function chunkData(a) { + this.x = a; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":Assign=:"}, + {"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"} + ], + "assign":[ + {"v": 0, "giv": "x"}, + {"v": 1, "inf": "a"}, + {"v": 2, "giv": "this"} + ] + }' + } +} + + +this_attr_scope: { + options = { + features : "ASTREL" + }; + input: { + function chunkData(a,b) { + this.x = a; + this.x = b; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":Assign=:"}, + {"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"}, + {"a": 0, "b": 3, "f2": ":Assign=:"}, + {"a": 2, "b": 3, "f2": "Dot[0]:Assign=:"} + ], + "assign":[ + {"v": 0, "giv": "x"}, + {"v": 1, "inf": "a"}, + {"v": 2, "giv": "this"}, + {"v": 3, "inf": "b"} + ] + }' + } +} + +bool_const_type: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + var x = true; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNDECL"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"} + ] + }' + } +} + +handles_toString_call: { + options = { + features : "FNAMES" + }; + input: { + function chunkData() { + var x = true; + x.toString(); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNDECL"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"} + ] + }' + } +} + +escape_backslash: { + options = { + features : "FNAMES" + }; + input: { + function chunkData(x) { + x.replace(/\s/g, "a"); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "giv": "chunkData"}, + {"v": 1, "inf": "x"} + ] + }' + } +} + +func_no_duplicates: { + options = { + features : "ASTREL" + }; + input: { + function chunkData() { + var a = new chunkData(); + a = new chunkData(); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":VarDef:New[0]"}, + {"a": 0, "b": 1, "f2": ":Assign=:New[0]"} + ], + "assign":[ + {"v": 0, "inf": "a"}, + {"v": 1, "giv": "chunkData"} + ] + }' + } +} + +func_allow_different_features_duplicates: { + options = { + features : "ASTREL" + }; + input: { + function chunkData(x) { + x.foo(42, 42, 42, 42); + return 42; + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":Dot:"}, + {"a": 0, "b": 2, "f2": "Dot[0]:Call:"} + ], + "assign":[ + {"v": 0, "inf": "x"}, + {"v": 1, "giv": "foo"}, + {"v": 2, "giv": "42"} + ] + }' + } +} + +method_name_fixed: { + input: { + function chunkData(x) { + x.foo(); + bar(); + } + } + expect: { + '{ + "query":[ + {"a": 0, "b": 1, "f2": ":Dot:"}, + {"a": 2, "b": 0, "f2": "FNPAR"} + ], + "assign":[ + {"v": 0, "inf": "x"}, + {"v": 1, "giv": "foo"}, + {"v": 2, "giv": "chunkData"} + ] + }' + } +} \ No newline at end of file diff --git a/test/run-tests.js b/test/run-tests.js index 94bf6ad9..93c79ce9 100755 --- a/test/run-tests.js +++ b/test/run-tests.js @@ -23,6 +23,8 @@ run_ast_conversion_tests({ iterations: 1000 }); +run_feature_extraction_tests(); + /* -----[ utils ]----- */ function tmpl() { @@ -110,6 +112,51 @@ function run_compress_tests() { }); } + +function removeWhitespace(input){ + return input.replace(/\s/g,""); +} + +function run_feature_extraction_tests() { + var dir = test_directory("feature_extraction"); + log_directory("feature_extraction"); + var files = find_test_files(dir); + function test_file(file) { + log_start_file(file); + function test_case(test) { + log_test(test.name); + + var features = "FNAMES,ASTREL"; + if (test.options.hasOwnProperty("features")) { + features = test.options.features; + }; + + var expect = test.expect.body.value; + + var input_code = make_code(test.input); + var output = U.extractFeatures(input_code, test.name, false, features); + + if (removeWhitespace(expect) != removeWhitespace(output)) { + log("!!! failed\n---INPUT---\n{input}\n---OUTPUT---\n{output}\n---EXPECTED---\n{expected}\n\n", { + input: input_code, + output: output, + expected: expect + }); + failures++; + failed_files[file] = 1; + } + } + var tests = parse_test(path.resolve(dir, file)); + for (var i in tests) if (tests.hasOwnProperty(i)) { + test_case(tests[i]); + } + } + files.forEach(function(file){ + test_file(file); + }); +} + + function parse_test(file) { var script = fs.readFileSync(file, "utf8"); var ast = U.parse(script, { diff --git a/tools/node.js b/tools/node.js index 4bc8517b..a5eb7fff 100644 --- a/tools/node.js +++ b/tools/node.js @@ -33,7 +33,8 @@ var FILES = exports.FILES = [ "../lib/output.js", "../lib/compress.js", "../lib/sourcemap.js", - "../lib/mozilla-ast.js" + "../lib/mozilla-ast.js", + "../lib/feature_extractor.js" ].map(function(file){ return fs.realpathSync(path.join(path.dirname(__filename), file)); });