Add support for extracting features from JavaScript files

This commit is contained in:
Pavol Bielik 2015-01-22 16:21:02 +01:00
parent d36067cd35
commit 874b20eaff
8 changed files with 1290 additions and 2 deletions

111
bin/js_features.js Executable file
View File

@ -0,0 +1,111 @@
//#! /usr/bin/env nodejs
// -*- js -*-
"use strict";
var UglifyJS = require("../tools/node");
var sys = require("util");
var yargs = require("yargs");
var fs = require("fs");
// Command-line definition: exactly one positional input file plus the
// options below. `features` defaults to both supported feature kinds.
var ARGS = yargs
    .usage("$0 input1.js \n")
    .describe("print_ast", "Prints a dot file describing the internal abstract syntax")
    .describe("json_formatting", "Prints the JSON nicely formatted")
    .describe("skip_minified", "Whether to skip processing minified files")
    .describe("features", "Comma separated list of features: \n" +
              "ASTREL - relations in AST, \n" +
              "FNAMES - function names to internal calls")
    .demand(1)
    .default('features', 'ASTREL,FNAMES')
    .boolean("print_ast")
    .boolean("skip_minified")
    .boolean("json_formatting")
    .string("features")
    .wrap(80)
    .argv
;
// Rename dashed option keys (e.g. "print-ast") to their underscore form.
normalize(ARGS);
if (ARGS.h || ARGS.help) {
    console.log(yargs.help());
    process.exit(0);
}
var files = ARGS._.slice();
if (files.length > 1) {
    console.error("WARNING: expected only single input file. Processing file '" + files[0] + "' while the rest is ignored.");
}
// `--features` with no value can show up as `true` or as an empty string.
if (ARGS.features === true || ARGS.features === "") {
    console.error("ERROR: empty set of features.");
    process.exit(1);
}
var SUPPORTED_FEATURES = ["ASTREL", "FNAMES"];
// Drop unknown names for real: the extractor matches feature names by
// substring, so passing the raw option through would not actually ignore them.
var features = ARGS.features.split(",").filter(function(name) {
    if (SUPPORTED_FEATURES.indexOf(name) !== -1) {
        return true;
    }
    console.error("WARNING: ignoring not supported feature '" + name + "'.");
    return false;
});
// Only the first file is processed, exactly as the warning above announces.
processFile(files[0], ARGS.print_ast, features.join(","), ARGS.json_formatting, ARGS.skip_minified);
/**
 * Removes a leading "#!" interpreter line so the parser does not choke on it.
 *
 * @param {string} code  source text of the file
 * @returns {string} `code` without its first line when that line starts with
 *     "#!"; otherwise `code` unchanged. A file that consists of nothing but
 *     the interpreter line (no trailing newline) yields the empty string.
 */
function stripInterpreter(code){
    if (code.slice(0, 2) != "#!") {
        return code;
    }
    var line_end = code.indexOf("\n");
    // Without a newline the whole text is the interpreter line; slicing from
    // indexOf() + 1 would be slice(0) and hand the "#!" line back untouched.
    return line_end === -1 ? "" : code.slice(line_end + 1);
}
/**
 * Reads one JavaScript file, runs feature extraction on it and prints the
 * result to stdout.
 *
 * @param {string} file  path of the file to process
 * @param {boolean} print_ast  print the dot description of the AST instead of features
 * @param {string} features  comma separated feature names handed to the extractor
 * @param {boolean} json_formatting  keep the extractor's whitespace in the JSON output
 * @param {boolean} skip_minified  let the extractor skip files it considers minified
 *
 * Nothing is printed when the file cannot be read (an error is logged), when
 * the extractor returns null, or when the JSON result is empty.
 * Throws a SyntaxError if the extractor produced invalid JSON.
 */
function processFile(file, print_ast, features, json_formatting, skip_minified) {
    var code;
    try {
        code = fs.readFileSync(file, "utf-8");
    } catch (ex) {
        console.error("ERROR: can't read file: " + file);
        return;
    }
    // A script with an interpreter line would otherwise fail to parse.
    code = stripInterpreter(code);
    var output = UglifyJS.extractFeatures(code, file, print_ast, features, skip_minified);
    if (output == null) {
        return;
    }
    if (print_ast) {
        // The AST dump is a dot graph, not JSON: print it untouched instead of
        // stripping its whitespace and running it through JSON.parse.
        console.log(output);
        return;
    }
    if (!json_formatting) {
        output = removeWhitespace(output);
    }
    // Validate the JSON; a malformed result surfaces as a SyntaxError.
    JSON.parse(output);
    if (removeWhitespace(output) != '{"query":[],"assign":[]}') {
        console.log(output);
    }
}
/* ------------------------ */
/**
 * Renames, in place, every own property of `o` whose name contains a dash so
 * that each "-" becomes "_" (e.g. "print-ast" -> "print_ast").
 *
 * @param {Object} o  object to rewrite; the dashed keys are deleted
 */
function normalize(o) {
    for (var key in o) {
        if (!o.hasOwnProperty(key)) {
            continue;
        }
        if (key.indexOf("-") === -1) {
            continue;
        }
        var underscored = key.split("-").join("_");
        o[underscored] = o[key];
        delete o[key];
    }
}
/**
 * Returns `input` with every whitespace character (anything matched by \s)
 * removed.
 *
 * @param {string} input
 * @returns {string}
 */
function removeWhitespace(input){
    var pieces = input.split(/\s/);
    return pieces.join("");
}

61
extract_features.py Executable file
View File

@ -0,0 +1,61 @@
#!/usr/bin/python
import multiprocessing
import os
import sys
import shutil
def PrintUsage():
    """Print the command-line usage text to stdout and exit with status 1."""
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("""
Usage:
extract_features.py --filelist <file>
OR
extract_features.py --dir <directory>
""")
    # sys.exit instead of the `exit` helper, which only exists when the
    # `site` module has been loaded.
    sys.exit(1)
def GetJSFilesInDir(d):
    """Yield the path of every file below directory `d` whose path ends in '.js'.

    The tree is traversed with os.walk, so sub-directories are included and
    the yielded paths start with `d`.
    """
    for dirpath, _subdirs, names in os.walk(d):
        candidates = (os.path.join(dirpath, name) for name in names)
        for path in candidates:
            if not path.endswith('.js'):
                continue
            yield path
# Directory that collects the per-worker output files; set by
# ExtractFeaturesForFileList before any worker runs.
TMP_DIR = ""


def ExtractFeaturesForFile(f):
    """Run bin/js_features.js on file `f`, appending its stdout to TMP_DIR/<pid>.

    Each worker process appends to a file named after its own pid, so the
    outputs of all files handled by one worker end up in the same file.
    """
    import subprocess

    out_path = os.path.join(TMP_DIR, str(os.getpid()))
    with open(out_path, "ab") as out:
        try:
            # An argument list (no shell) keeps quotes, spaces and other shell
            # metacharacters in file names from breaking the command line.
            subprocess.call(
                ["nodejs", "bin/js_features.js", "--skip_minified", f],
                stdout=out)
        except OSError as err:
            # Stay best-effort like the shell invocation was: report and move on.
            sys.stderr.write("ERROR: cannot run nodejs for '%s': %s\n" % (f, err))
def _InitWorker(tmp_dir):
    """Pool initializer: record the shared output directory in a worker process."""
    global TMP_DIR
    TMP_DIR = tmp_dir


def ExtractFeaturesForFileList(files):
    """Extract features for every path in `files` in parallel and print them.

    Uses one worker per CPU. The workers write into a private temporary
    directory; once all of them are done, every file in that directory is
    copied to stdout and the directory is removed.
    """
    import tempfile

    global TMP_DIR
    # mkdtemp creates a fresh, private directory instead of reusing a
    # predictable /tmp name that another user could have prepared.
    TMP_DIR = tempfile.mkdtemp(prefix="feature_extractor")
    try:
        # The initializer hands TMP_DIR to workers explicitly, so this also
        # works when workers are not forked copies of this process.
        pool = multiprocessing.Pool(
            multiprocessing.cpu_count(), _InitWorker, (TMP_DIR,))
        try:
            pool.map(ExtractFeaturesForFile, files)
        finally:
            pool.close()
            pool.join()
        # Binary-safe stdout on both Python 2 (sys.stdout) and 3 (.buffer).
        stdout = getattr(sys.stdout, "buffer", sys.stdout)
        for name in os.listdir(TMP_DIR):
            with open(os.path.join(TMP_DIR, name), "rb") as part:
                shutil.copyfileobj(part, stdout)
        stdout.flush()
    finally:
        shutil.rmtree(TMP_DIR)
if __name__ == '__main__':
    # Both modes need the flag and its argument.
    if len(sys.argv) <= 2:
        PrintUsage()
    # Process command line arguments
    if sys.argv[1] == "--filelist":
        with open(sys.argv[2], 'r') as filelist:
            # Skip blank lines (e.g. the one after a trailing newline) so that
            # no empty file name is handed to the extractor.
            files = [line for line in filelist.read().splitlines() if line]
    elif sys.argv[1] == "--dir":
        files = list(GetJSFilesInDir(sys.argv[2]))
    else:
        PrintUsage()
    # Remove files that say they are minified.
    files = [f for f in files if not f.endswith('.min.js')]
    ExtractFeaturesForFileList(files)

514
lib/feature_extractor.js Normal file
View File

@ -0,0 +1,514 @@
"use strict";
// Prefix of symbol keys whose name is to be inferred: nodeToString() uses it
// for symbols that are neither `this` nor unmangleable.
var INFER = '$';
// Prefix of symbol keys whose name is fixed (constants, property names,
// `this`, unmangleable symbols).
var GIVEN = '#';
// isMinified(): a file counts as minified when it has at least this many AST
// nodes per source line on average.
var EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE = 25;
// isMinified(): ... or when more than this fraction of its symbol occurrences
// are short names (at most two characters, except "el" and "$").
var MAX_RATIO_SHORT_NAMES = 0.45;
// isMinified(): ... or when at least this many symbol occurrences look like
// "_<digit>...".
var NUM_NUMBERED_LOCALS = 5;
/**
 * Heuristically decides whether a parsed file is minified or obfuscated.
 *
 * The file is reported as minified when any of these holds:
 *  - it has at least EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE AST nodes per line,
 *  - more than MAX_RATIO_SHORT_NAMES of its symbol occurrences are short
 *    (at most two characters, "el" and "$" excepted),
 *  - it has symbols and every one of them looks like "_<digit>...",
 *  - at least NUM_NUMBERED_LOCALS symbol occurrences look like "_<digit>...".
 * `this` is not counted as a symbol.
 *
 * @param toplevel  root AST node; must provide walk(TreeWalker)
 * @param {string} code  source text, used only to count lines
 * @param {string} file  unused
 * @returns {boolean}
 */
function isMinified(toplevel, code, file){
    var numLines = code.split(/\r\n|\r|\n/).length;
    var numNodes = 0;
    var numNames = 0;
    var numShortNames = 0;
    var numNumberedNames = 0;
    toplevel.walk(new TreeWalker(function(node){
        numNodes++;
        if (!(node instanceof AST_Symbol) || node instanceof AST_This) {
            return;
        }
        numNames++;
        if (node.name.length <= 2 && node.name != "el" && node.name != "$") {
            numShortNames++;
        }
        if (node.name.length >= 2 && node.name[0] == '_') {
            var second = node.name[1];
            if (second >= '0' && second <= '9') {
                numNumberedNames++;
            }
        }
    }));
    // "all names are numbered" only means something when there are names;
    // without the guard a file with no symbols at all (0 == 0) was reported
    // as minified.
    var onlyNumberedNames = numNames > 0 && numNumberedNames == numNames;
    return (EXPECTED_MAX_NODES_PER_NONOBFUSACATED_LINE * numLines <= numNodes) ||
        (numShortNames > numNames * MAX_RATIO_SHORT_NAMES) ||
        onlyNumberedNames ||
        numNumberedNames >= NUM_NUMBERED_LOCALS;
}
/**
 * Parses `code` and prints it back (beautified) with an OutputStream that is
 * given the symbol-id map built by feature extraction as `replace_mangled_map`.
 *
 * @param {string} code  source text
 * @param {string} file  file name, used for parsing and in the warning
 * @returns {?string} the printed source, or null when `code` cannot be parsed
 */
function replaceMangled(code, file) {
    var ast;
    try {
        ast = parseFile(code, file);
    } catch (err) {
        console.warn("Cannot parse file: '%s'", file);
        return null;
    }
    extendAst(ast);

    // Run both extractors first: they populate the outputter's string_map,
    // which decides the id every symbol key gets, so the printed ids agree
    // with the ones feature extraction assigns.
    var outputter = new FeatureJsonOutputter();
    generateAstFeatures(ast, outputter);
    generateFnamesFeatures(ast, outputter);

    var printer_options = {
        beautify : true,
        replace_mangled_map : outputter.string_map
    };
    var printer = OutputStream(printer_options);
    ast.print(printer);
    return printer.toString();
}
/**
 * Entry point of feature extraction for one source text.
 *
 * @param {string} code  source text
 * @param {string} file  file name, used for parsing and in warnings
 * @param {boolean} print_ast  return the dot description of the AST instead of features
 * @param features  value with an indexOf method (string or array) naming the
 *     feature kinds to generate: "ASTREL" and/or "FNAMES"
 * @param {boolean} skip_minified  return null for files isMinified() flags
 * @returns {?string} JSON text with a "query" array (features) and an
 *     "assign" array (symbols); the dot text when `print_ast` is set; null
 *     when the file cannot be parsed or is skipped as minified
 */
function extractFeatures(code, file, print_ast, features, skip_minified) {
    var ast;
    try {
        ast = parseFile(code, file);
    } catch (err) {
        console.warn("Cannot parse file: '%s'", file);
        return null;
    }
    extendAst(ast);

    if (print_ast) {
        return printAst(ast);
    }
    if (skip_minified && isMinified(ast, code, file)) {
        console.warn("Skipping minified file: '%s'", file);
        return null;
    }

    var wants = function(feature_name) {
        return features.indexOf(feature_name) != -1;
    };
    var json = new FeatureJsonOutputter();
    json.openElem();
    json.openArray("query");
    if (wants("ASTREL")) {
        generateAstFeatures(ast, json);
    }
    if (wants("FNAMES")) {
        generateFnamesFeatures(ast, json);
    }
    json.closeArray();
    json.dumpSymbols();
    json.closeElem();
    return json.output;
}
/* -----[ functions ]----- */
/**
 * Maps an AST node to the string key under which it appears in the output,
 * or null when the node has no usable name.
 *
 * Keys start with GIVEN for fixed names (`this`, unmangleable symbols,
 * constants, property names, object keys) and with INFER for renamable
 * symbols, which are encoded as INFER + <definition id> + "-" + <name>.
 * Wrapper nodes delegate to the node that names them (see the branches).
 * A null name of an inner node is propagated as null instead of being
 * concatenated into the key as the text "null".
 *
 * @param node  AST node, may be null/undefined
 * @returns {?string}
 */
function nodeToString(node) {
    if (node == null) return null;

    if (node instanceof AST_Symbol) {
        // unmangleable(): the symbol cannot be renamed (it is global,
        // undeclared, or lives in a scope where eval or with are in use).
        if (node instanceof AST_This || node.unmangleable()) {
            return GIVEN + node.name;
        }
        return INFER + node.definition().id + "-" + node.name;
    }
    if (node instanceof AST_Constant) {
        // Long constants are cut to their first 64 characters.
        return GIVEN + String(node.value).slice(0, 64);
    }
    if (node instanceof AST_Sub) {
        // x[1]: x -> expression, 1 -> property
        var base = nodeToString(node.expression);
        return base == null ? null : base + "[]";
    }
    if (node instanceof AST_PropAccess) {
        return GIVEN + node.property;
    }
    if (node instanceof AST_Defun) {
        // function foo(...) { ... }
        return nodeToString(node.name);
    }
    if (node instanceof AST_VarDef) {
        // var x = function () { ... }
        return nodeToString(node.name);
    }
    if (node instanceof AST_Assign) {
        // x = function () { ... }
        return nodeToString(node.left);
    }
    if (node instanceof AST_ObjectProperty) {
        // { "x" : function () { ... } }
        return GIVEN + node.key;
    }
    if (node instanceof AST_Call) {
        // x.foo( function () { ... } )
        // foo( function () { ... } )
        return nodeToString(node.expression);
    }
    if (node instanceof AST_Lambda) {
        if (!(node.parent instanceof AST_Call)) {
            return nodeToString(node.parent);
        }
        // The lambda may itself be the callee, in which case it has no name.
        if (node.parent.expression == node) {
            return null;
        }
        var callee = nodeToString(node.parent.expression);
        return callee == null ? null : callee + "(" + node.child_id + ")";
    }
    return null;
}
/**
 * Returns the type label of an AST node as used in feature paths and in the
 * AST dump: the TYPE found on the node's prototype, except that
 *  - binary and unary nodes get their operator appended (e.g. "Binary+"),
 *  - boolean nodes are labelled "Bool",
 *  - atoms that are not constants (special values such as NaN, undefined,
 *    Infinity) are labelled "Atom".
 *
 * @param node  AST node
 * @returns {string}
 */
function nodeType(node) {
    var has_operator = node instanceof AST_Binary || node instanceof AST_Unary;
    if (has_operator) {
        return Object.getPrototypeOf(node).TYPE + node.operator;
    }
    if (node instanceof AST_Boolean) {
        return "Bool";
    }
    var is_plain_atom = node instanceof AST_Atom && !(node instanceof AST_Constant);
    return is_plain_atom ? "Atom" : Object.getPrototypeOf(node).TYPE;
}
/**
 * Renders the part of `path` from index `start` up to, but excluding, its
 * last node, walking towards the end of the path. Each node contributes
 * "<nodeType>[<child_id of the next node on the path>]".
 *
 * @param {Array} path  AST nodes ordered from ancestor to descendant
 * @param {number} start  index of the first node to render
 * @returns {string} concatenated steps; "" when there is nothing to render
 */
function pathToStringFw(path, start){
    var steps = [];
    var pos = start;
    while (pos < path.length - 1) {
        steps.push(nodeType(path[pos]) + "[" + path[pos + 1].child_id + "]");
        pos++;
    }
    return steps.join("");
}
/**
 * Renders the same steps as pathToStringFw but in reverse order: it starts
 * at the node before the last one and walks back to index `start`. Each node
 * contributes "<nodeType>[<child_id of the next node on the path>]".
 *
 * @param {Array} path  AST nodes ordered from ancestor to descendant
 * @param {number} start  index of the last node to render
 * @returns {string} concatenated steps; "" when there is nothing to render
 */
function pathToStringBw(path, start){
    var steps = [];
    var pos = path.length - 2;
    while (pos >= start) {
        steps.push(nodeType(path[pos]) + "[" + path[pos + 1].child_id + "]");
        pos--;
    }
    return steps.join("");
}
/**
 * Renders the AST below `toplevel` as a Graphviz "digraph AST" description:
 * one labelled node statement per AST node (label = nodeType) and one edge
 * from each node's parent to the node.
 *
 * @param toplevel  root AST node; nodes must already carry an `id`
 * @returns {string} dot source text
 */
function printAst(toplevel){
    var chunks = ["digraph AST {\n"];
    var walker = new TreeWalker(function(node){
        chunks.push(string_template(" node{id} [label=\"{label}\"];\n", {
            id: node.id,
            label: nodeType(node)
        }));
        var parent = walker.parent();
        if (parent == null) {
            return;
        }
        chunks.push(string_template(" node{id1} -> node{id2} [weight=1];\n", {
            id1: parent.id,
            id2: node.id
        }));
    });
    toplevel.walk(walker);
    chunks.push("}\n");
    return chunks.join("");
}
/**
 * Emits the "ASTREL" features: for every AST node, each pair of interesting
 * nodes (symbols, constants, property accesses) found near it is related by
 * the AST path that connects them.
 *
 * For a pair the feature name is
 *   <path from the first node back up to the fork> ":" <type of the deepest
 *   node the two paths share> ":" <path from the fork down to the second node>.
 *
 * @param toplevel  root AST node; must provide walk(TreeWalker)
 * @param feature_outputter  receiver of addFeature(a, b, name)
 * @throws {Error} if two paths returned for one node share no prefix
 */
function generateAstFeatures(toplevel, feature_outputter) {
    // NOTE(review): relies on TreeWalker invoking the visitor with the walker
    // as `this`, which is how `this.node_finder` is reached — confirm.
    var walker = new TreeWalker(function(node){
        var paths = this.node_finder.find(node);
        for (var i = 0; i < paths.length; i++) {
            var path1 = paths[i];
            var node1 = path1[path1.length - 1];
            for (var j = i + 1; j < paths.length; j++) {
                var path2 = paths[j];
                var node2 = path2[path2.length - 1];
                // determine common prefix to be skipped
                var common_prefix_len = 0;
                while (common_prefix_len < path1.length && common_prefix_len < path2.length
                       && path1[common_prefix_len] === path2[common_prefix_len]) {
                    common_prefix_len++;
                }
                if (common_prefix_len == 0) {
                    // An Error (rather than a bare string) carries a stack trace.
                    throw new Error("common prefix not greater than 0!");
                }
                feature_outputter.addFeature(
                    nodeToString(node1),
                    nodeToString(node2),
                    pathToStringBw(path1, common_prefix_len) + ":" + nodeType(path1[common_prefix_len - 1]) + ":" + pathToStringFw(path2, common_prefix_len)
                );
            }
        }
    });
    // Paths are collected with a depth limit of 3 passed to NodePathFinder.
    walker.node_finder = new NodePathFinder(3, function(node) {
        return (node instanceof AST_Symbol || node instanceof AST_Constant || node instanceof AST_PropAccess);
    });
    toplevel.walk(walker);
}
/**
 * Relates `rhs` to every entry of `lhss`, innermost (last) entry first. The
 * feature name is `rhs_label` prefixed by one `lhs_label` per nesting level:
 * the last entry gets lhs_label + rhs_label, the one before it
 * lhs_label + lhs_label + rhs_label, and so on.
 *
 * @param {Array} lhss  left-hand side keys, outermost first
 * @param {string} lhs_label  label repeated once per level
 * @param rhs  right-hand side key
 * @param {string} rhs_label  label of the relation
 * @param feature_outputter  receiver of addFeature(a, b, name)
 */
function addFeatures(lhss, lhs_label, rhs, rhs_label, feature_outputter){
    var levels = "";
    var idx = lhss.length;
    while (idx-- > 0) {
        levels = levels + lhs_label;
        feature_outputter.addFeature(lhss[idx], rhs, levels + rhs_label);
    }
}
/**
 * Emits the "FNAMES" features: every named function is related to its
 * parameters ("FNPAR"), and every enclosing named function is related to the
 * calls, constructor calls, constants, declarations, property reads and
 * returned values found inside it.
 *
 * @param toplevel  root AST node; must provide walk(TreeWalker)
 * @param feature_outputter  receiver handed on to addFeatures()
 */
function generateFnamesFeatures(toplevel, feature_outputter){
    // names of the named functions currently being traversed, outermost first
    var enclosing = [];
    var relate = function(target, label) {
        addFeatures(enclosing, "FN", target, label, feature_outputter);
    };
    var visitor = function(node, descend){
        var is_function = node instanceof AST_Defun || node instanceof AST_Lambda;
        var fn_name = is_function ? nodeToString(node) : null;
        if (fn_name != null) {
            node.argnames.forEach(function(arg) {
                addFeatures([fn_name], "FN", nodeToString(arg), "PAR", feature_outputter);
            });
            enclosing.push(fn_name);
            descend(); // visit the body while the name is on the stack
            enclosing.pop();
            return true; // the children were handled above, do not descend again
        }
        if (node instanceof AST_New) {
            relate(nodeToString(node), "NEW");
        } else if (node instanceof AST_Call) {
            relate(nodeToString(node), "CALL");
        } else if (node instanceof AST_Constant) {
            relate(nodeToString(node), nodeType(node).toUpperCase());
        } else if (node instanceof AST_VarDef) {
            relate(nodeToString(node.name), "DECL");
        } else if (node instanceof AST_Dot) {
            // a property that is being called is reported by the call itself
            if (!(node.parent instanceof AST_Call)) {
                relate(nodeToString(node), "PROP");
            }
        } else if (node instanceof AST_Return) {
            var returned = nodeToString(node.value);
            if (returned != null) {
                relate(returned, "RETURN");
            }
        }
    };
    toplevel.walk(new TreeWalker(visitor));
}
/* -----[ NodePathFinder ]----- */
/**
 * Tree walker that collects, for a given start node, the paths to all nodes
 * accepted by `filter`.
 *
 * @param max_depth  nodes are not entered once the walker's stack is longer than this
 * @param filter  predicate; a path is recorded for every visited node it accepts
 */
function NodePathFinder(max_depth, filter) {
this.max_depth = max_depth;
this.paths = [];
this.filter = filter;
}
// NOTE(review): all NodePathFinder instances share this one TreeWalker object
// as their prototype; whether its `stack` is shared between instances depends
// on TreeWalker, which is defined elsewhere — confirm.
NodePathFinder.prototype = new TreeWalker(function(node, descend){
// Too deep, or a function declaration: return true without descending and
// without recording a path for this node.
if (this.stack.length > this.max_depth || node instanceof AST_Defun){
return true;
}
//enforce in-order traversal
//otherwise we get for "x.foo()" feature foo - x instead of x - foo as x is a parent of foo in the AST
descend();
// Children were visited above, so their paths precede this node's path.
// The recorded path is a copy of the walker's current stack (presumably the
// chain of nodes from the start node down to `node` — TODO confirm).
if (this.filter(node)) {
this.paths.push(this.stack.slice(0));
}
// The children were already handled by the explicit descend() call.
return true;
});
/**
 * Walks `node` and returns the paths recorded during that walk. The list is
 * reset on every call; `root` is only stored, it is not read in this block.
 */
NodePathFinder.prototype.find = function(node) {
this.root = node;
this.paths = [];
node.walk(this);
return this.paths;
};
/* ---[ JsonOutputter ]--- */
/**
 * Incrementally builds the JSON text of the extraction result in `output`.
 *
 * Callers open and close elements and arrays explicitly; features are added
 * with addFeature() and the symbol table is appended with dumpSymbols().
 * Every string key handed to addFeature() is mapped to a numeric id through
 * `string_map`, which is also what dumpSymbols() iterates.
 */
function FeatureJsonOutputter() {
    this.string_map = new StringMap(false);
    // true while nothing has been written yet at the current nesting level,
    // i.e. while no separating comma is needed
    this.first_element = true;
    this.output = "";
    this.depth = 0;
    // a_id -> set of "<b_id>-<name>" keys already emitted for that a_id.
    // Prototype-less objects are used as sets so that a lookup is a single
    // property check and can never hit an inherited key.
    this.pairs = Object.create(null);
}

/** Returns the indentation for the current depth: one space per level. */
FeatureJsonOutputter.prototype.indent = function() {
    var res = "";
    for (var i = 0; i < this.depth; i++) {
        res += " ";
    }
    return res;
};

/** Starts a JSON object on a new line, preceded by a comma when needed. */
FeatureJsonOutputter.prototype.openElem = function() {
    if (!this.first_element) {
        this.output += ",";
    }
    this.output += "\n" + this.indent() + "{";
    this.first_element = true;
    this.depth++;
};

/** Ends the JSON object started by openElem(). */
FeatureJsonOutputter.prototype.closeElem = function() {
    this.depth--;
    this.output += "}";
    this.first_element = false;
};

/** Starts the member `"name":[` on a new line, preceded by a comma when needed. */
FeatureJsonOutputter.prototype.openArray = function(name){
    if (!this.first_element) {
        this.output += ",";
    }
    this.output += "\n" + this.indent() + "\"" + name + "\":[";
    this.first_element = true;
    this.depth++;
};

/** Ends the array started by openArray(). */
FeatureJsonOutputter.prototype.closeArray = function(){
    this.depth--;
    this.output += "\n" + this.indent() + "]";
    this.first_element = false;
};

/**
 * Records the feature (a_id, b_id, name).
 *
 * @returns {boolean} true when exactly this feature was recorded before,
 *     false when it is new (and is now remembered)
 */
FeatureJsonOutputter.prototype.visitFeature = function(a_id, b_id, name){
    var seen = this.pairs[a_id];
    if (seen === undefined) {
        seen = this.pairs[a_id] = Object.create(null);
    }
    var key = b_id + "-" + name;
    if (key in seen) {
        return true;
    }
    seen[key] = true;
    return false;
};

/**
 * Appends the feature `{"a": <id of a>, "b": <id of b>, "f2": name}`.
 *
 * Nothing is written when either key is null, when both keys are given
 * (fixed) names, when both keys map to the same id, or when the same
 * feature was already written.
 */
FeatureJsonOutputter.prototype.addFeature = function(a, b, name){
    if (a == null || b == null) {
        return;
    }
    //do not add features between two fixed nodes
    if (a[0] == GIVEN && b[0] == GIVEN) {
        return;
    }
    var a_id = this.string_map.getId(a);
    var b_id = this.string_map.getId(b);
    if (a_id == b_id || this.visitFeature(a_id, b_id, name)) {
        return;
    }
    this.openElem();
    this.output += '"a": ' + a_id + ",";
    this.output += '\t"b": ' + b_id + ",";
    this.output += '\t"f2": "' + name + '"';
    this.closeElem();
};

/**
 * Appends the symbol entry for `key`: `{"v": id, "inf": name}` for keys that
 * start with INFER, `{"v": id, "giv": name}` otherwise. Names are escaped
 * with escapeString().
 */
FeatureJsonOutputter.prototype.addSymbol = function(key){
    this.openElem();
    this.output += '"v": ' + this.string_map.getId(key) + ",";
    if (key[0] == INFER) {
        //${id}-{name}
        this.output += '\t"inf": "' + escapeString(key.split("-")[1]) + '"';
    } else {
        //#{name}
        this.output += '\t"giv": "' + escapeString(key.slice(1)) + '"';
    }
    this.closeElem();
};

/** Appends the "assign" array: one entry per key of `string_map`, in id order. */
FeatureJsonOutputter.prototype.dumpSymbols = function(){
    this.openArray("assign");
    var keys = this.string_map.keys;
    for (var i = 0, length = keys.length; i < length; i++) {
        this.addSymbol(keys[i]);
    }
    this.closeArray();
};
/* -----[ StringMap ]----- */
/**
 * Assigns consecutive numeric ids (starting at 0) to strings and remembers
 * the strings in the order in which they were first seen.
 *
 * @param nice_names  when truthy, getId() returns its input unchanged and
 *     nothing is recorded
 */
function StringMap(nice_names) {
    this.map = {};
    this.current_id = 0;
    this.nice_names = nice_names;
    this.keys = [];
}

/**
 * Returns the id of `input`, allocating the next free id on first use.
 *
 * @throws {Error} when `input` is null or undefined
 */
StringMap.prototype.getId = function(input){
    if (input == null) {
        throw new Error("error null");
    }
    if (this.nice_names) {
        return input;
    }
    // Entries are stored under a "#"-prefixed name so that inputs such as
    // "toString" cannot collide with properties every object inherits.
    var slot = "#" + input;
    if (slot in this.map) {
        return this.map[slot];
    }
    var fresh_id = this.current_id;
    this.map[slot] = fresh_id;
    // keep the inputs in id order for iterating later
    this.keys.push(input);
    this.current_id = fresh_id + 1;
    return fresh_id;
};
/* ------------------------ */
/**
 * Escapes a name or constant so that it can be embedded in a JSON string of
 * the output: the text is percent-encoded with encodeURIComponent.
 *
 * Unpaired UTF-16 surrogates are replaced by U+FFFD first. They occur, for
 * example, when a long string constant is cut in the middle of a surrogate
 * pair, and encodeURIComponent throws a URIError on them.
 *
 * @param input  value to escape; converted to a string
 * @returns {string}
 */
function escapeString(input){
    var text = String(input);
    // First alternative: a well-formed pair, kept as is.
    // Second alternative: any surrogate that is left over, i.e. unpaired.
    var well_formed = text.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, function(match) {
        return match.length === 2 ? match : "\uFFFD";
    });
    return encodeURIComponent(well_formed);
}
/**
 * Parses `code` and resolves its scopes.
 *
 * @param {string} code  source text
 * @param {string} file  file name handed to the parser as `filename`
 * @returns the root AST node, after figure_out_scope() has been called on it
 */
function parseFile(code, file) {
    var parse_options = { filename : file };
    var ast = parse(code, parse_options);
    ast.figure_out_scope();
    return ast;
}
/**
 * Minimal stand-in for a symbol definition: it only carries a `name` and an
 * `id`. Within this file it is referenced solely from disabled code in
 * extendAst().
 */
function FakeSymbolDef(name, id) {
    var def = this;
    def.name = name;
    def.id = id;
}
/**
 * Annotates every node below `root` (inclusive) with the bookkeeping the
 * feature extractors rely on:
 *  - `id`: unique number, kept if the node already has one,
 *  - `parent`: the parent node (undefined for the root), kept if already set,
 *  - `child_id`: position of the node among its parent's children,
 *  - `num_childs`: number of children visited so far.
 * In addition the definition of every symbol receives a fresh `id` each time
 * one of its symbols is visited, so all symbols that share a definition end
 * up with the same id. Symbols without a definition are left untouched (an
 * earlier variant that gave `this` a synthetic definition is disabled).
 *
 * @param root  root AST node; must provide walk(TreeWalker)
 */
function extendAst(root){
    var next_id = 0;
    var walker = new TreeWalker(function(node){
        var parent = walker.parent();
        if (!node.hasOwnProperty("id")) {
            node.id = next_id++;
        }
        if (!node.hasOwnProperty("parent")) {
            node.parent = parent;
        }
        node.num_childs = 0;
        node.child_id = 0;
        if (parent !== undefined) {
            node.child_id = parent.num_childs;
            parent.num_childs++;
        }
        if (!(node instanceof AST_Symbol)) {
            return;
        }
        var def = node.definition();
        if (def != null) {
            def.id = next_id++;
        }
    });
    root.walk(walker);
}

View File

@ -63,6 +63,7 @@ function OutputStream(options) {
preserve_line : false,
screw_ie8 : false,
preamble : null,
replace_mangled_map : null
}, true);
var indentation = 0;
@ -308,6 +309,7 @@ function OutputStream(options) {
var stack = [];
return {
replace_mangled_map : options.replace_mangled_map,
get : get,
toString : get,
indent : indent,
@ -1097,7 +1099,13 @@ function OutputStream(options) {
});
DEFPRINT(AST_Symbol, function(self, output){
var def = self.definition();
output.print_name(def ? def.mangled_name || def.name : self.name);
// output.print_name(def ? def.mangled_name || def.name : self.name);
// replace all the variable names to be renamed with a placeholder
if (output.replace_mangled_map == null || self instanceof AST_This || self.unmangleable())
output.print_name(def ? def.mangled_name || def.name : self.name);
else {
output.print_name(def ? "local$$" + output.replace_mangled_map.getId("$" + def.id + "-" + self.name) : self.name);
}
});
DEFPRINT(AST_Undefined, function(self, output){
output.print("void 0");

View File

@ -0,0 +1,246 @@
// FNAMES: a function declaration is related to each of its parameters by an
// FNPAR feature.
func_decl_params: {
options = {
features : "FNAMES"
};
input: {
function chunkData(e, t) { }
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"},
{"a": 0, "b": 2, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "e"},
{"v": 2, "inf": "t"}
]
}'
}
}
// FNAMES: a function whose body only calls an undeclared name is expected to
// yield no features and no symbols.
func_simple_call: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
foo();
}
}
expect: {
'{
"query":[
],
"assign":[
]
}
'
}
}
// FNAMES: only the parameter (FNPAR) and the local declaration (FNDECL) are
// expected; the method call and its arguments add nothing.
func_args: {
options = {
features : "FNAMES"
};
input: {
function chunkData(x) {
var n1 = "hello";
x.foo(b, n1, 42, n1, 42);
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"},
{"a": 0, "b": 2, "f2": "FNDECL"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"},
{"v": 2, "inf": "n1"}
]
}
'
}
}
// FNAMES: a lambda stored in a local variable is named after that variable,
// so the call and the string inside it are attributed to `local`.
inner_lambda_assign_local: {
options = {
features : "FNAMES"
};
input: {
function chunkData(x) {
var local = function () {
console.log("hello");
};
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"},
{"a": 0, "b": 2, "f2": "FNDECL"},
{"a": 2, "b": 3, "f2": "FNCALL"},
{"a": 2, "b": 4, "f2": "FNSTRING"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"},
{"v": 2, "inf": "local"},
{"v": 3, "giv": "log"},
{"v": 4, "giv": "hello"}
]
}'
}
}
// FNAMES: a lambda assigned to an undeclared name is named after that (given)
// name; only its parameter is expected to yield a feature.
inner_lambda_assign_global: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
global = function (a) {
console.log("hello");
};
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "global"},
{"v": 1, "inf": "a"}
]
}'
}
}
// FNAMES: a lambda assigned to a subscript expression is named "global[]",
// which appears URI-encoded (global%5B%5D) in the expected output.
inner_lambda_assign_sub: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
global[42] = function (a) {
console.log("hello");
};
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "global%5B%5D"},
{"v": 1, "inf": "a"}
]
}'
}
}
// FNAMES: a lambda that is the value of an object-literal property is named
// after the property key.
inner_lambda_prop: {
options = {
features : "FNAMES"
};
input: {
function chunkData(x) {
n17.substring( {
"awesome_key" : function(a) {
console.log("hello");
}
});
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"},
{"a": 2, "b": 3, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"},
{"v": 2, "giv": "awesome_key"},
{"v": 3, "inf": "a"}
]
}'
}
}
// FNAMES: a lambda passed as an argument of a method call is named after the
// called method plus its child index within the call, here "substring(2)".
inner_lambda_arg: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
foo.substring(a, function(x) {
console.log("hello");
});
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "substring(2)"},
{"v": 1, "inf": "x"}
]
}'
}
}
// FNAMES: same as inner_lambda_arg, but the callee is a plain function name
// instead of a method.
inner_lambda_arg2: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
substring(a, function(b) {
console.log("hello");
});
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "substring(2)"},
{"v": 1, "inf": "b"}
]
}'
}
}
// FNAMES: a returned local yields an FNRETURN feature in addition to the
// FNDECL feature of its declaration.
func_return: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
var a = "foo";
return a;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNDECL"},
{"a": 0, "b": 1, "f2": "FNRETURN"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "a"}
]
}'
}
}

View File

@ -0,0 +1,300 @@
// ASTREL: a long string constant is truncated; the expected value is only a
// (URI-encoded) prefix of the input string.
truncate_constants: {
options = {
features : "ASTREL"
};
input: {
function chunkData() {
var x = "very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string, very long string";
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":VarDef:"}
],
"assign":[
{"v": 0, "inf": "x"},
{"v": 1, "giv": "very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20"}
]
}'
}
}
// ASTREL: string constants are URI-encoded in the output (quotes become %22,
// spaces %20).
escape_constants: {
options = {
features : "ASTREL"
};
input: {
function chunkData() {
var x = '"quoted text"';
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":VarDef:"}
],
"assign":[
{"v": 0, "inf": "x"},
{"v": 1, "giv": "%22quoted%20text%22"}
]
}'
}
}
// ASTREL: every use of the same local maps to one symbol id; the repeated
// `a + b` relation is expected once, while the VarDef and Assign= paths to x
// are expected as separate features.
var_scope: {
options = {
features : "ASTREL"
};
input: {
function chunkData() {
var a = 1;
var b = 2;
var x = a + b;
x = a + b;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":VarDef:"},
{"a": 2, "b": 3, "f2": ":VarDef:"},
{"a": 4, "b": 0, "f2": ":VarDef:Binary+[0]"},
{"a": 4, "b": 2, "f2": ":VarDef:Binary+[1]"},
{"a": 0, "b": 2, "f2": ":Binary+:"},
{"a": 4, "b": 0, "f2": ":Assign=:Binary+[0]"},
{"a": 4, "b": 2, "f2": ":Assign=:Binary+[1]"}
],
"assign":[
{"v": 0, "inf": "a"},
{"v": 1, "giv": "1"},
{"v": 2, "inf": "b"},
{"v": 3, "giv": "2"},
{"v": 4, "inf": "x"}
]
}'
}
}
// ASTREL: parameters named `a` in two different functions get distinct ids,
// while `this` and the property name `x` are shared given names.
this_scope: {
options = {
features : "ASTREL"
};
input: {
function chunkData(a) {
this.x = a;
}
function chunkData2(a) {
this.x = a;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":Assign=:"},
{"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"},
{"a": 0, "b": 3, "f2": ":Assign=:"},
{"a": 2, "b": 3, "f2": "Dot[0]:Assign=:"}
],
"assign":[
{"v": 0, "giv": "x"},
{"v": 1, "inf": "a"},
{"v": 2, "giv": "this"},
{"v": 3, "inf": "a"}
]
}'
}
}
// ASTREL: `this` is expected to be emitted as a given name.
this_is_given: {
options = {
features : "ASTREL"
};
input: {
function chunkData(a) {
this.x = a;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":Assign=:"},
{"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"}
],
"assign":[
{"v": 0, "giv": "x"},
{"v": 1, "inf": "a"},
{"v": 2, "giv": "this"}
]
}'
}
}
// ASTREL: two assignments to this.x from different parameters yield one set
// of features per parameter, sharing the `x` and `this` symbols.
this_attr_scope: {
options = {
features : "ASTREL"
};
input: {
function chunkData(a,b) {
this.x = a;
this.x = b;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":Assign=:"},
{"a": 2, "b": 1, "f2": "Dot[0]:Assign=:"},
{"a": 0, "b": 3, "f2": ":Assign=:"},
{"a": 2, "b": 3, "f2": "Dot[0]:Assign=:"}
],
"assign":[
{"v": 0, "giv": "x"},
{"v": 1, "inf": "a"},
{"v": 2, "giv": "this"},
{"v": 3, "inf": "b"}
]
}'
}
}
// FNAMES: a boolean initializer adds no feature of its own; only the
// declaration (FNDECL) is expected.
bool_const_type: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
var x = true;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNDECL"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"}
]
}'
}
}
// FNAMES: calling a method named toString (a name every object inherits)
// must not disturb the output.
handles_toString_call: {
options = {
features : "FNAMES"
};
input: {
function chunkData() {
var x = true;
x.toString();
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNDECL"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"}
]
}'
}
}
// FNAMES: a regular-expression literal containing a backslash among the call
// arguments must not break the output.
escape_backslash: {
options = {
features : "FNAMES"
};
input: {
function chunkData(x) {
x.replace(/\s/g, "a");
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "giv": "chunkData"},
{"v": 1, "inf": "x"}
]
}'
}
}
// ASTREL: the same pair of symbols is expected once per distinct path
// (VarDef vs. Assign=).
func_no_duplicates: {
options = {
features : "ASTREL"
};
input: {
function chunkData() {
var a = new chunkData();
a = new chunkData();
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":VarDef:New[0]"},
{"a": 0, "b": 1, "f2": ":Assign=:New[0]"}
],
"assign":[
{"v": 0, "inf": "a"},
{"v": 1, "giv": "chunkData"}
]
}'
}
}
// ASTREL: the repeated constant 42 collapses into a single symbol and a
// single feature relating it to x.
func_allow_different_features_duplicates: {
options = {
features : "ASTREL"
};
input: {
function chunkData(x) {
x.foo(42, 42, 42, 42);
return 42;
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":Dot:"},
{"a": 0, "b": 2, "f2": "Dot[0]:Call:"}
],
"assign":[
{"v": 0, "inf": "x"},
{"v": 1, "giv": "foo"},
{"v": 2, "giv": "42"}
]
}'
}
}
// No `options` are given, so the test runner's default feature list applies;
// the method name `foo` is expected as a given name.
method_name_fixed: {
input: {
function chunkData(x) {
x.foo();
bar();
}
}
expect: {
'{
"query":[
{"a": 0, "b": 1, "f2": ":Dot:"},
{"a": 2, "b": 0, "f2": "FNPAR"}
],
"assign":[
{"v": 0, "inf": "x"},
{"v": 1, "giv": "foo"},
{"v": 2, "giv": "chunkData"}
]
}'
}
}

View File

@ -23,6 +23,8 @@ run_ast_conversion_tests({
iterations: 1000
});
run_feature_extraction_tests();
/* -----[ utils ]----- */
function tmpl() {
@ -110,6 +112,51 @@ function run_compress_tests() {
});
}
/**
 * Returns `input` without any whitespace characters, so that expected and
 * actual output can be compared independently of their formatting.
 *
 * @param {string} input
 * @returns {string}
 */
function removeWhitespace(input){
    var without_whitespace = input.split(/\s/).join("");
    return without_whitespace;
}
/**
 * Runs every test case of every file in the "feature_extraction" test
 * directory: the case's input is rendered to code, run through
 * U.extractFeatures and compared, ignoring whitespace, with the expected
 * string. The feature list comes from the case's `options.features` and
 * defaults to "FNAMES,ASTREL". Mismatches are logged and counted in
 * `failures` / `failed_files`.
 */
function run_feature_extraction_tests() {
    var dir = test_directory("feature_extraction");
    log_directory("feature_extraction");
    var files = find_test_files(dir);
    function test_file(file) {
        log_start_file(file);
        function test_case(test) {
            log_test(test.name);
            var features = "FNAMES,ASTREL";
            if (test.options && test.options.hasOwnProperty("features")) {
                features = test.options.features;
            }
            var expect = test.expect.body.value;
            var input_code = make_code(test.input);
            var output = U.extractFeatures(input_code, test.name, false, features);
            // extractFeatures returns null when the input cannot be parsed;
            // record that as a failed test instead of crashing the whole run
            // inside removeWhitespace(null).
            var passed = output != null &&
                removeWhitespace(expect) == removeWhitespace(output);
            if (!passed) {
                log("!!! failed\n---INPUT---\n{input}\n---OUTPUT---\n{output}\n---EXPECTED---\n{expected}\n\n", {
                    input: input_code,
                    output: output,
                    expected: expect
                });
                failures++;
                failed_files[file] = 1;
            }
        }
        var tests = parse_test(path.resolve(dir, file));
        for (var i in tests) if (tests.hasOwnProperty(i)) {
            test_case(tests[i]);
        }
    }
    files.forEach(function(file){
        test_file(file);
    });
}
function parse_test(file) {
var script = fs.readFileSync(file, "utf8");
var ast = U.parse(script, {

View File

@ -33,7 +33,8 @@ var FILES = exports.FILES = [
"../lib/output.js",
"../lib/compress.js",
"../lib/sourcemap.js",
"../lib/mozilla-ast.js"
"../lib/mozilla-ast.js",
"../lib/feature_extractor.js"
].map(function(file){
return fs.realpathSync(path.join(path.dirname(__filename), file));
});