diff --git a/lib/feature_extractor.js b/lib/feature_extractor.js index 157556b7..0e927bc1 100644 --- a/lib/feature_extractor.js +++ b/lib/feature_extractor.js @@ -135,41 +135,65 @@ function extractFeatures(code, file, print_ast, features, skip_minified) { /* -----[ functions ]----- */ -function nodeToString(node, parent) { +function Property(must_infer, name, annotation) { + this.must_infer = must_infer; + this.name = name; + this.annotation = annotation; +} + +Property.prototype.toString = function () { + return (this.must_infer ? INFER : GIVEN) + this.name; +} + +function nodeToProperty(node, parent) { if (node == null) return null; if (node instanceof AST_Symbol){ if (node instanceof AST_This ){ - return GIVEN + node.name; + //return GIVEN + node.name; + return new Property(false, node.name, ""); } // AST_Symbol::unmangleable() returns true if this symbol cannot be renamed (it's either global, undeclared, or defined in scope where eval or with are in use. if (node.unmangleable()){ - return GIVEN + node.name; + //return GIVEN + node.name; + return new Property(false, node.name, ""); } - return INFER + node.definition().id + "-" + node.name; + //return INFER + node.definition().id + "-" + node.name; + return new Property(true, node.definition().id + "-" + node.name, ""); } else if (node instanceof AST_Constant){ - return GIVEN + "!" + nodeType(node) + "!" + String(node.value).slice(0,64); + //var name = GIVEN + String(node.value).slice(0,64); + //name.annotation = "!" + nodeType(node) + "!"; + //return name; + return new Property(false, String(node.value).slice(0,64), nodeType(node)); } else if (node instanceof AST_Sub){ //x[1], x -> expression, 1 -> property - return (nodeToString(node.expression, node) != null) ? nodeToString(node.expression, node) + "[]" : null; + + if (nodeToProperty(node.expression, node) == null) { + return null; + } + var prop = nodeToProperty(node.expression, node); + prop.annotation += "[]"; + return prop; } else if (node instanceof AST_PropAccess){ - return GIVEN + node.property; + //return GIVEN + node.property; + return new Property(false, node.property, ""); } else if (node instanceof AST_Defun) { //function foo(...) { ... } - return nodeToString(node.name, node); + return nodeToProperty(node.name, node); } else if (node instanceof AST_VarDef){ // var x = function () { ... } - return nodeToString(node.name, node); + return nodeToProperty(node.name, node); } else if (node instanceof AST_Assign){ //x = function () { ... } - return nodeToString(node.left, node); + return nodeToProperty(node.left, node); } else if (node instanceof AST_ObjectProperty){ // { "x" : function () { ... } } - return GIVEN + node.key; + //return GIVEN + node.key; + return new Property(false, node.key, ""); } else if (node instanceof AST_Call){ //x.foo( function () { ... } ) //foo( function () { ... } ) - return nodeToString(node.expression, node); + return nodeToProperty(node.expression, node); } else if (node instanceof AST_Lambda) { if (node.parent instanceof AST_Call){ //'node.parent.expression != node' as lambda can call itself @@ -177,10 +201,19 @@ function nodeToString(node, parent) { return null; } - return (nodeToString(node.parent.expression, node) != null) ? nodeToString(node.parent.expression, node) + "(" + node.child_id + ")" : null; + if (nodeToProperty(node.parent.expression, node) == null) { + return null; + } + + //var name = nodeToProperty(node.parent.expression, node); + //name.annotation = "(" + node.child_id + ")"; + //return name; + var prop = nodeToProperty(node.parent.expression, node); + prop.annotation += "(" + node.child_id + ")"; + return prop; } if (node.parent != parent) { - return nodeToString(node.parent, node); + return nodeToProperty(node.parent, node); } } @@ -245,7 +278,7 @@ function printAst(toplevel){ function generateAstFeatures(toplevel, feature_outputter) { var walker = new TreeWalker(function(node){ - // console.log(nodeType(node) + " - " + nodeToString(node)); + // console.log(nodeType(node) + " - " + nodeToProperty(node)); var paths = this.node_finder.find(node); for (var i = 0; i < paths.length; i++) { var path1 = paths[i]; @@ -267,8 +300,8 @@ function generateAstFeatures(toplevel, feature_outputter) { } feature_outputter.addFeature( - nodeToString(node1), - nodeToString(node2), + nodeToProperty(node1), + nodeToProperty(node2), //pathToStringBw(path1, common_prefix_len) + ":" + nodeType(path1[common_prefix_len - 1]) + ":" + pathToStringFw(path2, common_prefix_len) (path2.length != common_prefix_len) ? pathToStringBw(path1, common_prefix_len) + ":" + pathToStringFw(path2, common_prefix_len - 1) @@ -296,20 +329,20 @@ function addFeatures(lhss, lhs_label, rhs, rhs_label, feature_outputter){ function addScopeConstraints(node, toplevel, feature_outputter){ feature_outputter.beginScope(); - var name = nodeToString(node); + var name = nodeToProperty(node); if (name != null) feature_outputter.addToScope(name); for (var i = 0; i < node.enclosed.length; i++){ - feature_outputter.addToScope(nodeToString(node.enclosed[i].orig[0])); + feature_outputter.addToScope(nodeToProperty(node.enclosed[i].orig[0])); } node.variables.each(function(symbol){ - feature_outputter.addToScope(nodeToString(symbol.orig[0])); + feature_outputter.addToScope(nodeToProperty(symbol.orig[0])); }); toplevel.globals.each(function(symbol){ - feature_outputter.addToScope(nodeToString(symbol.orig[0])); + feature_outputter.addToScope(nodeToProperty(symbol.orig[0])); }); feature_outputter.endScope(); @@ -330,11 +363,11 @@ function generateFnamesFeatures(toplevel, feature_outputter){ toplevel.walk(new TreeWalker(function(node, descend){ - if ((node instanceof AST_Defun || node instanceof AST_Lambda) && nodeToString(node) != null) { - var name = nodeToString(node); + if ((node instanceof AST_Defun || node instanceof AST_Lambda) && nodeToProperty(node) != null) { + var name = nodeToProperty(node); for (var i = 0; i < node.argnames.length; i++) { - addFeatures([name], "FN", nodeToString(node.argnames[i]), "PAR", feature_outputter); + addFeatures([name], "FN", nodeToProperty(node.argnames[i]), "PAR", feature_outputter); } outer_funcs.push(name); @@ -345,17 +378,17 @@ function generateFnamesFeatures(toplevel, feature_outputter){ } if (node instanceof AST_New) { - addFeatures(outer_funcs, "FN", nodeToString(node), "NEW", feature_outputter); + addFeatures(outer_funcs, "FN", nodeToProperty(node), "NEW", feature_outputter); } else if (node instanceof AST_Call) { - addFeatures(outer_funcs, "FN", nodeToString(node), "CALL", feature_outputter); + addFeatures(outer_funcs, "FN", nodeToProperty(node), "CALL", feature_outputter); } else if (node instanceof AST_Constant){ - addFeatures(outer_funcs, "FN", nodeToString(node), nodeType(node).toUpperCase(), feature_outputter); + addFeatures(outer_funcs, "FN", nodeToProperty(node), nodeType(node).toUpperCase(), feature_outputter); } else if (node instanceof AST_VarDef){ - addFeatures(outer_funcs, "FN", nodeToString(node.name), "DECL", feature_outputter); + addFeatures(outer_funcs, "FN", nodeToProperty(node.name), "DECL", feature_outputter); } else if (node instanceof AST_Dot && !(node.parent instanceof AST_Call)) { - addFeatures(outer_funcs, "FN", nodeToString(node), "PROP", feature_outputter); - } else if (node instanceof AST_Return && nodeToString(node.value) != null) { - addFeatures(outer_funcs, "FN", nodeToString(node.value), "RETURN", feature_outputter); + addFeatures(outer_funcs, "FN", nodeToProperty(node), "PROP", feature_outputter); + } else if (node instanceof AST_Return && nodeToProperty(node.value) != null) { + addFeatures(outer_funcs, "FN", nodeToProperty(node.value), "RETURN", feature_outputter); } })); } @@ -461,12 +494,20 @@ FeatureJsonOutputter.prototype.addFeature = function(a, b, name){ } //do not add features between two fixed nodes - if (a[0] == GIVEN && b[0] == GIVEN) { + //if (a[0] == GIVEN && b[0] == GIVEN) { + if (!a.must_infer && !b.must_infer) { return; } - var a_id = this.string_map.getId(a); - var b_id = this.string_map.getId(b); + if (a.annotation != "") { + name = a.annotation + "-" + name; + } + if (b.annotation != "") { + name = name + "-" + b.annotation; + } + + var a_id = this.string_map.getId(a.toString()); + var b_id = this.string_map.getId(b.toString()); if (a_id == b_id || this.visitFeature(a_id, b_id, name)){ return; @@ -563,7 +604,7 @@ StringMap.prototype.hasId = function(input){ } //we add a special character in from to allow for keys such as "toString" - var escaped_input = "#" + input; + var escaped_input = "#" + input.toString(); return escaped_input in this.map; }; @@ -572,6 +613,7 @@ StringMap.prototype.getId = function(input){ throw new Error("error null"); } + input = input.toString(); //we add a special character in from to allow for keys such as "toString" var escaped_input = "#" + input; diff --git a/test/feature_extraction/fnames.js b/test/feature_extraction/fnames.js index ad9e3637..2d5c7951 100644 --- a/test/feature_extraction/fnames.js +++ b/test/feature_extraction/fnames.js @@ -83,14 +83,14 @@ inner_lambda_assign_local: { {"a": 0, "b": 1, "f2": "FNPAR"}, {"a": 0, "b": 2, "f2": "FNDECL"}, {"a": 2, "b": 3, "f2": "FNCALL"}, - {"a": 2, "b": 4, "f2": "FNSTRING"} + {"a": 2, "b": 4, "f2": "FNSTRING-String"} ], "assign":[ {"v": 0, "giv": "chunkData"}, {"v": 1, "inf": "x"}, {"v": 2, "inf": "local"}, {"v": 3, "giv": "log"}, - {"v": 4, "giv": "!String!hello"} + {"v": 4, "giv": "hello"} ] }' } @@ -134,10 +134,10 @@ inner_lambda_assign_sub: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": "FNPAR"} + {"a": 0, "b": 1, "f2": "[]-FNPAR"} ], "assign":[ - {"v": 0, "giv": "global%5B%5D"}, + {"v": 0, "giv": "global"}, {"v": 1, "inf": "a"} ] }' @@ -187,10 +187,10 @@ inner_lambda_arg: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": "FNPAR"} + {"a": 0, "b": 1, "f2": "(2)-FNPAR"} ], "assign":[ - {"v": 0, "giv": "substring(2)"}, + {"v": 0, "giv": "substring"}, {"v": 1, "inf": "x"} ] }' @@ -211,10 +211,10 @@ inner_lambda_arg2: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": "FNPAR"} + {"a": 0, "b": 1, "f2": "(2)-FNPAR"} ], "assign":[ - {"v": 0, "giv": "substring(2)"}, + {"v": 0, "giv": "substring"}, {"v": 1, "inf": "b"} ] }' diff --git a/test/feature_extraction/general.js b/test/feature_extraction/general.js index 96f6e0ea..470a3536 100644 --- a/test/feature_extraction/general.js +++ b/test/feature_extraction/general.js @@ -10,11 +10,11 @@ truncate_constants: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": ":VarDef[1]"} + {"a": 0, "b": 1, "f2": ":VarDef[1]-String"} ], "assign":[ {"v": 0, "inf": "x"}, - {"v": 1, "giv": "!String!very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20"} + {"v": 1, "giv": "very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20string%2C%20very%20long%20"} ] }' } @@ -32,11 +32,11 @@ escape_constants: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": ":VarDef[1]"} + {"a": 0, "b": 1, "f2": ":VarDef[1]-String"} ], "assign":[ {"v": 0, "inf": "x"}, - {"v": 1, "giv": "!String!%22quoted%20text%22"} + {"v": 1, "giv": "%22quoted%20text%22"} ] }' } @@ -58,8 +58,8 @@ var_scope: { expect: { '{ "query":[ - {"a": 0, "b": 1, "f2": ":VarDef[1]"}, - {"a": 2, "b": 3, "f2": ":VarDef[1]"}, + {"a": 0, "b": 1, "f2": ":VarDef[1]-Number"}, + {"a": 2, "b": 3, "f2": ":VarDef[1]-Number"}, {"a": 4, "b": 0, "f2": ":VarDef[1]Binary+[0]"}, {"a": 4, "b": 2, "f2": ":VarDef[1]Binary+[1]"}, {"a": 0, "b": 2, "f2": ":Binary+[1]"}, @@ -68,9 +68,9 @@ var_scope: { ], "assign":[ {"v": 0, "inf": "a"}, - {"v": 1, "giv": "!Number!1"}, + {"v": 1, "giv": "1"}, {"v": 2, "inf": "b"}, - {"v": 3, "giv": "!Number!2"}, + {"v": 3, "giv": "2"}, {"v": 4, "inf": "x"} ] }' @@ -266,15 +266,15 @@ func_allow_different_features_duplicates: { '{ "query":[ {"a": 0, "b": 1, "f2": ":Dot[0]"}, - {"a": 0, "b": 2, "f2": "Dot[0]:Call[1]"}, - {"a": 0, "b": 2, "f2": "Dot[0]:Call[2]"}, - {"a": 0, "b": 2, "f2": "Dot[0]:Call[3]"}, - {"a": 0, "b": 2, "f2": "Dot[0]:Call[4]"} + {"a": 0, "b": 2, "f2": "Dot[0]:Call[1]-Number"}, + {"a": 0, "b": 2, "f2": "Dot[0]:Call[2]-Number"}, + {"a": 0, "b": 2, "f2": "Dot[0]:Call[3]-Number"}, + {"a": 0, "b": 2, "f2": "Dot[0]:Call[4]-Number"} ], "assign":[ {"v": 0, "inf": "x"}, {"v": 1, "giv": "foo"}, - {"v": 2, "giv": "!Number!42"} + {"v": 2, "giv": "42"} ] }' }