Source code
Revision control
Copy as Markdown
Other Tools
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
// Utility code for traversing the JSON data structures produced by sixgill.
"use strict";
var TRACING = false;
// When edge.Kind == "Pointer", these are the meanings of the edge.Reference field.
var PTR_POINTER = 0;
var PTR_REFERENCE = 1;
var PTR_RVALUE_REF = 2;
// A class wrapping information about a single function, plus access to global
// information.
var FunctionFlowGraph = class FunctionFlowGraph {
constructor({ name, bodies, typeInfo }) {
// Full name ("<unmangled>$<mangled>") of the function.
assert(name);
this.name = name;
// The loop bodies within this function. There will always be 1 main
// body at the end of the list, and additional bodies for all the loops.
// (It's not a simple 1-1 relationship; there could be nested loops and
// artificial loops created to produce a reducible control flow graph.)
assert(bodies);
this.bodies = bodies;
// typeInfo is global data, not tied to this function, but we only want
// to be passing around one thing everywhere.
assert(typeInfo);
this.typeInfo = typeInfo;
}
mainBody() {
// Loops come first, then the main body.
return this.bodies.at(-1);
}
forEachBody(f) {
this.bodies.forEach(f);
}
// Look up a variable declaration with a Variable object from the CFG. Note
// that the body does not matter, because for some strange reason all the
// variable declarations are duplicated across all bodies. If the same name
// is used in different bodies (or within the same body in different
// scopes), the variable name will be disambiguated by appending ":<n>".
lookupDecl(variable) {
return this.mainBody().DefineVariable?.find(
decl => sameVariable(decl.Variable, variable)
);
}
forEachDecl(f) {
this.mainBody().DefineVariable?.forEach(f);
}
getAttrsForTypeName(typeName) {
let attrs = 0;
if (typeName in this.typeInfo.GCSuppressors) {
attrs = attrs | ATTR_GC_SUPPRESSED;
}
return attrs;
}
getBodyByBlockId(blockId) {
const body = this.bodies.find(
body => sameBlockId(body.BlockId, blockId)
);
assert(body);
return body;
}
};
// Find all points (positions within the code) within the body (an outer
// function or a loop within it), recursing into loops if needed. Return them as
// a list of <body, edgeSourceId, bits> tuples, where `bits` is just the same
// value passed in here (in the future, perhaps it could be updated to reflect
// RAII or other scopes).
function findAllPoints(ffg, blockId, bits) {
const body = ffg.getBodyByBlockId(blockId);
if (!("PEdge" in body))
return;
const points = [];
for (var edge of body.PEdge) {
points.push([body, edge.Index[0], bits]);
if (edge.Kind == "Loop")
points.push(...findAllPoints(ffg, edge.BlockId, bits));
}
return points;
}
// Visitor of a graph of <body, ppoint> vertexes and sixgill-generated edges,
// where the edges represent the actual computation happening.
//
// Uses the syntax `var Visitor = class { ... }` rather than `class Visitor`
// to allow reloading this file with the JS debugger.
var Visitor = class {
constructor(ffg) {
this.visited_bodies = new Map();
ffg.forEachBody(body => { this.visited_bodies.set(body, new Map()); });
}
// Prepend `edge` to the info stored at the successor node, returning
// the updated info value. This should be overridden by pretty much any
// subclass, as a traversal's semantics are largely determined by this method.
extend_path(edge, body, ppoint, successor_value) { return true; }
// Default implementation does a basic "only visit nodes once" search.
// (Whether this is BFS/DFS/other is determined by the caller.)
// Override if you need to revisit nodes. Valid actions are "continue",
// "prune", and "done". "continue" means continue with the search. "prune"
// means do not continue to predecessors of this node, only continue with
// the remaining entries in the work queue. "done" means the
// whole search is complete even if unvisited nodes remain.
next_action(prev, current) { return prev ? "prune" : "continue"; }
// Update the info at a node. If this is the first time the node has been
// seen, `prev` will be undefined. `current` will be the info computed by
// `extend_path`. The node will be updated with the return value.
merge_info(prev, current) { return true; }
// Default visit() implementation. Subclasses will usually leave this alone
// and use the other methods as extension points.
//
// Take a body, a point within that body, and the info computed by
// extend_path() for that point when traversing an edge. Return whether the
// search should continue ("continue"), the search should be pruned and
// other paths followed ("prune"), or that the whole search is complete and
// it is time to return a value ("done", and the value returned by
// merge_info() will be returned by the overall search).
//
// Persistently record the value computed so far at each point, and call
// (overridable) next_action() and merge_info() methods with the previous
// and freshly-computed value for each point.
//
// Often, extend_path() will decide how/whether to continue the search and
// will return the search action to take, and next_action() will blindly
// return it if the point has not yet been visited. (And if it has, it will
// prune this branch of the search so that no point is visited multiple
// times.)
visit(body, ppoint, info) {
const visited_value_table = this.visited_bodies.get(body);
const existing_value_if_visited = visited_value_table.get(ppoint);
const action = this.next_action(existing_value_if_visited, info);
const merged = this.merge_info(existing_value_if_visited, info);
visited_value_table.set(ppoint, merged);
return [action, merged];
}
};
// For a given function containing a set of bodies, each containing a set of
// ppoints, perform a mostly breadth-first traversal through the complete graph
// of all <body, ppoint> nodes throughout all the bodies of the function.
//
// When traversing, every <body, ppoint> node is associated with a value that
// is assigned or updated whenever it is visited. The overall traversal
// terminates when a given condition is reached, and an arbitrary custom value
// is returned. If the search completes without the termination condition
// being reached, it will return the value associated with the entrypoint
// node, which is initialized to `entrypoint_fallback_value` (and thus serves as
// the fallback return value if all search paths are pruned before reaching
// the entrypoint.)
//
// The traversal is only *mostly* breadth-first because the visitor decides
// whether to stop searching when it sees a node. If a node is visited for a
// second time, the visitor can choose to continue (and thus revisit the node)
// in order to find "better" paths that may include a node more than once.
// The search is done in the "upwards" direction -- as in, it starts at the
// exit point and searches through predecessors.
//
// Override visitor.visit() to return an action and a value. The action
// determines whether the overall search should terminate ('done'), or
// continues looking through the predecessors of the current node ('continue'),
// or whether it should just continue processing the work queue without
// looking at predecessors ('prune').
//
// This allows this function to be used in different ways. If the visitor
// associates a value with each node that chains onto its forward-flow successors
// (predecessors in the "upwards" search order), then a complete path through
// the graph will be returned.
//
// Alternatively, BFS_upwards() can be used to test whether a condition holds
// (eg "the exit point is reachable only after calling SomethingImportant()"),
// in which case no path is needed and the visitor can compute a simple boolean
// every time it encounters a point. Note that `entrypoint_fallback_value` will
// still be returned if the search terminates without ever reaching the
// entrypoint, which is useful for dominator analyses.
//
// See the Visitor base class's implementation of visit(), above, for the
// most commonly used visit logic.
function BFS_upwards(start_body, start_ppoint, ffg, visitor,
initial_successor_value = {},
entrypoint_fallback_value=null)
{
let entrypoint_value = entrypoint_fallback_value;
const work = [[start_body, start_ppoint, null, initial_successor_value]];
if (TRACING) {
printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`);
}
while (work.length > 0) {
const [body, ppoint, edgeToAdd, successor_value] = work.shift();
if (TRACING) {
const s = edgeToAdd ? " : " + str(edgeToAdd) : "";
printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`);
}
let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value);
const [action, merged_value] = visitor.visit(body, ppoint, value);
if (action === "done") {
return merged_value;
}
if (action === "prune") {
// Do not push anything else to the work queue, but continue processing
// other branches.
continue;
}
assert(action == "continue");
value = merged_value;
const predecessors = getPredecessors(body);
for (const edge of (predecessors[ppoint] || [])) {
if (edge.Kind == "Loop") {
// Propagate the search into the exit point of the loop body.
const loopBody = ffg.getBodyByBlockId(edge.BlockId);
const loopEnd = loopBody.Index[1];
work.push([loopBody, loopEnd, null, value]);
// Don't continue to predecessors here without going through
// the loop. (The points in this body that enter the loop will
// be traversed when we reach the entry point of the loop.)
}
work.push([body, edge.Index[0], edge, value]);
}
// Check for hitting the entry point of a loop body.
if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") {
// Propagate to outer body parents that enter the loop body.
for (const parent of (body.BlockPPoint || [])) {
const parentBody = ffg.getBodyByBlockId(parent.BlockId);
work.push([parentBody, parent.Index, null, value]);
}
// This point is also preceded by the *end* of this loop, for the
// previous iteration.
work.push([body, body.Index[1], null, value]);
}
// Check for reaching the entrypoint of the function.
if (body === start_body && ppoint == body.Index[0]) {
entrypoint_value = value;
}
}
// The search space was exhausted without finding a 'done' state. That
// might be because all search paths were pruned before reaching the entry
// point of the function, in which case entrypoint_value will still be its initial
// value. (If entrypoint_value has been set, then we may still not have visited the
// entire graph, if some paths were pruned but at least one made it to the entrypoint.)
return entrypoint_value;
}
// Given the CFG for the constructor call of some RAII, return whether the
// given edge is the matching destructor call.
function isMatchingDestructor(edge, constructed)
{
if (edge.Kind != "Call")
return false;
var callee = edge.Exp[0];
if (callee.Kind != "Var")
return false;
var variable = callee.Variable;
assert(variable.Kind == "Func");
if (variable.Name[1].charAt(0) != '~')
return false;
// Note that in some situations, a regular function can begin with '~', so
// we don't necessarily have a destructor in hand. This is probably a
// sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a
// templatized static inline EraseIf is invoked, and it gets named ~EraseIf
// for some reason.
if (!("PEdgeCallInstance" in edge))
return false;
if (constructed.Kind != "Var") {
// The constructor has to be just the variable v we're interested in,
// not *v or v.field.
return false;
}
var destructExp = edge.PEdgeCallInstance.Exp;
if (destructExp.Kind != "Var")
return false;
return sameVariable(constructed.Variable, destructExp.Variable);
}
// Return all calls within the RAII scope of any constructor matched by
// matchConstructorEdge(). (Note that this would be insufficient if you needed
// to treat each instance separately, such as when different regions of a
// function body were guarded by these constructors and you needed to do
// something different with each.)
function allRAIIGuardedCallPoints(ffg, body)
{
if (!("PEdge" in body))
return [];
var points = [];
for (const edge of body.PEdge) {
const result = matchConstructorEdge(ffg, edge);
if (result && result.attrs != 0) {
points.push(...pointsInRAIIScope(ffg, body, edge, result.attrs, result.constructed));
}
}
return points;
}
// Test whether the given edge is the constructor corresponding to the given
// destructor edge.
function isMatchingConstructor(destructor, edge)
{
if (edge.Kind != "Call")
return false;
var callee = edge.Exp[0];
if (callee.Kind != "Var")
return false;
var variable = callee.Variable;
if (variable.Kind != "Func")
return false;
var name = readable(variable.Name[0]);
var destructorName = readable(destructor.Exp[0].Variable.Name[0]);
var match = destructorName.match(/^(.*?::)~(\w+)\(/);
if (!match) {
printErr("Unhandled destructor syntax: " + destructorName);
return false;
}
var constructorSubstring = match[1] + match[2];
if (name.indexOf(constructorSubstring) == -1)
return false;
var destructExp = destructor.PEdgeCallInstance.Exp;
if (destructExp.Kind != "Var")
return false;
var constructExp = edge.PEdgeCallInstance.Exp;
if (constructExp.Kind != "Var")
return false;
return sameVariable(constructExp.Variable, destructExp.Variable);
}
function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true)
{
var worklist = [destructorEdge];
var predecessors = getPredecessors(body);
while(worklist.length > 0) {
var edge = worklist.pop();
if (isMatchingConstructor(destructorEdge, edge))
return edge;
if (edge.Index[0] in predecessors) {
for (var e of predecessors[edge.Index[0]])
worklist.push(e);
}
}
if (warnIfNotFound)
printErr("Could not find matching constructor!");
return undefined;
}
// Return an array of all points within the RAII scope, each point being a tuple
// [<body>, <point>, <attributes>].
//
// ffg - information about the function being processed.
// body - the body containing the starting point.
// constructorEdge - the edge representing the constructor invocation. Used
// only to initialize the search to the point just after the ctor call.
// bits - the attributes to include with each point. Currently, this is the
// same for every returned point tuple.
// constructed - the variable that was constructed. For a regular constructor
// call, this could be trivially inferred from `constructorEdge`, but for
// inlined constexpr constructors, the caller needs to figure it out.
function pointsInRAIIScope(ffg, body, constructorEdge, bits, constructed) {
var seen = {};
var worklist = [constructorEdge.Index[1]];
var points = [];
while (worklist.length) {
var point = worklist.pop();
if (point in seen)
continue;
seen[point] = true;
points.push([body, point, bits]);
var successors = getSuccessors(body);
if (!(point in successors))
continue;
for (var nedge of successors[point]) {
if (isMatchingDestructor(nedge, constructed))
continue;
if (nedge.Kind == "Loop")
points.push(...findAllPoints(ffg, nedge.BlockId, bits));
worklist.push(nedge.Index[1]);
}
}
return points;
}
function isImmobileValue(exp) {
if (exp.Kind == "Int" && exp.String == "0") {
return true;
}
return false;
}
// Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference.
function isReferenceDecl(decl) {
return decl.Type.Kind == "Pointer" && decl.Type.Reference != PTR_POINTER && decl.Variable.Kind != "Temp";
}
function expressionIsVariableAddress(exp, variable)
{
while (exp.Kind == "Fld")
exp = exp.Exp[0];
return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
}
function edgeTakesVariableAddress(edge, variable, body)
{
if (ignoreEdgeUse(edge, variable, body))
return false;
if (ignoreEdgeAddressTaken(edge))
return false;
switch (edge.Kind) {
case "Assign":
return expressionIsVariableAddress(edge.Exp[1], variable);
case "Call":
if ("PEdgeCallArguments" in edge) {
for (var exp of edge.PEdgeCallArguments.Exp) {
if (expressionIsVariableAddress(exp, variable))
return true;
}
}
return false;
default:
return false;
}
}
// Look at an invocation of a virtual method or function pointer contained in a
// field, and return the static type of the invocant (or the containing struct,
// for a function pointer field.)
function getFieldCallInstanceCSU(edge, field)
{
if ("FieldInstanceFunction" in field) {
// We have a 'this'.
const instanceExp = edge.PEdgeCallInstance.Exp;
if (instanceExp.Kind == 'Drf') {
// somevar->foo()
return edge.Type.TypeFunctionCSU.Type.Name;
} else if (instanceExp.Kind == 'Fld') {
// somevar.foo()
return instanceExp.Field.FieldCSU.Type.Name;
} else if (instanceExp.Kind == 'Index') {
// A strange construct.
// C++ code: static_cast<JS::CustomAutoRooter*>(this)->trace(trc);
// CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*))
return instanceExp.Type.Name;
} else if (instanceExp.Kind == 'Var') {
// C++: reinterpret_cast<SimpleTimeZone*>(gRawGMT)->~SimpleTimeZone();
// CFG:
// # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor
// [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor ()
return field.FieldCSU.Type.Name;
} else {
printErr("------------------ edge -------------------");
printErr(JSON.stringify(edge, null, 4));
printErr("------------------ field -------------------");
printErr(JSON.stringify(field, null, 4));
assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`);
}
} else {
// somefar.foo() where somevar is a field of some CSU.
return field.FieldCSU.Type.Name;
}
}
function expressionUsesVariable(exp, variable)
{
if (exp.Kind == "Var" && sameVariable(exp.Variable, variable))
return true;
if (!("Exp" in exp))
return false;
for (var childExp of exp.Exp) {
if (expressionUsesVariable(childExp, variable))
return true;
}
return false;
}
function expressionUsesVariableContents(exp, variable)
{
if (!("Exp" in exp))
return false;
for (var childExp of exp.Exp) {
if (childExp.Kind == 'Drf') {
if (expressionUsesVariable(childExp, variable))
return true;
} else if (expressionUsesVariableContents(childExp, variable)) {
return true;
}
}
return false;
}
// Detect simple |return nullptr;| statements.
function isReturningImmobileValue(edge, variable)
{
if (variable.Kind == "Return") {
if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) {
if (isImmobileValue(edge.Exp[1]))
return true;
}
}
return false;
}
// If the edge uses the given variable's value, return the earliest point at
// which the use is definite. Usually, that means the source of the edge
// (anything that reaches that source point will end up using the variable, but
// there may be other ways to reach the destination of the edge.)
//
// Return values are implicitly used at the very last point in the function.
// This makes a difference: if an RAII class GCs in its destructor, we need to
// start looking at the final point in the function, not one point back from
// that, since that would skip over the GCing call.
//
// Certain references may be annotated to be live to the end of the function
// as well (eg AutoCheckCannotGC&& parameters).
//
// Note that this returns a nonzero value only if the variable's incoming value is used.
// So this would return 0 for 'obj':
//
// obj = someFunction();
//
// but these would return a positive value:
//
// obj = someFunction(obj);
// obj->foo = someFunction();
//
function edgeUsesVariable(ffg, edge, decl, body, liveToEnd=false)
{
const variable = decl.Variable;
if (ignoreEdgeUse(edge, variable, body))
return 0;
if (variable.Kind == "Return") {
liveToEnd = true;
}
if (liveToEnd && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") {
// The last point in the function body is treated as using the return
// value. This is the only time the destination point is returned
// rather than the source point.
return edge.Index[1];
}
var src = edge.Index[0];
switch (edge.Kind) {
case "Assign": {
// Detect `Return := nullptr`.
if (isReturningImmobileValue(edge, variable))
return 0;
const [lhs, rhs] = edge.Exp;
// Detect `lhs := ...variable...`
if (expressionUsesVariable(rhs, variable))
return src;
// Detect `...variable... := rhs` but not `variable := rhs`. The latter
// overwrites the previous value of `variable` without using it.
if (expressionUsesVariable(lhs, variable) && !exprCoversVariable(ffg, lhs, decl)) {
return src;
}
return 0;
}
case "Assume":
return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0;
case "Call": {
const callee = edge.Exp[0];
if (expressionUsesVariable(callee, variable))
return src;
if ("PEdgeCallInstance" in edge) {
if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) {
if (edgeStartsValueLiveRange(ffg, edge, decl)) {
// If the variable is being constructed, then the incoming
// value is not used here; it didn't exist before
// construction. (The analysis doesn't get told where
// variables are defined, so must infer it from
// construction. If the variable does not have a
// constructor, its live range may be larger than it really
// ought to be if it is defined within a loop body, but
// that is conservative.)
} else {
return src;
}
}
}
if ("PEdgeCallArguments" in edge) {
for (var exp of edge.PEdgeCallArguments.Exp) {
if (expressionUsesVariable(exp, variable))
return src;
}
}
if (edge.Exp.length == 1)
return 0;
// Assigning call result to a variable.
const lhs = edge.Exp[1];
if (expressionUsesVariable(lhs, variable) && !exprCoversVariable(ffg, lhs, decl))
return src;
return 0;
}
case "Loop":
return 0;
case "Assembly":
return 0;
default:
assert(false);
}
}
// If `decl` is the body.DefineVariable[] declaration of a reference type, then
// return the expression without the outer dereference. Otherwise, return the
// original expression.
function maybeDereference(exp, decl) {
if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
if (isReferenceDecl(decl)) {
return exp.Exp[0];
}
}
return exp;
}
// Test to see if `exp` is simply the given variable.
function expressionIsVariable(exp, variable)
{
return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
}
function referencedCSUName(type) {
if (type.Kind == "Pointer") {
return referencedCSUName(type.Type);
} else if (type.Kind == "CSU") {
return type.Name;
}
}
function containsGCPointer(ffg, type) {
if (type.Kind == "CSU") {
if (!(type.Name in ffg.typeInfo.AllGCPointers)) {
return false;
}
} else if (type.Kind == "Pointer") {
const pointeeType = type.Type;
if (pointeeType.Kind != "CSU" || !(pointeeType.Name in ffg.typeInfo.AllGCTypes)) {
return false;
}
} else {
// Not a GC pointer.
return false;
}
return true;
}
// Test to see if `exp` is simply the given variable or is a field of the given
// variable that comprises the whole of the interesting parts of that variable's
// type. The latter is to handle anonymous lambda closures that capture a single
// GC pointer variable. The idea is that if you assign to the sole field within
// a struct, then you're overwriting the whole struct and so callers will use
// this to determine that the old value of the struct is now dead.
//
// struct A {
// JSObject* obj;
// int i;
// } a;
// struct B {
// A a;
// void* vp;
// } b;
// struct C {
// A a1;
// A a2;
// } c;
// a.i = 7; // Does not cover whole variable
// a.obj = nullptr; // Covers whole variable
// b.vp = nullptr; // Does not cover whole variable
// b.a.obj = nullptr; // Covers whole variable
// b.a = get_a_struct(); // Covers whole variable
// c.a1.obj = nullptr; // Does not cover whole variable (c.a2 was not overwritten)
//
function exprCoversVariable(ffg, exp, decl)
{
if (exp.Kind == "Var") {
return sameVariable(exp.Variable, decl.Variable);
} else if (exp.Kind == "Fld") {
// Treat x.f1.f2 = val as "setting" the variable x and overwriting its
// previous value if x and x.f1 have types with a single field
// containing GC pointers (because the assignment overwrites the only
// part of the variable we care about), *and* if x.f1.f2 has at least
// one GC pointer (if it has zero, then we're overwriting a different
// part of x.f1 that doesn't touch the GC pointer-containing part. If it
// has more than 1, that's fine; we may be overwriting multiple GC
// pointers at once). If any intervening type has multiple GC pointer
// fields, then it doesn't count (because the rest of that type isn't
// being overwritten).
// Only CSUs (classes/structs/unions) are eligible for this partial
// assignment treatment.
if (decl.Type.Kind != "CSU") {
return false;
}
// Check that the innermost field assignment (eg x.f1.f2) is a GC
// pointer type, since otherwise we're overwriting the wrong field.
if (!containsGCPointer(ffg, exp.Field.Type)) {
return false;
}
// Loop through each nested field access, other than the innermost, to
// check that there aren't any sibling fields containing GC pointers we
// are not overwriting.
let trail = exp;
let e = exp.Exp[0]; // Advance past the innermost field assignment (eg x.f1.f2 -> x.f1)
while (e.Kind == "Fld") {
const csu = referencedCSUName(e.Field.Type);
if (!csu || !(csu in ffg.typeInfo.SingleGCField)) {
return false;
}
trail = e;
e = e.Exp[0];
}
// We also need to know that the original variable (`x` in the example)
// is a SingleGCField type, but the expression CFG does not contain its
// type. Fortunately, we know we're accessing a field, and field
// accesses contain the type of the Class/Struct/Union they are field
// accesses of, so the innermost expr with Kind="Fld" has the type.
if (trail && !(trail.Field.FieldCSU.Type.Name in ffg.typeInfo.SingleGCField)) {
return false;
}
// ...and now make sure `x` is the variable we care about in the first
// place. Note that `e` is the most deeply nested expression in the CFG,
// but it represents the outermost part of the `x.f1.f2` field access
// expression.
if (e.Kind != "Var" || !sameVariable(e.Variable, decl.Variable)) {
return false;
}
return true;
}
return false;
}
// Similar to the above, except treat uses of a reference as if they were uses
// of the dereferenced contents. This requires knowing the type of the
// variable, and so takes its declaration rather than the variable itself.
function expressionIsDeclaredVariable(exp, decl)
{
exp = maybeDereference(exp, decl);
return expressionIsVariable(exp, decl.Variable);
}
function expressionIsMethodOnVariableDecl(exp, decl)
{
// This might be calling a method on a base class, in which case exp will
// be an unnamed field of the variable instead of the variable itself.
while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:"))
exp = exp.Exp[0];
return expressionIsDeclaredVariable(exp, decl);
}
// Return whether the edge starts the live range of a variable's value, by setting
// it to some new value. Examples of starting obj's live range:
//
// obj = foo;
// obj = foo();
// obj = foo(obj); // uses previous value but then sets to new value
// SomeClass obj(true, 1); // constructor
//
function edgeStartsValueLiveRange(ffg, edge, decl)
{
// Direct assignments start live range of lhs: var = value (but not
// including <returnval> = nullptr).
if (edge.Kind == "Assign") {
const [lhs, rhs] = edge.Exp;
return (exprCoversVariable(ffg, lhs, decl) &&
!isReturningImmobileValue(edge, decl.Variable));
}
if (edge.Kind != "Call")
return false;
// Assignments of call results start live range: var = foo()
if (1 in edge.Exp) {
var lhs = edge.Exp[1];
if (exprCoversVariable(ffg, lhs, decl))
return true;
}
// Constructor calls start live range of instance: SomeClass var(...)
if ("PEdgeCallInstance" in edge) {
var instance = edge.PEdgeCallInstance.Exp;
// Kludge around incorrect dereference on some constructor calls.
if (instance.Kind == "Drf")
instance = instance.Exp[0];
if (!exprCoversVariable(ffg, instance, decl))
return false;
var callee = edge.Exp[0];
if (callee.Kind != "Var")
return false;
assert(callee.Variable.Kind == "Func");
var calleeName = readable(callee.Variable.Name[0]);
// Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('.
var openParen = calleeName.indexOf('(');
if (openParen < 0)
return false;
calleeName = calleeName.substring(0, openParen);
var lastColon = calleeName.lastIndexOf('::');
if (lastColon < 0)
return false;
var constructorName = calleeName.substr(lastColon + 2);
calleeName = calleeName.substr(0, lastColon);
var lastTemplateOpen = calleeName.lastIndexOf('<');
if (lastTemplateOpen >= 0)
calleeName = calleeName.substr(0, lastTemplateOpen);
if (calleeName.endsWith(constructorName))
return true;
}
return false;
}
// Return the result of a `matcher` callback on the call found in the given
// `edge`, if the edge is a direct call to a named function (if not, return false).
// `matcher` is given the name of the callee (actually, a tuple
// [fully qualified name, base name]), an array of expressions containing the
// arguments, and if the result of the call is assigned to a variable,
// the expression representing that variable(the lhs).
//
// documentation of the data structure used here.
function matchEdgeCall(edge, matcher) {
if (edge.Kind != "Call") {
return false;
}
const callee = edge.Exp[0];
if (edge.Type.Kind == 'Function' &&
edge.Exp[0].Kind == 'Var' &&
edge.Exp[0].Variable.Kind == 'Func') {
const calleeName = edge.Exp[0].Variable.Name;
const args = edge.PEdgeCallArguments;
const argExprs = args ? args.Exp : [];
const lhs = edge.Exp[1]; // May be undefined
return matcher(calleeName, argExprs, lhs);
}
return false;
}
function edgeMarksVariableGCSafe(edge, variable) {
return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => {
// explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
return (calleeName[1] == 'MarkVariableAsGCSafe' &&
calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") &&
argExprs.length == 1 &&
expressionIsVariable(argExprs[0], variable));
});
}
// Match an optional <namespace>:: followed by the class name,
// and then an optional template parameter marker.
//
// Example: mozilla::dom::UniquePtr<...
//
function parseTypeName(typeName) {
const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/);
if (!m) {
return undefined;
}
const [, type, raw_namespace, classname, is_specialized] = m;
const namespace = raw_namespace === null ? "" : raw_namespace;
return { type, namespace, classname, is_specialized }
}
// Return whether an edge "clears out" a variable's value for the following
// statements. A simple example would be
//
// var = nullptr;
//
// A more complex example is a Maybe<T> that gets reset:
//
// Maybe<AutoCheckCannotGC> nogc(std::in_place, cx);
// nogc.reset();
// gc(); // <-- not a problem; nogc is invalidated by prev line
// nogc.emplace(cx);
// foo(nogc);
//
// Yet another example is a UniquePtr being passed by value, which means the
// receiver takes ownership:
//
// UniquePtr<JSObject*> uobj(obj);
// foo(uobj);
// gc();
//
function edgeEndsValueLiveRange(ffg, edge, decl, body)
{
if (edge.Kind == "Assign") {
// [c++] somevar = nullptr;
const [lhs, rhs] = edge.Exp;
if (exprCoversVariable(ffg, lhs, decl) && isImmobileValue(rhs)) {
return true;
}
// Resetting a Maybe<> as part of a constexpr constructor.
// The expression is Assign v.<base>.<base>.mIsSome = 0.
if (isImmobileValue(rhs) && lhs.Kind == "Fld") {
if (lhs.Field.Name[0] == "mIsSome" &&
lhs.Field.FieldCSU.Type.Name.includes("::MaybeStorage<") &&
rhs.Kind === "Int" &&
rhs.String === "0")
{
// Dig the `v` out of the lhs. It is the innermost Exp[0] of a sequence of field accesses.
let inner = lhs;
while (inner.Kind == "Fld") {
inner = inner.Exp[0];
}
return expressionIsVariable(inner, decl.Variable);
}
}
return false;
}
if (edge.Kind != "Call")
return false;
if (edgeMarksVariableGCSafe(edge, decl.Variable)) {
// explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
return true;
}
if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => {
return calleeName[1] == 'move' && calleeName[0].includes('std::move(') &&
expressionIsDeclaredVariable(argExprs[0], decl) &&
lhs &&
lhs.Kind == 'Var' &&
lhs.Variable.Kind == 'Temp';
})) {
// temp = std::move(var)
//
// If var is a UniquePtr, and we pass it into something that takes
// ownership, then it should be considered to be invalid. Example:
//
// consume(std::move(var));
//
// where consume takes a UniquePtr. This will compile to something like
//
// UniquePtr* __temp_1 = &std::move(var);
// UniquePtr&& __temp_2(*temp_1); // move constructor
// consume(__temp_2);
// ~UniquePtr(__temp_2);
//
// The line commented with "// move constructor" is a result of passing
// a UniquePtr as a parameter. If consume() took a UniquePtr&&
// directly, this would just be:
//
// UniquePtr* __temp_1 = &std::move(var);
// consume(__temp_1);
//
// which is not guaranteed to move from the reference. It might just
// ignore the parameter. We can't predict what consume(UniquePtr&&)
// will do. We do know that UniquePtr(UniquePtr&& other) moves out of
// `other`.
//
// The std::move() technically is irrelevant, but because we only care
// about bare variables, it has to be used, which is fortunate because
// the UniquePtr&& constructor operates on a temporary, not the
// variable we care about.
const lhs = edge.Exp[1].Variable;
if (basicBlockEatsVariable(ffg, lhs, body, edge.Index[1]))
return true;
}
const callee = edge.Exp[0];
if (edge.Type.Kind == 'Function' &&
edge.Type.TypeFunctionCSU &&
edge.PEdgeCallInstance &&
expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl))
{
const typeName = edge.Type.TypeFunctionCSU.Type.Name;
// Synthesize a zero-arg constructor name like
// mozilla::dom::UniquePtr<T>::UniquePtr(). Note that the `<T>` is
// literal -- the pretty name from sixgill will render the actual
// constructor name as something like
//
// UniquePtr<T>::UniquePtr() [where T = int]
//
const parsed = parseTypeName(typeName);
if (parsed) {
const { type, namespace, classname, is_specialized } = parsed;
// special-case: the initial constructor that doesn't provide a value.
// Useful for things like Maybe<T>.
const template = is_specialized ? '<T>' : '';
const ctorName = `${namespace}${classname}${template}::${classname}()`;
if (callee.Kind == 'Var' &&
typesWithSafeConstructors.has(type) &&
callee.Variable.Name[0].includes(ctorName))
{
return true;
}
// special-case: UniquePtr::reset() and similar.
if (callee.Kind == 'Var' &&
type in resetterMethods &&
resetterMethods[type].has(callee.Variable.Name[1]))
{
return true;
}
}
}
// special-case: passing UniquePtr<T> by value.
if (edge.Type.Kind == 'Function' &&
edge.Type.TypeFunctionArgument &&
edge.PEdgeCallArguments)
{
for (const i in edge.Type.TypeFunctionArgument) {
const param = edge.Type.TypeFunctionArgument[i];
if (param.Type.Kind != 'CSU')
continue;
if (!param.Type.Name.startsWith("mozilla::UniquePtr<"))
continue;
const arg = edge.PEdgeCallArguments.Exp[i];
if (expressionIsVariable(arg, decl.Variable)) {
return true;
}
}
}
return false;
}
function edgeMovesVariable(edge, decl)
{
if (edge.Kind != 'Call')
return false;
const callee = edge.Exp[0];
if (callee.Kind == 'Var' &&
callee.Variable.Kind == 'Func')
{
const { Variable: { Name: [ fullname, shortname ] } } = callee;
// Match an rvalue parameter.
if (!edge || !edge.PEdgeCallArguments || !edge.PEdgeCallArguments.Exp) {
return false;
}
for (const arg of edge.PEdgeCallArguments.Exp) {
if (arg.Kind != 'Drf') continue;
const val = arg.Exp[0];
if (val.Kind == 'Var' && sameVariable(val.Variable, decl.Variable)) {
// This argument is the variable we're looking for. Return true
// if it is passed as an rvalue reference.
const type = decl.Type;
if (type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) {
return true;
}
}
}
}
return false;
}
// Scan forward through the basic block in 'body' starting at 'startpoint',
// looking for a call that passes 'variable' to a move constructor that
// "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)).
function basicBlockEatsVariable(ffg, variable, body, startpoint)
{
const decl = ffg.lookupDecl(variable);
assert(decl);
const successors = getSuccessors(body);
let point = startpoint;
while (point in successors) {
// Only handle a single basic block. If it forks, stop looking.
const edges = successors[point];
if (edges.length != 1) {
return false;
}
const edge = edges[0];
if (edgeMovesVariable(edge, decl)) {
return true;
}
// edgeStartsValueLiveRange will find places where 'variable' is given
// a new value. Never observed in practice, since this function is only
// called with a temporary resulting from std::move(), which is used
// immediately for a call. But just to be robust to future uses:
if (edgeStartsValueLiveRange(ffg, edge, decl)) {
return false;
}
point = edge.Index[1];
}
return false;
}
var PROP_REFCNT = 1 << 0;
var PROP_SHARED_PTR_DTOR = 1 << 1;
function getCalleeProperties(calleeName) {
let props = 0;
if (isRefcountedDtor(calleeName)) {
props |= PROP_REFCNT;
}
if (calleeName.includes("~shared_ptr()")) {
props |= PROP_SHARED_PTR_DTOR;
}
return props;
}
// Basic C++ ABI mangling: prefix an identifier with its length, in decimal.
function mangle(name) {
return name.length + name;
}
var TriviallyDestructibleTypes = new Set([
// Single-token types from
"void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double",
"__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t",
// Remaining observed cases. These are types T in shared_ptr<T> that have
// been observed, where the types themselves have trivial destructors, and
// the custom deleter doesn't do anything nontrivial that we might care about.
"_IO_FILE"
]);
function synthesizeDestructorName(className) {
if (className.includes("<") || className.includes(" ") || className.includes("{")) {
return;
}
if (TriviallyDestructibleTypes.has(className)) {
return;
}
const parts = className.split("::");
const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev";
const pretty_dtor = `void ${className}::~${parts.at(-1)}()`;
// Note that there will be a later check to verify that the function name
// synthesized here is an actual function, and assert if not (see
// assertFunctionExists() in computeCallgraph.js.)
return mangled_dtor + "$" + pretty_dtor;
}
function getCallEdgeProperties(ffg, body, edge, calleeName) {
let attrs = 0;
let extraCalls = [];
if (edge.Kind !== "Call") {
return { attrs, extraCalls };
}
const props = getCalleeProperties(calleeName);
if (props & PROP_REFCNT) {
// std::swap of two refcounted values thinks it can drop the
// ref count to zero. Or rather, it just calls operator=() in a context
// where the refcount will never drop to zero.
const blockId = blockIdentifier(body);
if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) {
// Replace the refcnt release call with nothing. It's not going to happen.
attrs |= ATTR_REPLACED;
}
}
if (props & PROP_SHARED_PTR_DTOR) {
// Replace shared_ptr<T>::~shared_ptr() calls to T::~T() calls.
// Note that this will only apply to simple cases.
// Any templatized type, in particular, will be ignored and the original
// call tree will be left alone. If this triggers a hazard, then we can
// consider extending the mangling support.
//
// If the call to ~shared_ptr is not replaced, then it might end up calling
// an unknown function pointer. This does not always happen-- in some cases,
// the call tree below ~shared_ptr will invoke the correct destructor without
// going through function pointers.
const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/);
assert(m);
let className = m[1] == "T" ? m[2] : m[1];
assert(className != "");
// cv qualification does not apply to destructors.
className = className.replace("const ", "");
className = className.replace("volatile ", "");
const dtor = synthesizeDestructorName(className);
if (dtor) {
attrs |= ATTR_REPLACED;
extraCalls.push({
attrs: ATTR_SYNTHETIC,
name: dtor,
});
}
}
if ((props & PROP_REFCNT) == 0) {
return { attrs, extraCalls };
}
let callee = edge.Exp[0];
while (callee.Kind === "Drf") {
callee = callee.Exp[0];
}
const instance = edge.PEdgeCallInstance.Exp;
if (instance.Kind !== "Var") {
// TODO: handle field destructors
return { attrs, extraCalls };
}
// Test whether the dtor call is dominated by operations on the variable
// that mean it will not go to a zero refcount in the dtor: either because
// it's already dead (eg r.forget() was called) or because it can be proven
// to have a ref count of greater than 1. This is implemented by looking
// for the reverse: find a path scanning backwards from the dtor call where
// the variable is used in any way that does *not* ensure that it is
// trivially destructible.
const decl = ffg.lookupDecl(instance.Variable);
const visitor = new class DominatorVisitor extends Visitor {
// Do not revisit nodes. For new nodes, relay the decision made by
// extend_path.
next_action(seen, current) { return seen ? "prune" : current; }
// We don't revisit, so always use the new.
merge_info(seen, current) { return current; }
// Return the action to take from this node.
extend_path(edge, body, ppoint, successor_value) {
if (!edge) {
// Dummy edge to join two points.
return "continue";
}
if (!edgeUsesVariable(ffg, edge, decl, body)) {
// Nothing of interest on this edge, keep searching.
return "continue";
}
if (edgeEndsValueLiveRange(ffg, edge, decl, body)) {
// This path is safe!
return "prune";
}
// Unsafe. Found a use that might set the variable to a
// nonzero refcount.
return "done";
}
}(ffg);
// Searching upwards from a destructor call, return the opposite of: is
// there a path to a use or the start of the function that does NOT hit a
// safe assignment like refptr.forget() first?
//
// In graph terms: return whether the destructor call is dominated by forget() calls (or similar).
const edgeIsNonReleasingDtor = !BFS_upwards(
body, edge.Index[0], ffg, visitor, "start",
false // Return value if we do not reach the root without finding a non-forget() use.
);
if (edgeIsNonReleasingDtor) {
attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING;
}
return { attrs, extraCalls };
}
// gcc uses something like "__dt_del " for virtual destructors that it
// generates.
function isSyntheticVirtualDestructor(funcName) {
return funcName.endsWith(" ");
}
function typedField(field)
{
if ("FieldInstanceFunction" in field) {
// Virtual call
//
// This makes a minimal attempt at dealing with overloading, by
// incorporating the number of parameters. So far, that is all that has
// been needed. If more is needed, sixgill will need to produce a full
// mangled type.
const {Type, Name: [name]} = field;
// Virtual destructors don't need a type or argument count,
// and synthetic ones don't have them filled in.
if (isSyntheticVirtualDestructor(name)) {
return name;
}
var nargs = 0;
if (Type.Kind == "Function" && "TypeFunctionArguments" in Type)
nargs = Type.TypeFunctionArguments.Type.length;
return name + ":" + nargs;
} else {
// Function pointer field
return field.Name[0];
}
}
function fieldKey(csuName, field)
{
return csuName + "." + typedField(field);
}