/***** * profiler.h * Andy Hammerlindl 2010/07/24 * * Profiler for the execution of the virtual machine bytecode. *****/ #ifndef PROFILER_H #define PROFILER_H #include #include #include "inst.h" #include "seconds.h" namespace vm { #ifdef DEBUG_BLTIN string lookupBltin(bltin b); #endif inline position positionFromLambda(lambda *func) { if (func == 0) return position(); program& code = *func->code; // Check for empty program. if (code.begin() == code.end()) return position(); return code.begin()->pos; } inline void printNameFromLambda(ostream& out, lambda *func) { if (!func) { out << ""; return; } #ifdef DEBUG_FRAME string name = func->name; #else string name = ""; #endif // If unnamed, use the pointer address. if (name.empty()) out << func; else out << name; out << " at "; positionFromLambda(func).printTerse(out); } inline void printNameFromBltin(ostream& out, bltin b) { #ifdef DEBUG_BLTIN string name = lookupBltin(b); #else string name = ""; #endif if (!name.empty()) out << name << " "; out << "(builtin at " << (void *)b << ")"; } class profiler : public gc { // To do call graph analysis, each call stack that occurs in practice is // represented by a node. For instance, if f and g are functions, then // f -> g -> g // is represented by a node and // g -> f -> g // is represented by a different one. struct node { // The top-level function of the call stack. It is either an asymptote // function with a given lambda, or a builtin function, with a given // bltin. lambda *func; bltin cfunc; // The number of times the top-level function has been called resulting in // this specific call stack. int calls; // The number of bytecode instructions executed with this exact call stack. // It does not include time spent in called function. int instructions; // Number of instructions spent in this function or its children. This is // computed by computeTotals. int instTotal; // The number of real-time nanoseconds spent in this node. WARNING: May // be wildly inaccurate. long long nsecs; // Total including children. long long nsecsTotal; // Call stacks resulting from calls during this call stack. mem::vector children; node() : func(0), cfunc(0), calls(0), instructions(0), instTotal(0), nsecs(0), nsecsTotal(0) {} node(lambda *func) : func(func), cfunc(0), calls(0), instructions(0), instTotal(0), nsecs(0), nsecsTotal(0) {} node(bltin b) : func(0), cfunc(b), calls(0), instructions(0), instTotal(0), nsecs(0), nsecsTotal(0) {} // Return the call stack resulting from a call to func when this call // stack is current. node *getChild(lambda *func) { size_t n = children.size(); for (size_t i = 0; i < n; ++i) if (children[i].func == func) return &children[i]; // Not found, create a new one. children.push_back(node(func)); return &children.back(); } node *getChild(bltin func) { size_t n = children.size(); for (size_t i = 0; i < n; ++i) if (children[i].cfunc == func) return &children[i]; // Not found, create a new one. children.push_back(node(func)); return &children.back(); } void computeTotals() { instTotal = instructions; nsecsTotal = nsecs; size_t n = children.size(); for (size_t i = 0; i < n; ++i) { children[i].computeTotals(); instTotal += children[i].instTotal; nsecsTotal += children[i].nsecsTotal; } } void pydump(ostream& out) { #ifdef DEBUG_FRAME string name = func ? func->name : ""; #else string name = ""; #endif out << "dict(\n" << " name = '" << name << " " << func << "',\n" << " pos = '" << positionFromLambda(func) << "',\n" << " calls = " << calls << ",\n" << " instructions = " << instructions << ",\n" << " nsecs = " << nsecs << ",\n" << " children = [\n"; size_t n = children.size(); for (size_t i = 0; i < n; ++i) { children[i].pydump(out); out << ",\n"; } out << " ])\n"; } }; // An empty call stack. node emptynode; // All of the callstacks. std::stack callstack; node &topnode() { return *callstack.top(); } // Arc representing one function calling another. Used only for building // the output for kcachegrind. struct arc : public gc { int calls; int instTotal; long long nsecsTotal; arc() : calls(0), instTotal(0), nsecsTotal(0) {} void add(node& n) { calls += n.calls; instTotal += n.instTotal; nsecsTotal += n.nsecsTotal; } }; // Representing one function and its calls to other functions. struct fun : public gc { int instructions; long long nsecs; mem::map arcs; mem::map carcs; fun() : instructions(0), nsecs(0) {} void addChildTime(node& n) { if (n.cfunc) carcs[n.cfunc].add(n); else arcs[n.func].add(n); } void analyse(node& n) { instructions += n.instructions; nsecs += n.nsecs; size_t numChildren = n.children.size(); for (size_t i = 0; i < numChildren; ++i) addChildTime(n.children[i]); } void dump(ostream& out) { // The unused line number needed by kcachegrind. static const string POS = "1"; out << POS << " " << instructions << " " << nsecs << "\n"; for (mem::map::iterator i = arcs.begin(); i != arcs.end(); ++i) { lambda *l = i->first; arc& a = i->second; out << "cfl=" << positionFromLambda(l) << "\n"; out << "cfn="; printNameFromLambda(out, l); out << "\n"; out << "calls=" << a.calls << " " << POS << "\n"; out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n"; } for (mem::map::iterator i = carcs.begin(); i != carcs.end(); ++i) { bltin b = i->first; arc& a = i->second; out << "cfl=C++ code" << endl; out << "cfn="; printNameFromBltin(out, b); out << "\n"; out << "calls=" << a.calls << " " << POS << "\n"; out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n"; } } }; // The data for each asymptote function. mem::map funs; // The data for each C++ function. mem::map cfuns; void analyseNode(node& n) { fun& f = n.cfunc ? cfuns[n.cfunc] : funs[n.func]; f.analyse(n); size_t numChildren = n.children.size(); for (size_t i = 0; i < numChildren; ++i) analyseNode(n.children[i]); } // Convert data in the tree of callstack nodes into data for each function. void analyseData() { emptynode.computeTotals(); analyseNode(emptynode); } // Timing data. utils::cpuTimer timestamp; void startLap() { timestamp.reset(); } // Called whenever the stack is about to change, in order to record the time // duration for the current node. void recordTime() { topnode().nsecs += (long long) timestamp.nanoseconds(true); } public: profiler(); void beginFunction(lambda *func); void endFunction(lambda *func); void beginFunction(bltin func); void endFunction(bltin func); void recordInstruction(); // TODO: Add position, type of instruction info to profiling. // Dump all of the data out in a format that can be read into Python. void pydump(ostream &out); // Dump all of the data in a format for kcachegrind. void dump(ostream& out); }; inline profiler::profiler() : emptynode() { callstack.push(&emptynode); startLap(); } inline void profiler::beginFunction(lambda *func) { assert(func); assert(!callstack.empty()); recordTime(); callstack.push(topnode().getChild(func)); ++topnode().calls; } inline void profiler::endFunction(lambda *func) { assert(func); assert(!callstack.empty()); assert(topnode().func == func); recordTime(); callstack.pop(); } inline void profiler::beginFunction(bltin cfunc) { assert(cfunc); assert(!callstack.empty()); recordTime(); callstack.push(topnode().getChild(cfunc)); ++topnode().calls; } inline void profiler::endFunction(bltin cfunc) { assert(cfunc); assert(!callstack.empty()); assert(topnode().cfunc == cfunc); recordTime(); callstack.pop(); } inline void profiler::recordInstruction() { assert(!callstack.empty()); ++topnode().instructions; } inline void profiler::pydump(ostream& out) { out << "profile = "; emptynode.pydump(out); } inline void profiler::dump(ostream& out) { analyseData(); out << "events: Instructions Nanoseconds\n"; for (mem::map::iterator i = funs.begin(); i != funs.end(); ++i) { lambda *l = i->first; fun& f = i->second; out << "fl=" << positionFromLambda(l) << "\n"; out << "fn="; printNameFromLambda(out, l); out << "\n"; f.dump(out); } for (mem::map::iterator i = cfuns.begin(); i != cfuns.end(); ++i) { bltin b = i->first; fun& f = i->second; out << "fl=C++ code\n"; out << "fn="; printNameFromBltin(out, b); out << "\n"; f.dump(out); } } } // namespace vm #endif // PROFILER_H