A single moderately complex shader, compiled to multiple targets by the harness (same source, varying `-target`) to isolate generateOutput cost. `n` scales the amount of straight-line math the backend must emit.
bucket: codegen · compile mode: target · flags: -target spirv -emit-spirv-directly · default N: 400
Full sub-counter decomposition of compileInner — named leaf timers plus (self) residuals (a parent's time not covered by a named child, e.g. the autodiff transform in linkAndOptimizeIR (self)). Topmost band traces compileInner; hover a band for its phase.
exact compiled source (N = 400); long files show the first 40 lines, the area around computeMain (±40), and the last 40 lines (gaps elided)
// AUTO-GENERATED by perf-suite/workloads.py — do not edit by hand.
// Read-write buffer of floats used as both input and output by computeMain:
// each invocation loads outBuf[tid.x], transforms it, and stores the result
// back to the same element.
RWStructuredBuffer<float> outBuf;
// Compute entry point (64x1x1 threads per group).
//
// Loads one float from outBuf at index tid.x, runs it through a long chain of
// straight-line updates to `acc`, and writes the final value back to the same
// element. Every statement reads the `acc` produced by the previous one, so
// the chain is strictly sequential.
//
// Each visible step has the form
//     acc = acc * 1.0009 + sin(acc + k) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + (k + 1));
// with k increasing by 1.0 per statement (0..36 and 362..399 are visible here).
// NOTE(review): the elided statements presumably continue the same pattern for
// k = 37..361 — confirm against the generator.
//
// tid: dispatch-thread ID; only tid.x is used, as the buffer index. No bounds
//      check is performed on that index in this function.
[shader("compute")]
[numthreads(64,1,1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
// Seed the accumulator from this thread's buffer element.
float x = outBuf[tid.x];
float acc = x;
// Unrolled update chain: only the sin() offset and the sqrt() offset change
// from one statement to the next.
acc = acc * 1.0009 + sin(acc + 0.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 1.0);
acc = acc * 1.0009 + sin(acc + 1.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 2.0);
acc = acc * 1.0009 + sin(acc + 2.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 3.0);
acc = acc * 1.0009 + sin(acc + 3.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 4.0);
acc = acc * 1.0009 + sin(acc + 4.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 5.0);
acc = acc * 1.0009 + sin(acc + 5.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 6.0);
acc = acc * 1.0009 + sin(acc + 6.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 7.0);
acc = acc * 1.0009 + sin(acc + 7.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 8.0);
acc = acc * 1.0009 + sin(acc + 8.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 9.0);
acc = acc * 1.0009 + sin(acc + 9.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 10.0);
acc = acc * 1.0009 + sin(acc + 10.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 11.0);
acc = acc * 1.0009 + sin(acc + 11.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 12.0);
acc = acc * 1.0009 + sin(acc + 12.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 13.0);
acc = acc * 1.0009 + sin(acc + 13.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 14.0);
acc = acc * 1.0009 + sin(acc + 14.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 15.0);
acc = acc * 1.0009 + sin(acc + 15.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 16.0);
acc = acc * 1.0009 + sin(acc + 16.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 17.0);
acc = acc * 1.0009 + sin(acc + 17.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 18.0);
acc = acc * 1.0009 + sin(acc + 18.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 19.0);
acc = acc * 1.0009 + sin(acc + 19.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 20.0);
acc = acc * 1.0009 + sin(acc + 20.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 21.0);
acc = acc * 1.0009 + sin(acc + 21.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 22.0);
acc = acc * 1.0009 + sin(acc + 22.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 23.0);
acc = acc * 1.0009 + sin(acc + 23.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 24.0);
acc = acc * 1.0009 + sin(acc + 24.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 25.0);
acc = acc * 1.0009 + sin(acc + 25.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 26.0);
acc = acc * 1.0009 + sin(acc + 26.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 27.0);
acc = acc * 1.0009 + sin(acc + 27.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 28.0);
acc = acc * 1.0009 + sin(acc + 28.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 29.0);
acc = acc * 1.0009 + sin(acc + 29.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 30.0);
acc = acc * 1.0009 + sin(acc + 30.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 31.0);
acc = acc * 1.0009 + sin(acc + 31.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 32.0);
acc = acc * 1.0009 + sin(acc + 32.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 33.0);
acc = acc * 1.0009 + sin(acc + 33.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 34.0);
acc = acc * 1.0009 + sin(acc + 34.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 35.0);
acc = acc * 1.0009 + sin(acc + 35.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 36.0);
acc = acc * 1.0009 + sin(acc + 36.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 37.0);
// … 325 lines omitted …
acc = acc * 1.0009 + sin(acc + 362.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 363.0);
acc = acc * 1.0009 + sin(acc + 363.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 364.0);
acc = acc * 1.0009 + sin(acc + 364.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 365.0);
acc = acc * 1.0009 + sin(acc + 365.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 366.0);
acc = acc * 1.0009 + sin(acc + 366.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 367.0);
acc = acc * 1.0009 + sin(acc + 367.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 368.0);
acc = acc * 1.0009 + sin(acc + 368.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 369.0);
acc = acc * 1.0009 + sin(acc + 369.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 370.0);
acc = acc * 1.0009 + sin(acc + 370.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 371.0);
acc = acc * 1.0009 + sin(acc + 371.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 372.0);
acc = acc * 1.0009 + sin(acc + 372.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 373.0);
acc = acc * 1.0009 + sin(acc + 373.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 374.0);
acc = acc * 1.0009 + sin(acc + 374.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 375.0);
acc = acc * 1.0009 + sin(acc + 375.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 376.0);
acc = acc * 1.0009 + sin(acc + 376.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 377.0);
acc = acc * 1.0009 + sin(acc + 377.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 378.0);
acc = acc * 1.0009 + sin(acc + 378.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 379.0);
acc = acc * 1.0009 + sin(acc + 379.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 380.0);
acc = acc * 1.0009 + sin(acc + 380.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 381.0);
acc = acc * 1.0009 + sin(acc + 381.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 382.0);
acc = acc * 1.0009 + sin(acc + 382.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 383.0);
acc = acc * 1.0009 + sin(acc + 383.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 384.0);
acc = acc * 1.0009 + sin(acc + 384.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 385.0);
acc = acc * 1.0009 + sin(acc + 385.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 386.0);
acc = acc * 1.0009 + sin(acc + 386.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 387.0);
acc = acc * 1.0009 + sin(acc + 387.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 388.0);
acc = acc * 1.0009 + sin(acc + 388.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 389.0);
acc = acc * 1.0009 + sin(acc + 389.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 390.0);
acc = acc * 1.0009 + sin(acc + 390.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 391.0);
acc = acc * 1.0009 + sin(acc + 391.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 392.0);
acc = acc * 1.0009 + sin(acc + 392.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 393.0);
acc = acc * 1.0009 + sin(acc + 393.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 394.0);
acc = acc * 1.0009 + sin(acc + 394.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 395.0);
acc = acc * 1.0009 + sin(acc + 395.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 396.0);
acc = acc * 1.0009 + sin(acc + 396.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 397.0);
acc = acc * 1.0009 + sin(acc + 397.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 398.0);
acc = acc * 1.0009 + sin(acc + 398.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 399.0);
acc = acc * 1.0009 + sin(acc + 399.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 400.0);
// Store the final accumulator back to the element it was loaded from.
outBuf[tid.x] = acc;
}