A single shader that ramps *several* realistic dimensions together as n grows, modelling how a real shader's compile cost behaves going from simple to highly complex — as opposed to the single-axis stressors, which isolate one pass. Each step adds: a branchy helper (control flow / SSA + phi), a generic call (sema + specialization), a small bounded inner loop, resource reads, and a dynamic-dispatch site. Helpers are chained in bounded-depth groups (call graph depth) so doubling n roughly doubles total work across all of front-end, IR opt, and codegen at once. Sweep this to get the holistic complexity->compile-time curve and a floor+slope fit.
bucket: realistic_scaling · compile mode: target · flags: -target spirv -emit-spirv-directly · default N: 160
Full sub-counter decomposition of compileInner — named leaf timers plus (self) residuals (a parent's time not covered by a named child, e.g. the autodiff transform in linkAndOptimizeIR (self)). Topmost band traces compileInner; hover a band for its phase.
exact compiled source (N = 160); long files show the first 40 lines, the area around computeMain (±40), and the last 40 lines (gaps elided)
// AUTO-GENERATED by perf-suite/workloads.py — do not edit by hand.
// Read-write float buffer; computeMain stores one result per thread at outBuf[idx].
RWStructuredBuffer<float> outBuf;
// Read-only float buffer; computeMain loads its starting accumulator from inBuf[idx].
StructuredBuffer<float> inBuf;
// Unary float operation. Implemented by OpAdd, OpMul and OpSq and used by
// dispatch() as the type of a value selected at run time.
interface IOp
{
    // Maps x to a new float value.
    float apply(float x);
}
// IOp implementation: returns x + 1.0.
struct OpAdd : IOp
{
    float apply(float x)
    {
        float shifted = x + 1.0;
        return shifted;
    }
}
// IOp implementation: returns x * 1.5 + 0.25.
struct OpMul : IOp
{
    float apply(float x)
    {
        float scaled = x * 1.5;
        return scaled + 0.25;
    }
}
// IOp implementation: returns x * x - 0.5.
struct OpSq : IOp
{
    float apply(float x)
    {
        float squared = x * x;
        return squared - 0.5;
    }
}
// Generic polynomial over any IArithmetic type: a*a + b*a + a.
// The terms are summed left to right, i.e. (a*a + b*a) + a.
T gpoly<T : IArithmetic>(T a, T b)
{
    T squareTerm = a * a;
    T crossTerm = b * a;
    return squareTerm + crossTerm + a;
}
// Picks one of the three IOp implementations from id % 3 and applies it to x.
//   remainder 0 -> OpAdd, remainder 1 -> OpMul, anything else -> OpSq.
// The operation is held in an IOp-typed variable, so apply() is called
// through the interface rather than on a concrete struct.
float dispatch(int id, float x)
{
    int selector = id % 3;

    IOp op;
    if (selector == 0)
    {
        op = OpAdd();
    }
    else if (selector == 1)
    {
        op = OpMul();
    }
    else
    {
        op = OpSq();
    }

    return op.apply(x);
}
// Helper stage 0: starts directly from x (no earlier helper is called).
// Applies a two-way branch on the value, then a short loop that mixes in
// gpoly<float>(value, 0.0).
float h_0(float x)
{
    float value = x;

    if (value > 0.0)
    {
        value = value * 1.01 + sin(value + 0.0);
    }
    else
    {
        value = value - cos(value * 0.5) * 0.25;
    }

    // The bound is recomputed from the current value on every iteration;
    // (int(value) & 3) + 1 is always in 1..4, matching MaxIters(4).
    [MaxIters(4)]
    for (int iter = 0; iter < (int(value) & 3) + 1; ++iter)
    {
        value = value * 0.999 + gpoly<float>(value, 0.0);
    }

    return value;
}
// Helper stage 1: feeds x through h_0 first, then applies its own two-way
// branch (threshold 1.0) and a short loop that mixes in gpoly<float>(t, 1.0).
float h_1(float x)
{
float t = h_0(x);
if (t > 1.0) t = t * 1.01 + sin(t + 1.0);
else t = t - cos(t * 0.5) * 0.25;
// Loop bound is re-evaluated from the current t each iteration; (int(t) & 3) + 1 is in 1..4.
[MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
t = t * 0.999 + gpoly<float>(t, 1.0);
return t;
// … 1386 lines omitted …
}
// Helper stage 156: feeds x through h_155 first, then applies its own
// two-way branch (threshold 2.0) and a short loop that mixes in
// gpoly<float>(value, 1.0).
float h_156(float x)
{
    float value = h_155(x);

    if (value > 2.0)
    {
        value = value * 1.01 + sin(value + 2.0);
    }
    else
    {
        value = value - cos(value * 0.5) * 0.25;
    }

    // The bound is recomputed from the current value on every iteration;
    // (int(value) & 3) + 1 is always in 1..4, matching MaxIters(4).
    [MaxIters(4)]
    for (int iter = 0; iter < (int(value) & 3) + 1; ++iter)
    {
        value = value * 0.999 + gpoly<float>(value, 1.0);
    }

    return value;
}
// Helper stage 157: feeds x through h_156 first, then applies its own
// two-way branch (threshold 3.0) and a short loop that mixes in
// gpoly<float>(value, 2.0).
float h_157(float x)
{
    float value = h_156(x);

    if (value > 3.0)
    {
        value = value * 1.01 + sin(value + 3.0);
    }
    else
    {
        value = value - cos(value * 0.5) * 0.25;
    }

    // The bound is recomputed from the current value on every iteration;
    // (int(value) & 3) + 1 is always in 1..4, matching MaxIters(4).
    [MaxIters(4)]
    for (int iter = 0; iter < (int(value) & 3) + 1; ++iter)
    {
        value = value * 0.999 + gpoly<float>(value, 2.0);
    }

    return value;
}
// Helper stage 158: feeds x through h_157 first, then applies its own
// two-way branch (threshold 4.0) and a short loop that mixes in
// gpoly<float>(value, 3.0).
float h_158(float x)
{
    float value = h_157(x);

    if (value > 4.0)
    {
        value = value * 1.01 + sin(value + 4.0);
    }
    else
    {
        value = value - cos(value * 0.5) * 0.25;
    }

    // The bound is recomputed from the current value on every iteration;
    // (int(value) & 3) + 1 is always in 1..4, matching MaxIters(4).
    [MaxIters(4)]
    for (int iter = 0; iter < (int(value) & 3) + 1; ++iter)
    {
        value = value * 0.999 + gpoly<float>(value, 3.0);
    }

    return value;
}
// Helper stage 159: feeds x through h_158 first, then applies its own
// two-way branch (threshold 5.0) and a short loop that mixes in
// gpoly<float>(value, 4.0).
float h_159(float x)
{
    float value = h_158(x);

    if (value > 5.0)
    {
        value = value * 1.01 + sin(value + 5.0);
    }
    else
    {
        value = value - cos(value * 0.5) * 0.25;
    }

    // The bound is recomputed from the current value on every iteration;
    // (int(value) & 3) + 1 is always in 1..4, matching MaxIters(4).
    [MaxIters(4)]
    for (int iter = 0; iter < (int(value) & 3) + 1; ++iter)
    {
        value = value * 0.999 + gpoly<float>(value, 4.0);
    }

    return value;
}
// Compute entry point, declared with a 64x1x1 thread group.
// Each thread loads inBuf[idx] into an accumulator, runs 20 steps that each
// add one helper result (h_7, h_15, ..., h_159 — every 8th helper) followed by
// one dispatch() result, adds a final integer gpoly term, and stores the
// accumulator to outBuf[idx].
// Every step reads the accumulator left by the previous one, so the
// statement order is significant.
// NOTE(review): idx is not range-checked against either buffer here;
// presumably the dispatch size matches the buffer lengths — confirm with the harness.
[shader("compute")]
[numthreads(64,1,1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
// Only the x component of the thread ID is used, as a signed index.
int idx = int(tid.x);
float acc = inBuf[idx];
// Step i: helper h_(8*i + 7) on (acc + i), then dispatch with id (idx + i).
acc += h_7(acc + float(0));
acc += dispatch(idx + 0, acc);
acc += h_15(acc + float(1));
acc += dispatch(idx + 1, acc);
acc += h_23(acc + float(2));
acc += dispatch(idx + 2, acc);
acc += h_31(acc + float(3));
acc += dispatch(idx + 3, acc);
acc += h_39(acc + float(4));
acc += dispatch(idx + 4, acc);
acc += h_47(acc + float(5));
acc += dispatch(idx + 5, acc);
acc += h_55(acc + float(6));
acc += dispatch(idx + 6, acc);
acc += h_63(acc + float(7));
acc += dispatch(idx + 7, acc);
acc += h_71(acc + float(8));
acc += dispatch(idx + 8, acc);
acc += h_79(acc + float(9));
acc += dispatch(idx + 9, acc);
acc += h_87(acc + float(10));
acc += dispatch(idx + 10, acc);
acc += h_95(acc + float(11));
acc += dispatch(idx + 11, acc);
acc += h_103(acc + float(12));
acc += dispatch(idx + 12, acc);
acc += h_111(acc + float(13));
acc += dispatch(idx + 13, acc);
acc += h_119(acc + float(14));
acc += dispatch(idx + 14, acc);
acc += h_127(acc + float(15));
acc += dispatch(idx + 15, acc);
acc += h_135(acc + float(16));
acc += dispatch(idx + 16, acc);
acc += h_143(acc + float(17));
acc += dispatch(idx + 17, acc);
acc += h_151(acc + float(18));
acc += dispatch(idx + 18, acc);
acc += h_159(acc + float(19));
acc += dispatch(idx + 19, acc);
// The only gpoly<int> call in this function; the int result is converted to float before accumulation.
acc += float(gpoly<int>(idx, idx + 1));
outBuf[idx] = acc;
}