complexity_ladder

A single shader that ramps *several* realistic dimensions together as N grows, modelling how a real shader's compile cost behaves as it goes from simple to highly complex — as opposed to the single-axis stressors, each of which isolates one pass. Each step adds: a branchy helper (control flow / SSA + phi), a generic call (sema + specialization), a small bounded inner loop, resource reads, and a dynamic-dispatch site. Helpers are chained in bounded-depth groups (call graph depth) so doubling N roughly doubles total work across all of front-end, IR opt, and codegen at once. Sweep N to get the holistic complexity → compile-time curve and a floor + slope fit.

bucket: realistic_scaling · compile mode: target · flags: -target spirv -emit-spirv-directly · default N: 160

Phase composition across releases

Full sub-counter decomposition of compileInner: named leaf timers plus "(self)" residuals — the part of a parent's time not covered by any named child (e.g. the autodiff transform is counted under linkAndOptimizeIR (self)). The topmost band traces compileInner; hover over a band to see its phase.

Compiled Slang source

exact compiled source (N = 160); long files show the first 40 lines, the area around computeMain (±40), and the last 40 lines (gaps elided)

complexity_ladder.slang

// AUTO-GENERATED by perf-suite/workloads.py — do not edit by hand.
// Read-write output buffer: computeMain stores its final accumulator at outBuf[tid.x].
RWStructuredBuffer<float> outBuf;
// Read-only input buffer: computeMain seeds its accumulator from inBuf[tid.x].
StructuredBuffer<float> inBuf;

// Single-method interface used as the dynamic-dispatch target in dispatch().
interface IOp { float apply(float x); }
// apply(x) = x + 1.0
struct OpAdd : IOp { float apply(float x) { return x + 1.0; } }
// apply(x) = x * 1.5 + 0.25
struct OpMul : IOp { float apply(float x) { return x * 1.5 + 0.25; } }
// apply(x) = x * x - 0.5
struct OpSq  : IOp { float apply(float x) { return x * x - 0.5; } }

// Generic polynomial a*a + b*a + a for any type conforming to IArithmetic.
// This file instantiates it as gpoly<float> (in the h_* helpers) and as
// gpoly<int> (once, at the end of computeMain).
T gpoly<T : IArithmetic>(T a, T b) { return a * a + b * a + a; }

// Picks an IOp implementation from `id % 3` and returns its apply(x).
//   id % 3 == 0 -> OpAdd
//   id % 3 == 1 -> OpMul
//   any other remainder -> OpSq (default case)
float dispatch(int id, float x)
{
    // Interface-typed local; every switch path assigns it before use.
    IOp o;
    switch (id % 3)
    {
    case 0: o = OpAdd(); break;
    case 1: o = OpMul(); break;
    default: o = OpSq(); break;
    }
    // The concrete type behind `o` depends on the runtime value of id.
    return o.apply(x);
}

// Helper 0: starts directly from x (it calls no other h_* helper).
// Applies one two-way branch, then a short loop that mixes in gpoly<float>.
float h_0(float x)
{
    float t = x;
    // Two-way branch on the running value (threshold 0.0, sin offset 0.0).
    if (t > 0.0) t = t * 1.01 + sin(t + 0.0);
    else t = t - cos(t * 0.5) * 0.25;
    // (int(t) & 3) + 1 is always in 1..4, so the loop runs at most 4 times,
    // consistent with the [MaxIters(4)] annotation. The bound is re-evaluated
    // every iteration because the body modifies t.
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 0.0);
    return t;
}
// Helper 1: feeds x through h_0 first, then applies its own branch
// (threshold 1.0, sin offset 1.0) and a loop of at most 4 iterations using
// gpoly<float>(t, 1.0).
float h_1(float x)
{
    float t = h_0(x);
    if (t > 1.0) t = t * 1.01 + sin(t + 1.0);
    else t = t - cos(t * 0.5) * 0.25;
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 1.0);
    return t;

// … 1386 lines omitted …

}
// Helper 156: feeds x through h_155 first, then applies its own branch
// (threshold 2.0, sin offset 2.0) and a loop of at most 4 iterations using
// gpoly<float>(t, 1.0).
float h_156(float x)
{
    float t = h_155(x);
    if (t > 2.0) t = t * 1.01 + sin(t + 2.0);
    else t = t - cos(t * 0.5) * 0.25;
    // Trip count is (int(t) & 3) + 1, i.e. 1..4, re-evaluated as t changes.
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 1.0);
    return t;
}
// Helper 157: feeds x through h_156 first, then applies its own branch
// (threshold 3.0, sin offset 3.0) and a loop of at most 4 iterations using
// gpoly<float>(t, 2.0).
float h_157(float x)
{
    float t = h_156(x);
    if (t > 3.0) t = t * 1.01 + sin(t + 3.0);
    else t = t - cos(t * 0.5) * 0.25;
    // Trip count is (int(t) & 3) + 1, i.e. 1..4, re-evaluated as t changes.
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 2.0);
    return t;
}
// Helper 158: feeds x through h_157 first, then applies its own branch
// (threshold 4.0, sin offset 4.0) and a loop of at most 4 iterations using
// gpoly<float>(t, 3.0).
float h_158(float x)
{
    float t = h_157(x);
    if (t > 4.0) t = t * 1.01 + sin(t + 4.0);
    else t = t - cos(t * 0.5) * 0.25;
    // Trip count is (int(t) & 3) + 1, i.e. 1..4, re-evaluated as t changes.
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 3.0);
    return t;
}
// Helper 159: feeds x through h_158 first, then applies its own branch
// (threshold 5.0, sin offset 5.0) and a loop of at most 4 iterations using
// gpoly<float>(t, 4.0). Called directly by the last step of computeMain.
float h_159(float x)
{
    float t = h_158(x);
    if (t > 5.0) t = t * 1.01 + sin(t + 5.0);
    else t = t - cos(t * 0.5) * 0.25;
    // Trip count is (int(t) & 3) + 1, i.e. 1..4, re-evaluated as t changes.
    [MaxIters(4)] for (int k = 0; k < (int(t) & 3) + 1; ++k)
        t = t * 0.999 + gpoly<float>(t, 4.0);
    return t;
}

// Compute entry point, declared with 64x1x1 threads per group.
// Each thread seeds an accumulator from inBuf[tid.x], runs 20 steps, and
// writes the result to outBuf[tid.x]. Step i (i = 0..19) does:
//   acc += h_{8*i + 7}(acc + float(i));   // helper call
//   acc += dispatch(idx + i, acc);        // interface dispatch on (idx + i) % 3
// NOTE(review): each h_{8*i + 7} is presumably the tail of an 8-helper chain;
// only the first and last chains are visible in this excerpt — confirm
// against the generator.
[shader("compute")]
[numthreads(64,1,1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
    int idx = int(tid.x);
    float acc = inBuf[idx];
    acc += h_7(acc + float(0));
    acc += dispatch(idx + 0, acc);
    acc += h_15(acc + float(1));
    acc += dispatch(idx + 1, acc);
    acc += h_23(acc + float(2));
    acc += dispatch(idx + 2, acc);
    acc += h_31(acc + float(3));
    acc += dispatch(idx + 3, acc);
    acc += h_39(acc + float(4));
    acc += dispatch(idx + 4, acc);
    acc += h_47(acc + float(5));
    acc += dispatch(idx + 5, acc);
    acc += h_55(acc + float(6));
    acc += dispatch(idx + 6, acc);
    acc += h_63(acc + float(7));
    acc += dispatch(idx + 7, acc);
    acc += h_71(acc + float(8));
    acc += dispatch(idx + 8, acc);
    acc += h_79(acc + float(9));
    acc += dispatch(idx + 9, acc);
    acc += h_87(acc + float(10));
    acc += dispatch(idx + 10, acc);
    acc += h_95(acc + float(11));
    acc += dispatch(idx + 11, acc);
    acc += h_103(acc + float(12));
    acc += dispatch(idx + 12, acc);
    acc += h_111(acc + float(13));
    acc += dispatch(idx + 13, acc);
    acc += h_119(acc + float(14));
    acc += dispatch(idx + 14, acc);
    acc += h_127(acc + float(15));
    acc += dispatch(idx + 15, acc);
    acc += h_135(acc + float(16));
    acc += dispatch(idx + 16, acc);
    acc += h_143(acc + float(17));
    acc += dispatch(idx + 17, acc);
    acc += h_151(acc + float(18));
    acc += dispatch(idx + 18, acc);
    acc += h_159(acc + float(19));
    acc += dispatch(idx + 19, acc);
    // The only gpoly<int> use in this file; its int result is converted to float.
    acc += float(gpoly<int>(idx, idx + 1));
    outBuf[idx] = acc;
}