← all workloads

emit_wgsl

A single moderately complex shader. The harness compiles it to multiple targets (same source, varying -target) to isolate generateOutput cost. The parameter N scales the amount of straight-line math the backend must emit.

bucket: codegen_source  ·  compile mode: target  ·  flags: -target wgsl  ·  default N: 400

Phase composition across releases

Full sub-counter decomposition of compileInner: named leaf timers plus "(self)" residuals, i.e. the part of a parent's time not covered by any named child (for example, the autodiff transform is counted in linkAndOptimizeIR (self)). The topmost band traces compileInner; hover over a band to see its phase.

emit_wgsl — full phase breakdown across releases (median ms) emit_wgsl 0.51× 0.0 945 1890 daily → 25.14 25.15 25.16 25.17 25.18 25.19 25.20 25.21 25.22 25.23 25.24 26.1 26.2 26.3 26.4 26.5 26.7 26.8 26.9 26.10 26.11 06-25 06-26 emit_wgsl — parseTranslationUnit emit_wgsl — SemanticChecking emit_wgsl — generateIR emit_wgsl — frontEndExecute (self) emit_wgsl — specializeModule emit_wgsl — simplifyIR emit_wgsl — linkIR emit_wgsl — unrollLoopsInModule emit_wgsl — legalizeResourceTypes emit_wgsl — legalizeExistentialTypeLayout emit_wgsl — performMandatoryEarlyInlining emit_wgsl — performForceInlining emit_wgsl — emitEntryPointsSourceFromIR emit_wgsl — compileInner (self) phase buckets parseTranslationUnit SemanticChecking generateIR frontEndExecute (self) specializeModule simplifyIR linkIR unrollLoopsInModule legalizeResourceTypes legalizeExistentialTypeLayout performMandatoryEarlyInlining performForceInlining linkAndOptimizeIR (self) emitEntryPointsSourceFromIR generateOutput (self) compileInner (self)

Compiled Slang source

exact compiled source (N = 400); long files show the first 40 lines, the area around computeMain (±40), and the last 40 lines (gaps elided)

codegen.slang

// AUTO-GENERATED by perf-suite/workloads.py — do not edit by hand.
// Read-write buffer of floats. computeMain reads its starting value from
// element tid.x and writes its final result back to the same element.
RWStructuredBuffer<float> outBuf;

// Compute entry point, declared with a 64x1x1 thread group. Each invocation
// works on the single outBuf element selected by tid.x.
//
// The body is one straight-line dependency chain on `acc`: every statement
// recomputes `acc` from its previous value using a multiply, sin, cos, abs
// and sqrt. In the statements visible here, only two additive constants vary:
// the sin offset equals the statement's zero-based position k, and the sqrt
// offset equals k + 1. There are no loops or branches in the visible body.
[shader("compute")]
[numthreads(64,1,1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
    // Seed the chain from this invocation's buffer element.
    float x = outBuf[tid.x];
    float acc = x;
    // Chain statements k = 0 .. 36.
    acc = acc * 1.0009 + sin(acc + 0.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 1.0);
    acc = acc * 1.0009 + sin(acc + 1.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 2.0);
    acc = acc * 1.0009 + sin(acc + 2.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 3.0);
    acc = acc * 1.0009 + sin(acc + 3.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 4.0);
    acc = acc * 1.0009 + sin(acc + 4.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 5.0);
    acc = acc * 1.0009 + sin(acc + 5.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 6.0);
    acc = acc * 1.0009 + sin(acc + 6.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 7.0);
    acc = acc * 1.0009 + sin(acc + 7.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 8.0);
    acc = acc * 1.0009 + sin(acc + 8.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 9.0);
    acc = acc * 1.0009 + sin(acc + 9.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 10.0);
    acc = acc * 1.0009 + sin(acc + 10.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 11.0);
    acc = acc * 1.0009 + sin(acc + 11.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 12.0);
    acc = acc * 1.0009 + sin(acc + 12.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 13.0);
    acc = acc * 1.0009 + sin(acc + 13.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 14.0);
    acc = acc * 1.0009 + sin(acc + 14.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 15.0);
    acc = acc * 1.0009 + sin(acc + 15.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 16.0);
    acc = acc * 1.0009 + sin(acc + 16.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 17.0);
    acc = acc * 1.0009 + sin(acc + 17.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 18.0);
    acc = acc * 1.0009 + sin(acc + 18.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 19.0);
    acc = acc * 1.0009 + sin(acc + 19.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 20.0);
    acc = acc * 1.0009 + sin(acc + 20.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 21.0);
    acc = acc * 1.0009 + sin(acc + 21.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 22.0);
    acc = acc * 1.0009 + sin(acc + 22.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 23.0);
    acc = acc * 1.0009 + sin(acc + 23.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 24.0);
    acc = acc * 1.0009 + sin(acc + 24.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 25.0);
    acc = acc * 1.0009 + sin(acc + 25.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 26.0);
    acc = acc * 1.0009 + sin(acc + 26.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 27.0);
    acc = acc * 1.0009 + sin(acc + 27.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 28.0);
    acc = acc * 1.0009 + sin(acc + 28.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 29.0);
    acc = acc * 1.0009 + sin(acc + 29.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 30.0);
    acc = acc * 1.0009 + sin(acc + 30.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 31.0);
    acc = acc * 1.0009 + sin(acc + 31.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 32.0);
    acc = acc * 1.0009 + sin(acc + 32.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 33.0);
    acc = acc * 1.0009 + sin(acc + 33.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 34.0);
    acc = acc * 1.0009 + sin(acc + 34.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 35.0);
    acc = acc * 1.0009 + sin(acc + 35.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 36.0);
    acc = acc * 1.0009 + sin(acc + 36.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 37.0);

// … 325 lines omitted …
// NOTE(review): the elided statements presumably continue the same pattern
// for k = 37 .. 361 — confirm against the generator output.

    // Chain statements k = 362 .. 399 (tail of the chain).
    acc = acc * 1.0009 + sin(acc + 362.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 363.0);
    acc = acc * 1.0009 + sin(acc + 363.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 364.0);
    acc = acc * 1.0009 + sin(acc + 364.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 365.0);
    acc = acc * 1.0009 + sin(acc + 365.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 366.0);
    acc = acc * 1.0009 + sin(acc + 366.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 367.0);
    acc = acc * 1.0009 + sin(acc + 367.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 368.0);
    acc = acc * 1.0009 + sin(acc + 368.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 369.0);
    acc = acc * 1.0009 + sin(acc + 369.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 370.0);
    acc = acc * 1.0009 + sin(acc + 370.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 371.0);
    acc = acc * 1.0009 + sin(acc + 371.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 372.0);
    acc = acc * 1.0009 + sin(acc + 372.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 373.0);
    acc = acc * 1.0009 + sin(acc + 373.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 374.0);
    acc = acc * 1.0009 + sin(acc + 374.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 375.0);
    acc = acc * 1.0009 + sin(acc + 375.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 376.0);
    acc = acc * 1.0009 + sin(acc + 376.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 377.0);
    acc = acc * 1.0009 + sin(acc + 377.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 378.0);
    acc = acc * 1.0009 + sin(acc + 378.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 379.0);
    acc = acc * 1.0009 + sin(acc + 379.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 380.0);
    acc = acc * 1.0009 + sin(acc + 380.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 381.0);
    acc = acc * 1.0009 + sin(acc + 381.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 382.0);
    acc = acc * 1.0009 + sin(acc + 382.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 383.0);
    acc = acc * 1.0009 + sin(acc + 383.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 384.0);
    acc = acc * 1.0009 + sin(acc + 384.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 385.0);
    acc = acc * 1.0009 + sin(acc + 385.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 386.0);
    acc = acc * 1.0009 + sin(acc + 386.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 387.0);
    acc = acc * 1.0009 + sin(acc + 387.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 388.0);
    acc = acc * 1.0009 + sin(acc + 388.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 389.0);
    acc = acc * 1.0009 + sin(acc + 389.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 390.0);
    acc = acc * 1.0009 + sin(acc + 390.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 391.0);
    acc = acc * 1.0009 + sin(acc + 391.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 392.0);
    acc = acc * 1.0009 + sin(acc + 392.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 393.0);
    acc = acc * 1.0009 + sin(acc + 393.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 394.0);
    acc = acc * 1.0009 + sin(acc + 394.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 395.0);
    acc = acc * 1.0009 + sin(acc + 395.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 396.0);
    acc = acc * 1.0009 + sin(acc + 396.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 397.0);
    acc = acc * 1.0009 + sin(acc + 397.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 398.0);
    acc = acc * 1.0009 + sin(acc + 398.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 399.0);
    acc = acc * 1.0009 + sin(acc + 399.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 400.0);
    // Store the end of the chain back into the element it was seeded from.
    outBuf[tid.x] = acc;
}