← all workloads

emit_metal

A single, moderately complex shader that the harness compiles to multiple targets (same source, varying -target) to isolate the cost of generateOutput. N scales the amount of straight-line math the backend must emit.

bucket: codegen_source  ·  compile mode: target  ·  flags: -target metal  ·  default N: 400

Phase composition across releases

Full sub-counter decomposition of compileInner — named leaf timers plus (self) residuals (a parent's time not covered by a named child, e.g. the autodiff transform in linkAndOptimizeIR (self)). Topmost band traces compileInner; hover a band for its phase.

emit_metal — full phase breakdown across releases (median ms) emit_metal 0.46× 0.0 1353 2705 daily → 25.14 25.15 25.16 25.17 25.18 25.19 25.20 25.21 25.22 25.23 25.24 26.1 26.2 26.3 26.4 26.5 26.7 26.8 26.9 26.10 26.11 06-25 06-26 emit_metal — parseTranslationUnit emit_metal — SemanticChecking emit_metal — generateIR emit_metal — frontEndExecute (self) emit_metal — specializeModule emit_metal — simplifyIR emit_metal — linkIR emit_metal — unrollLoopsInModule emit_metal — legalizeResourceTypes emit_metal — legalizeExistentialTypeLayout emit_metal — performMandatoryEarlyInlining emit_metal — performForceInlining emit_metal — emitEntryPointsSourceFromIR emit_metal — compileInner (self) phase buckets parseTranslationUnit SemanticChecking generateIR frontEndExecute (self) specializeModule simplifyIR linkIR unrollLoopsInModule legalizeResourceTypes legalizeExistentialTypeLayout performMandatoryEarlyInlining performForceInlining linkAndOptimizeIR (self) emitEntryPointsSourceFromIR generateOutput (self) compileInner (self)

Compiled Slang source

exact compiled source (N = 400); long files show the first 40 lines, the area around computeMain (±40), and the last 40 lines (gaps elided)

codegen.slang

// AUTO-GENERATED by perf-suite/workloads.py — do not edit by hand.
// Read-write buffer of floats shared by the entry point below: computeMain
// reads its starting value from element tid.x and stores its final result
// back into the same element.
RWStructuredBuffer<float> outBuf;

// Compute entry point for the codegen benchmark.
//
// Loads one float from outBuf at index tid.x, runs it through a long chain of
// straight-line arithmetic statements, and writes the final value back to the
// same element. Every statement has the identical shape
//
//     acc = acc * 1.0009 + sin(acc + K) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + (K + 1));
//
// where K is the statement's zero-based position in the chain (0.0, 1.0, ...,
// 399.0 in this N = 400 instance). Each statement reads the acc produced by the
// previous one, so the chain is strictly sequential. The statements are kept
// unrolled on purpose: the workload measures how much straight-line math the
// backend has to emit, so do not fold them into a loop.
[shader("compute")]
// Thread-group dimensions declared for this entry point: 64 x 1 x 1.
[numthreads(64,1,1)]
void computeMain(uint3 tid : SV_DispatchThreadID)
{
    // Seed the accumulator from this thread's buffer element; only tid.x is used.
    float x = outBuf[tid.x];
    float acc = x;
    // Chain steps K = 0 .. 36.
    acc = acc * 1.0009 + sin(acc + 0.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 1.0);
    acc = acc * 1.0009 + sin(acc + 1.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 2.0);
    acc = acc * 1.0009 + sin(acc + 2.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 3.0);
    acc = acc * 1.0009 + sin(acc + 3.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 4.0);
    acc = acc * 1.0009 + sin(acc + 4.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 5.0);
    acc = acc * 1.0009 + sin(acc + 5.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 6.0);
    acc = acc * 1.0009 + sin(acc + 6.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 7.0);
    acc = acc * 1.0009 + sin(acc + 7.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 8.0);
    acc = acc * 1.0009 + sin(acc + 8.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 9.0);
    acc = acc * 1.0009 + sin(acc + 9.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 10.0);
    acc = acc * 1.0009 + sin(acc + 10.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 11.0);
    acc = acc * 1.0009 + sin(acc + 11.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 12.0);
    acc = acc * 1.0009 + sin(acc + 12.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 13.0);
    acc = acc * 1.0009 + sin(acc + 13.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 14.0);
    acc = acc * 1.0009 + sin(acc + 14.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 15.0);
    acc = acc * 1.0009 + sin(acc + 15.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 16.0);
    acc = acc * 1.0009 + sin(acc + 16.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 17.0);
    acc = acc * 1.0009 + sin(acc + 17.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 18.0);
    acc = acc * 1.0009 + sin(acc + 18.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 19.0);
    acc = acc * 1.0009 + sin(acc + 19.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 20.0);
    acc = acc * 1.0009 + sin(acc + 20.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 21.0);
    acc = acc * 1.0009 + sin(acc + 21.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 22.0);
    acc = acc * 1.0009 + sin(acc + 22.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 23.0);
    acc = acc * 1.0009 + sin(acc + 23.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 24.0);
    acc = acc * 1.0009 + sin(acc + 24.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 25.0);
    acc = acc * 1.0009 + sin(acc + 25.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 26.0);
    acc = acc * 1.0009 + sin(acc + 26.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 27.0);
    acc = acc * 1.0009 + sin(acc + 27.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 28.0);
    acc = acc * 1.0009 + sin(acc + 28.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 29.0);
    acc = acc * 1.0009 + sin(acc + 29.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 30.0);
    acc = acc * 1.0009 + sin(acc + 30.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 31.0);
    acc = acc * 1.0009 + sin(acc + 31.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 32.0);
    acc = acc * 1.0009 + sin(acc + 32.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 33.0);
    acc = acc * 1.0009 + sin(acc + 33.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 34.0);
    acc = acc * 1.0009 + sin(acc + 34.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 35.0);
    acc = acc * 1.0009 + sin(acc + 35.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 36.0);
    acc = acc * 1.0009 + sin(acc + 36.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 37.0);

// NOTE(review): the listing is truncated here. The 325 missing statements are
// presumably steps K = 37 .. 361 of the same pattern — confirm against the
// generator (perf-suite/workloads.py) before relying on this excerpt.
// … 325 lines omitted …

    // Chain steps K = 362 .. 399.
    acc = acc * 1.0009 + sin(acc + 362.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 363.0);
    acc = acc * 1.0009 + sin(acc + 363.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 364.0);
    acc = acc * 1.0009 + sin(acc + 364.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 365.0);
    acc = acc * 1.0009 + sin(acc + 365.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 366.0);
    acc = acc * 1.0009 + sin(acc + 366.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 367.0);
    acc = acc * 1.0009 + sin(acc + 367.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 368.0);
    acc = acc * 1.0009 + sin(acc + 368.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 369.0);
    acc = acc * 1.0009 + sin(acc + 369.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 370.0);
    acc = acc * 1.0009 + sin(acc + 370.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 371.0);
    acc = acc * 1.0009 + sin(acc + 371.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 372.0);
    acc = acc * 1.0009 + sin(acc + 372.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 373.0);
    acc = acc * 1.0009 + sin(acc + 373.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 374.0);
    acc = acc * 1.0009 + sin(acc + 374.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 375.0);
    acc = acc * 1.0009 + sin(acc + 375.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 376.0);
    acc = acc * 1.0009 + sin(acc + 376.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 377.0);
    acc = acc * 1.0009 + sin(acc + 377.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 378.0);
    acc = acc * 1.0009 + sin(acc + 378.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 379.0);
    acc = acc * 1.0009 + sin(acc + 379.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 380.0);
    acc = acc * 1.0009 + sin(acc + 380.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 381.0);
    acc = acc * 1.0009 + sin(acc + 381.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 382.0);
    acc = acc * 1.0009 + sin(acc + 382.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 383.0);
    acc = acc * 1.0009 + sin(acc + 383.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 384.0);
    acc = acc * 1.0009 + sin(acc + 384.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 385.0);
    acc = acc * 1.0009 + sin(acc + 385.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 386.0);
    acc = acc * 1.0009 + sin(acc + 386.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 387.0);
    acc = acc * 1.0009 + sin(acc + 387.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 388.0);
    acc = acc * 1.0009 + sin(acc + 388.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 389.0);
    acc = acc * 1.0009 + sin(acc + 389.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 390.0);
    acc = acc * 1.0009 + sin(acc + 390.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 391.0);
    acc = acc * 1.0009 + sin(acc + 391.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 392.0);
    acc = acc * 1.0009 + sin(acc + 392.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 393.0);
    acc = acc * 1.0009 + sin(acc + 393.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 394.0);
    acc = acc * 1.0009 + sin(acc + 394.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 395.0);
    acc = acc * 1.0009 + sin(acc + 395.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 396.0);
    acc = acc * 1.0009 + sin(acc + 396.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 397.0);
    acc = acc * 1.0009 + sin(acc + 397.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 398.0);
    acc = acc * 1.0009 + sin(acc + 398.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 399.0);
    acc = acc * 1.0009 + sin(acc + 399.0) * 0.5 - cos(acc * 0.5) * 0.25 + sqrt(abs(acc) + 400.0);
    // Store the end of the chain back into the element the seed was read from.
    outBuf[tid.x] = acc;
}