Use direct libav bindings via dlopen instead of ffmpeg subprocess

Loads libavformat + libavdevice at runtime — no dev packages, no
subprocess, no pipe buffering. Falls back gracefully to the ffmpeg
subprocess, then to demo mode, if the .so files aren't present.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: rolandnsharp
Date:   2026-04-05 19:33:52 +10:00
Parent: 3c9a9c7c89
Commit: 3fc7ed1c4e
2 changed files with 265 additions and 27 deletions
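The mechanism in isolation, before the diff: a minimal sketch of the dlopen/dlsym pattern the helper is built on. It assumes only that some libavformat shared object is installed; avformat_version is a real libavformat entry point, everything else here is illustrative.

/* sketch.c: resolve a libav symbol at runtime, with no FFmpeg dev
   headers and no link-time -lavformat. Build: cc sketch.c -ldl */
#include <dlfcn.h>
#include <stdio.h>

typedef unsigned (*fn_avformat_version)(void);

int main(void) {
  void *h = dlopen("libavformat.so", RTLD_LAZY);
  if (!h) h = dlopen("libavformat.so.60", RTLD_LAZY);  /* FFmpeg 6.x soname */
  if (!h) { fprintf(stderr, "libavformat not found\n"); return 1; }
  fn_avformat_version ver =
      (fn_avformat_version)dlsym(h, "avformat_version");
  if (ver) printf("avformat version: %u\n", ver());
  dlclose(h);
  return 0;
}

The resulting binary carries no link-time dependency on FFmpeg at all; -ldl is the only extra flag (and on glibc 2.34+ dlopen lives in libc itself).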


@@ -1,23 +1,39 @@
## Audio capture: tries ffmpeg (PulseAudio monitor) → parec → demo signal.
## Audio capture via libavdevice/libavformat (direct C bindings),
## with fallback to ffmpeg subprocess, then demo signal.
import osproc, streams, strutils, math
import scope
type
AudioMode* = enum
amLive ## Capturing real audio via ffmpeg/parec
amDemo ## Built-in synthesized waveforms
# ── libav C helper bindings ──────────────────────────────────────────
AudioCapture* = object
mode*: AudioMode
process: Process
stream: Stream
phase: float
demoFreqL*, demoFreqR*: float
demoPreset*: int
{.compile: "avhelper.c".}
{.passL: "-ldl".}
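# {.compile.} builds avhelper.c as part of this module and links the
# resulting object in; {.passL: "-ldl".} adds libdl, which the helper
# needs for dlopen/dlsym at runtime.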
type
AVFormatContext = object # opaque, only used as pointer
AVPacket = object # opaque, only used as pointer
proc av_helper_init(): cint {.importc, cdecl.}
proc av_helper_open_pulse(ctx: ptr ptr AVFormatContext,
device: cstring): cint {.importc, cdecl.}
proc av_helper_find_stream_info(ctx: ptr AVFormatContext): cint
{.importc, cdecl.}
proc av_helper_find_audio_stream(ctx: ptr AVFormatContext): cint
{.importc, cdecl.}
proc av_helper_read_frame(ctx: ptr AVFormatContext,
pkt: ptr AVPacket): cint {.importc, cdecl.}
proc av_helper_packet_stream(pkt: ptr AVPacket): cint {.importc, cdecl.}
proc av_helper_packet_data(pkt: ptr AVPacket): ptr UncheckedArray[uint8]
{.importc, cdecl.}
proc av_helper_packet_size(pkt: ptr AVPacket): cint {.importc, cdecl.}
proc av_helper_packet_alloc(): ptr AVPacket {.importc, cdecl.}
proc av_helper_packet_unref(pkt: ptr AVPacket) {.importc, cdecl.}
proc av_helper_packet_free(pkt: ptr ptr AVPacket) {.importc, cdecl.}
proc av_helper_close(ctx: ptr ptr AVFormatContext) {.importc, cdecl.}
# ── Monitor source detection ─────────────────────────────────────────
proc findMonitorSource(): string =
## Find the PulseAudio monitor for the default audio sink.
try:
let inspect = execProcess("wpctl",
args = ["inspect", "@DEFAULT_AUDIO_SINK@"],
@@ -30,10 +46,51 @@ proc findMonitorSource(): string =
except: discard
""
# ── Audio capture types ──────────────────────────────────────────────
type
AudioMode* = enum
amLibav ## Direct libav capture (fastest, no subprocess)
amLive ## ffmpeg/parec subprocess fallback
amDemo ## Built-in synthesized waveforms
AudioCapture* = object
mode*: AudioMode
# libav state
fmtCtx: ptr AVFormatContext
packet: ptr AVPacket
streamIdx: cint
# subprocess fallback
process: Process
stream: Stream
# demo state
phase: float
demoFreqL*, demoFreqR*: float
demoPreset*: int
# ── Start / stop ─────────────────────────────────────────────────────
proc startAudio*(): AudioCapture =
## Try real audio capture, fall back to demo.
let monitor = findMonitorSource()
if monitor.len > 0:
# Try direct libav first (dlopen at runtime, no dev packages needed)
block libav:
if av_helper_init() < 0: break libav
var ctx: ptr AVFormatContext = nil
if av_helper_open_pulse(addr ctx, monitor.cstring) < 0: break libav
if av_helper_find_stream_info(ctx) < 0:
av_helper_close(addr ctx)
break libav
let idx = av_helper_find_audio_stream(ctx)
let pkt = av_helper_packet_alloc()
if pkt != nil:
return AudioCapture(
mode: amLibav, fmtCtx: ctx, packet: pkt,
streamIdx: idx.cint,
demoFreqL: 440.0, demoFreqR: 330.0)
av_helper_close(addr ctx)
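      # packet alloc failed: release the context and fall through
      # to the subprocess fallback below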
# Fallback: ffmpeg subprocess
try:
let p = startProcess("ffmpeg",
args = ["-f", "pulse", "-i", monitor,
@@ -45,27 +102,31 @@ proc startAudio*(): AudioCapture =
demoFreqL: 440.0, demoFreqR: 330.0)
except OSError: discard
try:
let p = startProcess("parec",
args = ["--format=s16le", "--channels=2", "--rate=44100",
"--latency-msec=20"],
options = {poUsePath})
return AudioCapture(mode: amLive, process: p, stream: p.outputStream,
demoFreqL: 440.0, demoFreqR: 330.0)
except OSError: discard
# Fallback: demo
AudioCapture(mode: amDemo, demoFreqL: 440.0, demoFreqR: 330.0)
proc stop*(cap: var AudioCapture) =
if cap.mode == amLive:
case cap.mode
of amLibav:
if cap.packet != nil:
av_helper_packet_free(addr cap.packet)
if cap.fmtCtx != nil:
av_helper_close(addr cap.fmtCtx)
of amLive:
cap.process.terminate()
cap.process.close()
of amDemo:
discard
proc sourceLabel*(cap: AudioCapture): string =
if cap.mode == amLive: "LIVE" else: "DEMO"
case cap.mode
of amLibav: "LIVE"
of amLive: "LIVE"
of amDemo: "DEMO"
# ── Preset cycling ───────────────────────────────────────────────────
proc cyclePreset*(cap: var AudioCapture) =
## Cycle through demo frequency ratios for interesting Lissajous patterns.
if cap.mode != amDemo: return
cap.demoPreset = (cap.demoPreset + 1) mod 4
case cap.demoPreset
@@ -75,10 +136,35 @@ proc cyclePreset*(cap: var AudioCapture) =
of 3: cap.demoFreqL = 440.0; cap.demoFreqR = 293.3 # 3:2
else: discard
# ── Sample reading ───────────────────────────────────────────────────
proc readSamples*(cap: var AudioCapture, scope: var Scope) =
case cap.mode
of amLibav:
# Read frames directly from libav — no subprocess, no pipe
const frameSize = 4 # 2ch × 16-bit
var totalSamples = 0
while totalSamples < scope.samplesL.len:
let ret = av_helper_read_frame(cap.fmtCtx, cap.packet)
if ret < 0: break
if av_helper_packet_stream(cap.packet) == cap.streamIdx:
let data = av_helper_packet_data(cap.packet)
let size = av_helper_packet_size(cap.packet)
let frames = size div frameSize
for i in 0..<frames:
if totalSamples >= scope.samplesL.len: break
let off = i * frameSize
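          # s16le is little-endian: low byte first; the int16 cast
          # reinterprets the assembled bits as a signed sample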
let left = cast[int16]((data[off + 1].uint16 shl 8) or data[off].uint16)
let right = cast[int16]((data[off + 3].uint16 shl 8) or data[off + 2].uint16)
scope.samplesL[totalSamples] = left.float / 32768.0
scope.samplesR[totalSamples] = right.float / 32768.0
totalSamples += 1
av_helper_packet_unref(cap.packet)
if totalSamples > 0: break # got some data, render it
scope.sampleCount = totalSamples
of amLive:
const frameSize = 4 # 2 channels × 16-bit
const frameSize = 4
const maxFrames = 2048
var buf: array[maxFrames * frameSize, uint8]
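    # room for up to 2048 stereo s16le frames (8 KiB) per read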
let bytesRead = cap.stream.readData(addr buf[0], maxFrames * frameSize)

src/osc/avhelper.c (new file, 152 lines)

@@ -0,0 +1,152 @@
/* Minimal libav audio capture without requiring dev headers.
Loads libavformat/libavdevice at runtime via dlopen. */
#include <dlfcn.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
/* Opaque handles — we never touch the struct internals from Nim */
typedef void AVFormatContext;
typedef void AVInputFormat;
typedef void AVDictionary;
/* AVPacket — we only need data, size, stream_index.
   The leading fields are layout-stable across FFmpeg 5.x/6.x/7.x:
   AVBufferRef *buf, int64_t pts, int64_t dts, uint8_t *data,
   int size, int stream_index. The pts/dts members must appear here
   too, or the data/size/stream_index offsets come out wrong. */
typedef struct {
  void *buf;      /* AVBufferRef* */
  int64_t pts;
  int64_t dts;
  uint8_t *data;
  int size;
  int stream_index;
  /* we don't care about the rest */
} AVPacketHead;
/* Function pointer types matching libav API */
typedef void (*fn_avdevice_register_all)(void);
typedef const AVInputFormat* (*fn_av_find_input_format)(const char*);
typedef int (*fn_avformat_open_input)(AVFormatContext**, const char*,
const AVInputFormat*, AVDictionary**);
typedef int (*fn_avformat_find_stream_info)(AVFormatContext*, AVDictionary**);
typedef void (*fn_avformat_close_input)(AVFormatContext**);
typedef int (*fn_av_read_frame)(AVFormatContext*, AVPacketHead*);
typedef AVPacketHead* (*fn_av_packet_alloc)(void);
typedef void (*fn_av_packet_free)(AVPacketHead**);
typedef void (*fn_av_packet_unref)(AVPacketHead*);
/* Stream selection: av_find_best_stream lets us pick the audio
   stream without touching AVFormatContext internals at all. */
typedef int (*fn_av_find_best_stream)(AVFormatContext*, int media_type,
int wanted, int related, void**, int flags);
/* Loaded function pointers */
static fn_avdevice_register_all p_avdevice_register_all;
static fn_av_find_input_format p_av_find_input_format;
static fn_avformat_open_input p_avformat_open_input;
static fn_avformat_find_stream_info p_avformat_find_stream_info;
static fn_avformat_close_input p_avformat_close_input;
static fn_av_read_frame p_av_read_frame;
static fn_av_packet_alloc p_av_packet_alloc;
static fn_av_packet_free p_av_packet_free;
static fn_av_packet_unref p_av_packet_unref;
static fn_av_find_best_stream p_av_find_best_stream;
static void *h_format, *h_device, *h_util; /* h_util: libavcodec, opened only if needed */
static int loaded = 0;
static int load_libs(void) {
if (loaded) return loaded > 0 ? 0 : -1;
h_format = dlopen("libavformat.so", RTLD_LAZY);
if (!h_format) h_format = dlopen("libavformat.so.60", RTLD_LAZY);
if (!h_format) h_format = dlopen("libavformat.so.59", RTLD_LAZY);
h_device = dlopen("libavdevice.so", RTLD_LAZY);
if (!h_device) h_device = dlopen("libavdevice.so.60", RTLD_LAZY);
if (!h_device) h_device = dlopen("libavdevice.so.59", RTLD_LAZY);
if (!h_format || !h_device) { loaded = -1; return -1; }
p_avdevice_register_all = (fn_avdevice_register_all)
dlsym(h_device, "avdevice_register_all");
p_av_find_input_format = (fn_av_find_input_format)
dlsym(h_format, "av_find_input_format");
p_avformat_open_input = (fn_avformat_open_input)
dlsym(h_format, "avformat_open_input");
p_avformat_find_stream_info = (fn_avformat_find_stream_info)
dlsym(h_format, "avformat_find_stream_info");
p_avformat_close_input = (fn_avformat_close_input)
dlsym(h_format, "avformat_close_input");
p_av_read_frame = (fn_av_read_frame)
dlsym(h_format, "av_read_frame");
p_av_find_best_stream = (fn_av_find_best_stream)
dlsym(h_format, "av_find_best_stream");
p_av_packet_alloc = (fn_av_packet_alloc)
dlsym(h_format, "av_packet_alloc");
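  /* dlsym on an explicit handle also searches that library's own
     dependency chain, so av_packet_alloc (implemented in libavcodec,
     which libavformat links against) is normally resolved through
     libavformat's link map; if not, open libavcodec directly. */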
if (!p_av_packet_alloc) {
h_util = dlopen("libavcodec.so", RTLD_LAZY);
if (!h_util) h_util = dlopen("libavcodec.so.60", RTLD_LAZY);
if (h_util) p_av_packet_alloc = (fn_av_packet_alloc)
dlsym(h_util, "av_packet_alloc");
}
p_av_packet_free = (fn_av_packet_free)
dlsym(h_format, "av_packet_free");
if (!p_av_packet_free && h_util)
p_av_packet_free = (fn_av_packet_free)dlsym(h_util, "av_packet_free");
p_av_packet_unref = (fn_av_packet_unref)
dlsym(h_format, "av_packet_unref");
if (!p_av_packet_unref && h_util)
p_av_packet_unref = (fn_av_packet_unref)dlsym(h_util, "av_packet_unref");
if (!p_avformat_open_input || !p_av_read_frame ||
!p_av_packet_alloc || !p_av_packet_free) {
loaded = -1;
return -1;
}
loaded = 1;
return 0;
}
/* ── Public API called from Nim ──────────────────────────────── */
int av_helper_init(void) {
if (load_libs() < 0) return -1;
if (p_avdevice_register_all) p_avdevice_register_all();
return 0;
}
int av_helper_open_pulse(AVFormatContext **ctx, const char *device) {
if (!p_av_find_input_format || !p_avformat_open_input) return -1;
const AVInputFormat *fmt = p_av_find_input_format("pulse");
if (!fmt) return -1;
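  /* device is a PulseAudio source name, e.g. a sink's .monitor
     as discovered by findMonitorSource on the Nim side */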
return p_avformat_open_input(ctx, device, fmt, NULL);
}
int av_helper_find_audio_stream(AVFormatContext *ctx) {
if (!p_av_find_best_stream) return 0; /* assume stream 0 */
int ret = p_av_find_best_stream(ctx, 1 /* AVMEDIA_TYPE_AUDIO */,
-1, -1, NULL, 0);
return ret >= 0 ? ret : 0;
}
int av_helper_find_stream_info(AVFormatContext *ctx) {
if (!p_avformat_find_stream_info) return 0;
return p_avformat_find_stream_info(ctx, NULL);
}
int av_helper_read_frame(AVFormatContext *ctx, AVPacketHead *pkt) {
return p_av_read_frame(ctx, pkt);
}
int av_helper_packet_stream(AVPacketHead *pkt) { return pkt->stream_index; }
uint8_t* av_helper_packet_data(AVPacketHead *pkt) { return pkt->data; }
int av_helper_packet_size(AVPacketHead *pkt) { return pkt->size; }
AVPacketHead* av_helper_packet_alloc(void) { return p_av_packet_alloc(); }
void av_helper_packet_unref(AVPacketHead *pkt) { if (p_av_packet_unref) p_av_packet_unref(pkt); }
void av_helper_packet_free(AVPacketHead **pkt) { if (p_av_packet_free) p_av_packet_free(pkt); }
void av_helper_close(AVFormatContext **ctx) {
if (p_avformat_close_input) p_avformat_close_input(ctx);
}
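For completeness, a hypothetical standalone smoke test for the helper (not part of the commit; the file name and the "default" device are assumptions). It exercises the public API above without any Nim involvement:

/* avhelper_test.c: build with  cc avhelper_test.c avhelper.c -ldl  */
#include <stdio.h>

/* Declarations matching avhelper.c; the contexts are opaque (void). */
extern int   av_helper_init(void);
extern int   av_helper_open_pulse(void **ctx, const char *device);
extern int   av_helper_find_stream_info(void *ctx);
extern int   av_helper_find_audio_stream(void *ctx);
extern void *av_helper_packet_alloc(void);
extern int   av_helper_read_frame(void *ctx, void *pkt);
extern int   av_helper_packet_size(void *pkt);
extern void  av_helper_packet_unref(void *pkt);
extern void  av_helper_packet_free(void **pkt);
extern void  av_helper_close(void **ctx);

int main(int argc, char **argv) {
  const char *dev = argc > 1 ? argv[1] : "default";
  if (av_helper_init() < 0) { fprintf(stderr, "libav libs not found\n"); return 1; }
  void *ctx = NULL;
  if (av_helper_open_pulse(&ctx, dev) < 0) { fprintf(stderr, "open failed\n"); return 1; }
  av_helper_find_stream_info(ctx);
  printf("audio stream index: %d\n", av_helper_find_audio_stream(ctx));
  void *pkt = av_helper_packet_alloc();
  for (int i = 0; pkt && i < 5 && av_helper_read_frame(ctx, pkt) >= 0; i++) {
    printf("packet %d: %d bytes\n", i, av_helper_packet_size(pkt));
    av_helper_packet_unref(pkt);
  }
  av_helper_packet_free(&pkt);
  av_helper_close(&ctx);
  return 0;
}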