diff --git a/src/osc/audio.nim b/src/osc/audio.nim index 13f89aa..e29c322 100644 --- a/src/osc/audio.nim +++ b/src/osc/audio.nim @@ -1,23 +1,39 @@ -## Audio capture: tries ffmpeg (PulseAudio monitor) → parec → demo signal. +## Audio capture via libavdevice/libavformat (direct C bindings), +## with fallback to ffmpeg subprocess, then demo signal. import osproc, streams, strutils, math import scope -type - AudioMode* = enum - amLive ## Capturing real audio via ffmpeg/parec - amDemo ## Built-in synthesized waveforms +# ── libav C helper bindings ────────────────────────────────────────── - AudioCapture* = object - mode*: AudioMode - process: Process - stream: Stream - phase: float - demoFreqL*, demoFreqR*: float - demoPreset*: int +{.compile: "avhelper.c".} +{.passL: "-ldl".} + +type + AVFormatContext = object # opaque, only used as pointer + AVPacket = object # opaque, only used as pointer + +proc av_helper_init(): cint {.importc, cdecl.} +proc av_helper_open_pulse(ctx: ptr ptr AVFormatContext, + device: cstring): cint {.importc, cdecl.} +proc av_helper_find_stream_info(ctx: ptr AVFormatContext): cint + {.importc, cdecl.} +proc av_helper_find_audio_stream(ctx: ptr AVFormatContext): cint + {.importc, cdecl.} +proc av_helper_read_frame(ctx: ptr AVFormatContext, + pkt: ptr AVPacket): cint {.importc, cdecl.} +proc av_helper_packet_stream(pkt: ptr AVPacket): cint {.importc, cdecl.} +proc av_helper_packet_data(pkt: ptr AVPacket): ptr UncheckedArray[uint8] + {.importc, cdecl.} +proc av_helper_packet_size(pkt: ptr AVPacket): cint {.importc, cdecl.} +proc av_helper_packet_alloc(): ptr AVPacket {.importc, cdecl.} +proc av_helper_packet_unref(pkt: ptr AVPacket) {.importc, cdecl.} +proc av_helper_packet_free(pkt: ptr ptr AVPacket) {.importc, cdecl.} +proc av_helper_close(ctx: ptr ptr AVFormatContext) {.importc, cdecl.} + +# ── Monitor source detection ───────────────────────────────────────── proc findMonitorSource(): string = - ## Find the PulseAudio monitor for the default 
audio sink. try: let inspect = execProcess("wpctl", args = ["inspect", "@DEFAULT_AUDIO_SINK@"], @@ -30,10 +46,51 @@ proc findMonitorSource(): string = except: discard "" +# ── Audio capture types ────────────────────────────────────────────── + +type + AudioMode* = enum + amLibav ## Direct libav capture (fastest, no subprocess) + amLive ## ffmpeg/parec subprocess fallback + amDemo ## Built-in synthesized waveforms + + AudioCapture* = object + mode*: AudioMode + # libav state + fmtCtx: ptr AVFormatContext + packet: ptr AVPacket + streamIdx: cint + # subprocess fallback + process: Process + stream: Stream + # demo state + phase: float + demoFreqL*, demoFreqR*: float + demoPreset*: int + +# ── Start / stop ───────────────────────────────────────────────────── + proc startAudio*(): AudioCapture = - ## Try real audio capture, fall back to demo. let monitor = findMonitorSource() if monitor.len > 0: + # Try direct libav first (dlopen at runtime, no dev packages needed) + block libav: + if av_helper_init() < 0: break libav + var ctx: ptr AVFormatContext = nil + if av_helper_open_pulse(addr ctx, monitor.cstring) < 0: break libav + if av_helper_find_stream_info(ctx) < 0: + av_helper_close(addr ctx) + break libav + let idx = av_helper_find_audio_stream(ctx) + let pkt = av_helper_packet_alloc() + if pkt != nil: + return AudioCapture( + mode: amLibav, fmtCtx: ctx, packet: pkt, + streamIdx: idx.cint, + demoFreqL: 440.0, demoFreqR: 330.0) + av_helper_close(addr ctx) + + # Fallback: ffmpeg subprocess try: let p = startProcess("ffmpeg", args = ["-f", "pulse", "-i", monitor, @@ -45,27 +102,31 @@ proc startAudio*(): AudioCapture = demoFreqL: 440.0, demoFreqR: 330.0) except OSError: discard - try: - let p = startProcess("parec", - args = ["--format=s16le", "--channels=2", "--rate=44100", - "--latency-msec=20"], - options = {poUsePath}) - return AudioCapture(mode: amLive, process: p, stream: p.outputStream, - demoFreqL: 440.0, demoFreqR: 330.0) - except OSError: discard - + # Fallback: 
demo AudioCapture(mode: amDemo, demoFreqL: 440.0, demoFreqR: 330.0) proc stop*(cap: var AudioCapture) = - if cap.mode == amLive: + case cap.mode + of amLibav: + if cap.packet != nil: + av_helper_packet_free(addr cap.packet) + if cap.fmtCtx != nil: + av_helper_close(addr cap.fmtCtx) + of amLive: cap.process.terminate() cap.process.close() + of amDemo: + discard proc sourceLabel*(cap: AudioCapture): string = - if cap.mode == amLive: "LIVE" else: "DEMO" + case cap.mode + of amLibav: "LIVE" + of amLive: "LIVE" + of amDemo: "DEMO" + +# ── Preset cycling ───────────────────────────────────────────────── proc cyclePreset*(cap: var AudioCapture) = - ## Cycle through demo frequency ratios for interesting Lissajous patterns. if cap.mode != amDemo: return cap.demoPreset = (cap.demoPreset + 1) mod 4 case cap.demoPreset @@ -75,10 +136,35 @@ proc cyclePreset*(cap: var AudioCapture) = of 3: cap.demoFreqL = 440.0; cap.demoFreqR = 293.3 # 3:2 else: discard +# ── Sample reading ─────────────────────────────────────────────────── + proc readSamples*(cap: var AudioCapture, scope: var Scope) = case cap.mode + of amLibav: + # Read frames directly from libav — no subprocess, no pipe + const frameSize = 4 # 2ch × 16-bit + var totalSamples = 0 + while totalSamples < scope.samplesL.len: + let ret = av_helper_read_frame(cap.fmtCtx, cap.packet) + if ret < 0: break + if av_helper_packet_stream(cap.packet) == cap.streamIdx: + let data = av_helper_packet_data(cap.packet) + let size = av_helper_packet_size(cap.packet) + let frames = size div frameSize + for i in 0..<frames: + if totalSamples >= scope.samplesL.len: break + let off = i * frameSize + let left = cast[int16]((data[off + 1].uint16 shl 8) or data[off].uint16) + let right = cast[int16]((data[off + 3].uint16 shl 8) or data[off + 2].uint16) + scope.samplesL[totalSamples] = left.float / 32768.0 + scope.samplesR[totalSamples] = right.float / 32768.0 + totalSamples += 1 + av_helper_packet_unref(cap.packet) + if totalSamples > 0: break # got some data, render it + 
scope.sampleCount = totalSamples + of amLive: - const frameSize = 4 # 2 channels × 16-bit + const frameSize = 4 const maxFrames = 2048 var buf: array[maxFrames * frameSize, uint8] let bytesRead = cap.stream.readData(addr buf[0], maxFrames * frameSize) diff --git a/src/osc/avhelper.c b/src/osc/avhelper.c new file mode 100644 index 0000000..6cd2282 --- /dev/null +++ b/src/osc/avhelper.c @@ -0,0 +1,152 @@ +/* Minimal libav audio capture without requiring dev headers. + Loads libavformat/libavdevice at runtime via dlopen. */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <dlfcn.h> + +/* Opaque handles — we never touch the struct internals from Nim */ +typedef void AVFormatContext; +typedef void AVInputFormat; +typedef void AVDictionary; + +/* AVPacket — we only need data, size, stream_index. + Layout is stable across FFmpeg 5.x/6.x/7.x: + first field is AVBufferRef*, then data, size, stream_index */ +typedef struct { + void *buf; + uint8_t *data; + int size; + int stream_index; + /* we don't care about the rest */ +} AVPacketHead; + +/* Function pointer types matching libav API */ +typedef void (*fn_avdevice_register_all)(void); +typedef const AVInputFormat* (*fn_av_find_input_format)(const char*); +typedef int (*fn_avformat_open_input)(AVFormatContext**, const char*, + const AVInputFormat*, AVDictionary**); +typedef int (*fn_avformat_find_stream_info)(AVFormatContext*, AVDictionary**); +typedef void (*fn_avformat_close_input)(AVFormatContext**); +typedef int (*fn_av_read_frame)(AVFormatContext*, AVPacketHead*); +typedef AVPacketHead* (*fn_av_packet_alloc)(void); +typedef void (*fn_av_packet_free)(AVPacketHead**); +typedef void (*fn_av_packet_unref)(AVPacketHead*); + +/* Accessors for AVFormatContext fields via known offsets. + We use av_find_best_stream to avoid struct access entirely. 
*/ +typedef int (*fn_av_find_best_stream)(AVFormatContext*, int media_type, + int wanted, int related, void**, int flags); + +/* Loaded function pointers */ +static fn_avdevice_register_all p_avdevice_register_all; +static fn_av_find_input_format p_av_find_input_format; +static fn_avformat_open_input p_avformat_open_input; +static fn_avformat_find_stream_info p_avformat_find_stream_info; +static fn_avformat_close_input p_avformat_close_input; +static fn_av_read_frame p_av_read_frame; +static fn_av_packet_alloc p_av_packet_alloc; +static fn_av_packet_free p_av_packet_free; +static fn_av_packet_unref p_av_packet_unref; +static fn_av_find_best_stream p_av_find_best_stream; + +static void *h_format, *h_device, *h_util; +static int loaded = 0; + +static int load_libs(void) { + if (loaded) return loaded > 0 ? 0 : -1; + + h_format = dlopen("libavformat.so", RTLD_LAZY); + if (!h_format) h_format = dlopen("libavformat.so.60", RTLD_LAZY); + if (!h_format) h_format = dlopen("libavformat.so.59", RTLD_LAZY); + + h_device = dlopen("libavdevice.so", RTLD_LAZY); + if (!h_device) h_device = dlopen("libavdevice.so.60", RTLD_LAZY); + if (!h_device) h_device = dlopen("libavdevice.so.59", RTLD_LAZY); + + if (!h_format || !h_device) { loaded = -1; return -1; } + + p_avdevice_register_all = (fn_avdevice_register_all) + dlsym(h_device, "avdevice_register_all"); + p_av_find_input_format = (fn_av_find_input_format) + dlsym(h_format, "av_find_input_format"); + p_avformat_open_input = (fn_avformat_open_input) + dlsym(h_format, "avformat_open_input"); + p_avformat_find_stream_info = (fn_avformat_find_stream_info) + dlsym(h_format, "avformat_find_stream_info"); + p_avformat_close_input = (fn_avformat_close_input) + dlsym(h_format, "avformat_close_input"); + p_av_read_frame = (fn_av_read_frame) + dlsym(h_format, "av_read_frame"); + p_av_find_best_stream = (fn_av_find_best_stream) + dlsym(h_format, "av_find_best_stream"); + p_av_packet_alloc = (fn_av_packet_alloc) + dlsym(h_format, 
"av_packet_alloc"); + if (!p_av_packet_alloc) { + h_util = dlopen("libavcodec.so", RTLD_LAZY); + if (!h_util) h_util = dlopen("libavcodec.so.60", RTLD_LAZY); + if (h_util) p_av_packet_alloc = (fn_av_packet_alloc) + dlsym(h_util, "av_packet_alloc"); + } + p_av_packet_free = (fn_av_packet_free) + dlsym(h_format, "av_packet_free"); + if (!p_av_packet_free && h_util) + p_av_packet_free = (fn_av_packet_free)dlsym(h_util, "av_packet_free"); + p_av_packet_unref = (fn_av_packet_unref) + dlsym(h_format, "av_packet_unref"); + if (!p_av_packet_unref && h_util) + p_av_packet_unref = (fn_av_packet_unref)dlsym(h_util, "av_packet_unref"); + + if (!p_avformat_open_input || !p_av_read_frame || + !p_av_packet_alloc || !p_av_packet_free) { + loaded = -1; + return -1; + } + + loaded = 1; + return 0; +} + +/* ── Public API called from Nim ──────────────────────────────── */ + +int av_helper_init(void) { + if (load_libs() < 0) return -1; + if (p_avdevice_register_all) p_avdevice_register_all(); + return 0; +} + +int av_helper_open_pulse(AVFormatContext **ctx, const char *device) { + if (!p_av_find_input_format || !p_avformat_open_input) return -1; + const AVInputFormat *fmt = p_av_find_input_format("pulse"); + if (!fmt) return -1; + return p_avformat_open_input(ctx, device, fmt, NULL); +} + +int av_helper_find_audio_stream(AVFormatContext *ctx) { + if (!p_av_find_best_stream) return 0; /* assume stream 0 */ + int ret = p_av_find_best_stream(ctx, 1 /* AVMEDIA_TYPE_AUDIO */, + -1, -1, NULL, 0); + return ret >= 0 ? 
ret : 0; +} + +int av_helper_find_stream_info(AVFormatContext *ctx) { + if (!p_avformat_find_stream_info) return 0; + return p_avformat_find_stream_info(ctx, NULL); +} + +int av_helper_read_frame(AVFormatContext *ctx, AVPacketHead *pkt) { + return p_av_read_frame(ctx, pkt); +} + +int av_helper_packet_stream(AVPacketHead *pkt) { return pkt->stream_index; } +uint8_t* av_helper_packet_data(AVPacketHead *pkt) { return pkt->data; } +int av_helper_packet_size(AVPacketHead *pkt) { return pkt->size; } + +AVPacketHead* av_helper_packet_alloc(void) { return p_av_packet_alloc(); } +void av_helper_packet_unref(AVPacketHead *pkt) { if (p_av_packet_unref) p_av_packet_unref(pkt); } +void av_helper_packet_free(AVPacketHead **pkt) { if (p_av_packet_free) p_av_packet_free(pkt); } + +void av_helper_close(AVFormatContext **ctx) { + if (p_avformat_close_input) p_avformat_close_input(ctx); +}