#!/usr/bin/env python3
#
# Hi; I know this is inefficient;
# Hi; I know it even hangs on run;
# Hi; You know this is insufficient;
# Hey, but you know it runs!
#
#
import argparse
import cv2
import numpy as np
import pyaudio
import wave
import os
from collections import deque
import time

# for convolution and threading
#
from scipy.signal import fftconvolve
from concurrent.futures import ThreadPoolExecutor

import soundfile as sf

from gesture_dsp.dsp_effects import (
    mid_side,
    convolution_reverb,
    bitcrush,
    filters,
    spectral_freeze,
    delay,
    pitch_shift,
)


def parse_args():
    parser = argparse.ArgumentParser(
        description="Gesture-based DSP demo: apply DSP effects via camera gestures."
    )
    parser.add_argument(
        "wav",
        help="Path to input stereo WAV file"
    )
    parser.add_argument(
        "--ir",
        help="Path to impulse response WAV file (for convolution reverb)",
        default=None
    )
    return parser.parse_args()
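
# typical invocation (file names are illustrative):
#   python gesture_dsp_demo.py song.wav --ir plate_ir.wav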


# :::
# :::: WRAPPERS FOR THE FO(UR)TRAN MODULES ::::
# ::::: ::::::::::::::::::::::::::::::::::: :::::
#
# NOTE: most of the f2py wrappers allocate and return the output buffer
# themselves, so no pre-allocation is needed here
#

def apply_mid_side(audio_in: np.ndarray, side_gain: float) -> np.ndarray:
    # n = number of stereo frames (the buffer is interleaved L/R)
    n = audio_in.size // 2
    return mid_side.mid_side(audio_in, side_gain, n)


def apply_bitcrush(audio_in: np.ndarray, bit_depth: int = 8) -> np.ndarray:
    return bitcrush.bitcrush_mod.bitcrush(audio_in, bit_depth)


def apply_lp_filter(audio_in: np.ndarray, cutoff: float, fs: float) -> np.ndarray:
    return filters.filters_mod.lp_filter(audio_in, cutoff, fs)


def apply_hp_filter(audio_in: np.ndarray, cutoff: float, fs: float) -> np.ndarray:
    return filters.filters_mod.hp_filter(audio_in, cutoff, fs)


def apply_delay(audio_in: np.ndarray, delay_samps: int = 4410, feedback: float = 0.5) -> np.ndarray:
    return delay.delay_mod.delay(audio_in, delay_samps, feedback)


def apply_convolution_reverb(audio_in: np.ndarray, ir: np.ndarray) -> np.ndarray:
    # uses scipy's fftconvolve rather than the Fortran convolution_reverb
    # module; mode='same' keeps the output the same length as the input
    return fftconvolve(audio_in, ir, mode='same')


def apply_spectral_freeze(audio_in: np.ndarray) -> np.ndarray:
    n = audio_in.size
    return spectral_freeze.spectral_freeze_mod.spectral_freeze(audio_in, n)


def apply_pitch_shift(audio_in: np.ndarray, semitones: int = 4) -> np.ndarray:
    return pitch_shift.pitch_shift_mod.pitch_shift(audio_in, semitones)


# ordered list of (name, callable) pairs; every callable takes the sample
# buffer, the sample rate, the loaded impulse response, and the gesture
# parameter p in [0, 1] (smoothed glove height), even if it ignores some
EFFECTS = [
    ("mid_side", lambda buf, fs, ir, p: apply_mid_side(buf, side_gain=min(max(p * 2.0, 0.0), 2.0))),
    ("bitcrush", lambda buf, fs, ir, p: apply_bitcrush(buf, bit_depth=int((p ** 3) * 32) + 1)),
    ("lowpass", lambda buf, fs, ir, p: apply_lp_filter(buf, cutoff=p * 6000 + 400, fs=fs)),
    ("highpass", lambda buf, fs, ir, p: apply_hp_filter(buf, cutoff=p * 6000 + 200, fs=fs)),
    ("delay", lambda buf, fs, ir, p: apply_delay(buf, delay_samps=int(fs * (0.2 + 0.6 * p)), feedback=0.3 + 0.6 * p)),
    ("reverb", lambda buf, fs, ir, p: apply_convolution_reverb(buf, ir)),
    ("spectral_freeze", lambda buf, fs, ir, p: apply_spectral_freeze(buf)),
    ("pitch_shift", lambda buf, fs, ir, p: apply_pitch_shift(buf, semitones=int(p * 60 - 30))),
]
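# the zone selector (GestureDSP.set_effect_by_zone) sweeps left-to-right
# hand position across this list in order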
EFFECT_NAMES = [name for name, _ in EFFECTS]


class GestureDSP:
    def __init__(self, wav_path: str, ir_path: str = None):

        self.wf = wave.open(wav_path, 'rb')
        assert self.wf.getnchannels() == 2, "Need stereo WAV"
        self.fs = self.wf.getframerate()
        self.p = pyaudio.PyAudio()

        self.frames_per_buffer = 4096
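        # 4096 frames per callback is roughly 93 ms of audio at 44.1 kHz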
        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=2,
            rate=self.fs,
            output=True,
            frames_per_buffer=self.frames_per_buffer,
            stream_callback=self._callback
        )

        num_channels = self.wf.getnchannels()
        max_samples = self.frames_per_buffer * num_channels
        self._in_buffer = np.empty(max_samples, dtype=np.float64)
        self._f64_clip_buffer = np.empty(max_samples, dtype=np.float64)
        self._int16_out_buffer = np.empty(max_samples, dtype=np.int16)

        # load IR if provided
        if ir_path and os.path.exists(ir_path):
            try:
                with wave.open(ir_path, 'rb') as ir_wf:
                    data = ir_wf.readframes(ir_wf.getnframes())
                    self.ir = np.frombuffer(data, dtype=np.int16).astype(np.float64)
            except wave.Error:
                # fallback for non-PCM WAV formats
                data, sr = sf.read(ir_path, dtype='float64')

                # flatten multi-channel IR
                if data.ndim > 1:
                    data = data.flatten()

                # convert float [-1, 1] to the int16 range
                self.ir = (data * 32767).astype(np.float64)
        else:
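            # a single zero sample: convolving with it yields silence, so
            # the reverb effect stays quiet until a real IR is supplied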
            self.ir = np.zeros(1, dtype=np.float64)

        # off the get-go with number one (or zero if that
        # floats your boat...)
        self.current_idx = 0
        self.current_param = 0.5

    def _callback(self, in_data, frame_count, time_info, status):
        raw = self.wf.readframes(frame_count)
        if not raw:
            return (raw, pyaudio.paComplete)

        # convert int16 input into the pre-allocated float buffer; n tracks
        # the actual sample count so a short final chunk never drags in
        # stale samples from the previous callback
        in_int16 = np.frombuffer(raw, dtype=np.int16)
        n = in_int16.size
        self._in_buffer[:n] = in_int16

        # apply the currently selected DSP effect
        _, func = EFFECTS[self.current_idx]
        audio_out = func(self._in_buffer[:n], self.fs, self.ir, self.current_param)

        # clip to the int16 range and convert into the pre-allocated buffer
        np.clip(audio_out, -32768, 32767, out=self._f64_clip_buffer[:n])
        self._int16_out_buffer[:n] = self._f64_clip_buffer[:n]

        # stereo: 2 int16 samples per frame; a short read means the file ended
        flag = pyaudio.paContinue if n == frame_count * 2 else pyaudio.paComplete
        return (self._int16_out_buffer[:n].tobytes(), flag)

    def start(self):
        self.stream.start_stream()

    def stop(self):
        self.stream.stop_stream()
        self.stream.close()
        self.wf.close()
        self.p.terminate()

    def set_effect_by_zone(self, x_center: float, frame_w: int):
        """Divide frame into N vertical zones to pick effect."""
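        # e.g. with 8 effects and a 640 px frame, each zone is 80 px wide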
        zone_width = frame_w / len(EFFECT_NAMES)
        idx = int(x_center // zone_width)
        self.current_idx = max(0, min(idx, len(EFFECT_NAMES) - 1))


# :::
# :::: HAND-DETECTION HELPERS ::::
# ::::: :::::::::::::::::::::: :::::
#

# god this is ugly;
# god; forgive me;
# god; and I don't
# even believe in you;
# ;
def is_hand_raised(frame):
    blur = cv2.GaussianBlur(frame, (5, 5), 0)
    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
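    # two hue bands because red wraps around OpenCV's 0-179 hue axis:
    # 140-179 catches the pink/magenta side, 0-10 catches true red
    # (this assumes the first glove is pink/red)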
    lb1, ub1 = np.array([140, 50, 50]), np.array([179, 255, 255])
    lb2, ub2 = np.array([0, 50, 50]), np.array([10, 255, 255])
    m1 = cv2.inRange(hsv, lb1, ub1)
    m2 = cv2.inRange(hsv, lb2, ub2)
    mask = cv2.morphologyEx(
        cv2.bitwise_or(m1, m2),
        cv2.MORPH_OPEN,
        np.ones((2, 2), np.uint8)
    )

    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts:
        return None

    c = max(cnts, key=cv2.contourArea)
    if cv2.contourArea(c) < 8:
        return None

    x, y, w, h = cv2.boundingRect(c)
    # the hand counts as "raised" only if its center sits in the
    # top 70% of the frame
    if (y + h / 2) < frame.shape[0] * 0.70:
        return (x, y, w, h)
    return None


def is_second_glove_raised(frame):
    blur = cv2.GaussianBlur(frame, (5, 5), 0)
    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
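    # single green hue band (35-85 on OpenCV's 0-179 scale), assuming
    # the second glove is green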
    lb = np.array([35, 50, 50])
    ub = np.array([85, 255, 255])
    mask = cv2.inRange(hsv, lb, ub)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8))

    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not cnts:
        return None

    # pick the largest contour to avoid multiple detections
    c = max(cnts, key=cv2.contourArea)
    if cv2.contourArea(c) < 100:
        return None

    x, y, w, h = cv2.boundingRect(c)
    return (x, y, w, h)


# :::
# :::: MAIN LOOP ::::
# ::::: ::::::::: :::::
#

def main():
    args = parse_args()

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("🔴 Cannot open camera")
        return

    # offload hand-detection to a worker thread
    # not a clue if I'm doing this right...
    # ...yet!
    # this ain't the therac, chill with your thread shrieks
    executor = ThreadPoolExecutor(max_workers=1)
    prev_future = None

    # parameter window thresholds for mapping glove height: the glove
    # sweeps the full 0..1 range between 20% and 80% of the frame height
    # and clamps outside that band
    param_min_ratio = 0.2
    param_max_ratio = 0.8

    # smoothing factor for parameter changes
    param_smooth = 0.2
    smoothed_param = 0.0

    player = GestureDSP(args.wav, ir_path=args.ir)
    player.start()

    # track recent effect indices for cue detection
    history = deque(maxlen=10)
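    # a cue fires when all 10 recent selections agree (checked below)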
    cue_mode = False

    # empty precompute cues
    # idk what I want to do with this really
    precomputed_cues = {}
    last_cue_time = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.flip(frame, 1)

            # schedule detection off the main thread; resubmit only after
            # the previous job finishes so slow detection can't pile up an
            # unbounded backlog of stale frames
            hand_box, glove_box = None, None
            if prev_future is None or prev_future.done():
                if prev_future is not None:
                    hand_box, glove_box = prev_future.result()
                prev_future = executor.submit(
                    lambda f: (is_hand_raised(f), is_second_glove_raised(f)),
                    frame.copy(),
                )

            if hand_box:
                x, y, w, h = hand_box
                cx = x + w / 2
            else:
                cx = None
            if glove_box:
                x2, y2, w2, h2 = glove_box
                cy = y2 + h2 / 2
            else:
                cy = None

            if cx is not None:
                # effect selection
                player.set_effect_by_zone(cx, frame.shape[1])
                effect_idx = player.current_idx
                effect_name = EFFECT_NAMES[effect_idx]
                history.append(effect_idx)
            else:
                effect_name = "none"

            # map the second glove's vertical position to the parameter,
            # with clamping and smoothing
            if cy is not None:
                frame_h = frame.shape[0]  # don't shadow the hand box's h
                min_y = param_min_ratio * frame_h
                max_y = param_max_ratio * frame_h
                if cy <= min_y:
                    raw_param = 1.0
                elif cy >= max_y:
                    raw_param = 0.0
                else:
                    raw_param = 1.0 - (cy - min_y) / (max_y - min_y)
            else:
                raw_param = 0.5

            # exponential smoothing: each frame keeps param_smooth (20%) of
            # the old value and takes the rest from the new reading
            smoothed_param = param_smooth * smoothed_param + (1.0 - param_smooth) * raw_param
            player.current_param = smoothed_param

            # detect a simple cue: the same effect selected for 10
            # consecutive frames, at most once every 2 seconds
            # NOTE: I hate this whole thing now
            #
            now = time.time()
            if len(history) == history.maxlen and len(set(history)) == 1 and now - last_cue_time > 2:
                last_cue_time = now
            # hold the overlay for a short window rather than a single frame
            cue_mode = (now - last_cue_time) < 0.75

            # draw detection rectangles
            if hand_box:
                x, y, w, h = hand_box
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            if glove_box:
                x2, y2, w2, h2 = glove_box
                cv2.rectangle(frame, (x2, y2), (x2 + w2, y2 + h2), (180, 105, 255), 2)

            # draw effect name
            cv2.putText(frame, f"Effect: {effect_name}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # draw the parameter bar on the right (grows up from the bottom)
            ph = int(player.current_param * frame.shape[0])
            cv2.rectangle(frame,
                          (frame.shape[1] - 50, frame.shape[0]),
                          (frame.shape[1] - 10, frame.shape[0] - ph),
                          (255, 0, 0), -1)

            # cue overlay
            if cue_mode:
                cv2.putText(frame, "CUE TRIGGERED", (frame.shape[1] // 2 - 100, 50),
                            cv2.FONT_HERSHEY_COMPLEX, 1.5, (0, 0, 255), 3)

            cv2.imshow("Gesture-DSP Demo", frame)
            key = cv2.waitKey(1) & 0xFF
            if key in (ord('q'), 27):
                break
            if not player.stream.is_active():
                break

    finally:
        player.stop()
        executor.shutdown(wait=False)
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()