// js/gesture-detector.js

/**
 * Colour-based hand tracker for a webcam feed.
 *
 * Every animation frame the current video image is painted onto a canvas,
 * sampled for yellow and green pixels, clustered, and the best hand-sized
 * cluster of each colour is reported through `onHandsDetected` and outlined
 * on the canvas.
 *
 * Known limitation (kept from the original design): once a previous frame is
 * available, only pixels flagged by the motion mask are sampled, so a hand
 * that is held perfectly still drops out of the detection.
 */
export class GestureDetector {
  /**
   * @param {HTMLCanvasElement} [canvas] - Canvas used both for pixel read-back
   *   and for the overlay. Defaults to the element with id `videoCanvas`.
   * @throws {TypeError} If no canvas is available.
   */
  constructor(canvas = document.getElementById('videoCanvas')) {
    if (!canvas) {
      throw new TypeError('GestureDetector: canvas element "videoCanvas" not found');
    }

    this.video = null;
    this.canvas = canvas;
    // getImageData() runs every frame; the hint asks the browser to keep the
    // backing store somewhere cheap to read back from.
    this.ctx = canvas.getContext('2d', { willReadFrequently: true });
    this.animationId = null;
    // The frame callback that currently owns the loop (null when stopped).
    this.activeLoop = null;

    /**
     * Callback for detected hands: `(yellowHand, greenHand) => void`.
     * Either argument is `null` when that colour was not found.
     */
    this.onHandsDetected = null;

    // Detection parameters
    this.minHandSize = 30; // Minimum bounding-box side in pixels
    this.maxHandSize = 150; // Maximum bounding-box side in pixels
    this.idealHandRatio = 1.2; // Expected width/height ratio

    // Tracking state
    this.yellowHistory = [];
    this.greenHistory = [];
    this.maxHistory = 5;

    // Colour thresholds in HSV space (all components in 0..1)
    this.yellowThreshold = { h: [0.12, 0.20], s: 0.3, v: 0.4 };
    this.greenThreshold = { h: [0.25, 0.40], s: 0.3, v: 0.3 };

    // Smoothing
    this.lastYellow = null;
    this.lastGreen = null;
    this.smoothingFactor = 0.3;

    // Motion detection
    this.previousFrame = null;
    this.motionThreshold = 10;
  }

  /**
   * Opens the user-facing camera and starts the processing loop.
   * Calling it while already running restarts the session cleanly.
   *
   * @returns {Promise<void>} Resolves once the loop has been scheduled.
   * @throws Rejects with the `getUserMedia` / `play()` error when the camera
   *   cannot be opened; the stream is released before rethrowing.
   */
  async start() {
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      throw new Error('GestureDetector: camera access is unavailable in this context');
    }

    // Release a previous session so its stream and frame loop do not leak.
    this.stop();

    const stream = await navigator.mediaDevices.getUserMedia({
      video: {
        width: 640,
        height: 480,
        facingMode: 'user',
      },
    });

    try {
      if (!this.video) {
        this.video = document.createElement('video');
        this.video.width = 640;
        this.video.height = 480;
        this.video.autoplay = true;
        this.video.playsInline = true;
        this.video.muted = true;
      }

      // Register the listener before assigning the stream so the event
      // cannot be missed.
      const metadataLoaded = new Promise((resolve) => {
        this.video.onloadedmetadata = resolve;
      });
      this.video.srcObject = stream;
      await metadataLoaded;

      // The element is never attached to the DOM, so start playback
      // explicitly instead of relying on `autoplay`.
      await this.video.play();
    } catch (err) {
      // Do not leave the camera on when setup fails half-way.
      stream.getTracks().forEach((track) => track.stop());
      if (this.video) {
        this.video.srcObject = null;
      }
      throw err;
    }

    this.startProcessing();
  }

  /**
   * Stops the frame loop, releases the camera and clears per-session state.
   * Safe to call at any time, including from inside `onHandsDetected`.
   */
  stop() {
    this.activeLoop = null;

    if (this.animationId !== null) {
      cancelAnimationFrame(this.animationId);
      this.animationId = null;
    }

    if (this.video && this.video.srcObject) {
      this.video.srcObject.getTracks().forEach((track) => track.stop());
      this.video.srcObject = null;
    }

    // A later start() must not diff against, or smooth from, stale data.
    this.previousFrame = null;
    this.lastYellow = null;
    this.lastGreen = null;
    this.yellowHistory.length = 0;
    this.greenHistory.length = 0;
  }

  /**
   * Starts (or restarts) the per-frame loop: draw video, detect, draw
   * overlays, notify the callback.
   */
  startProcessing() {
    if (this.animationId !== null) {
      cancelAnimationFrame(this.animationId);
    }

    const process = () => {
      // A stop() or a newer startProcessing() retires this closure.
      if (this.activeLoop !== process) return;

      try {
        if (this.video && this.video.readyState === this.video.HAVE_ENOUGH_DATA) {
          const { width, height } = this.canvas;

          // Draw video frame
          this.ctx.drawImage(this.video, 0, 0, width, height);

          // Read pixels before any overlay is drawn on top of them
          const imageData = this.ctx.getImageData(0, 0, width, height);
          const detection = this.detectHandsSmart(imageData);

          if (detection.yellowHand) {
            this.drawBox(detection.yellowHand, '#ffff00', 'Effect Select');
          }

          if (detection.greenHand) {
            this.drawBox(detection.greenHand, '#00ff00', 'Parameter');
          }

          if (this.onHandsDetected) {
            this.onHandsDetected(detection.yellowHand, detection.greenHand);
          }
        }
      } finally {
        // Reschedule only if still active: the callback may have called
        // stop(), and a throwing callback must not silently end the loop.
        if (this.activeLoop === process) {
          this.animationId = requestAnimationFrame(process);
        }
      }
    };

    this.activeLoop = process;
    this.animationId = requestAnimationFrame(process);
  }

  /**
   * Runs the full detection pipeline on one frame.
   *
   * @param {{data: Uint8ClampedArray, width: number, height: number}} imageData
   *   RGBA pixels as returned by `getImageData`.
   * @returns {{yellowHand: ?Object, greenHand: ?Object}} Smoothed bounding
   *   boxes (`x`, `y`, `width`, `height`, `confidence`, `color`) or `null`.
   */
  detectHandsSmart(imageData) {
    const { data, width, height } = imageData;

    // Motion mask against the previous frame, when one of the same size
    // exists (the canvas may have been resized in between).
    const previous = this.previousFrame;
    let motionMask = null;
    if (previous && previous.length === data.length) {
      motionMask = this.detectMotion(data, previous, width, height);
      previous.set(data); // reuse the buffer instead of reallocating
    } else {
      this.previousFrame = new Uint8ClampedArray(data);
    }

    const yellowCandidates = [];
    const greenCandidates = [];

    // Sample every second pixel in both directions.
    const step = 2;
    for (let y = 0; y < height; y += step) {
      const rowStart = y * width;
      for (let x = 0; x < width; x += step) {
        const pixel = rowStart + x;

        // Skip if no motion in this area (when motion mask exists)
        if (motionMask && !motionMask[pixel]) {
          continue;
        }

        const i = pixel * 4;
        const r = data[i] / 255;
        const g = data[i + 1] / 255;
        const b = data[i + 2] / 255;

        const hsv = this.rgbToHsv(r, g, b);
        const confidence = hsv.s * hsv.v;

        // YELLOW: HSV window plus strong red+green and weak blue
        if (this.matchesThreshold(hsv, this.yellowThreshold) &&
            r > 0.6 && g > 0.6 && b < 0.4) {
          yellowCandidates.push({ x, y, confidence });
        }

        // GREEN: HSV window plus green clearly dominating red and blue
        if (this.matchesThreshold(hsv, this.greenThreshold) &&
            g > r * 1.3 && g > b * 1.2) {
          greenCandidates.push({ x, y, confidence });
        }
      }
    }

    // Cluster candidates and keep the best hand-sized cluster per colour
    const yellowHand = this.findBestHandCluster(this.clusterPoints(yellowCandidates), 'yellow');
    const greenHand = this.findBestHandCluster(this.clusterPoints(greenCandidates), 'green');

    // Apply temporal smoothing
    this.lastYellow = this.smoothDetection(yellowHand, this.lastYellow);
    this.lastGreen = this.smoothDetection(greenHand, this.lastGreen);

    return {
      yellowHand: this.lastYellow,
      greenHand: this.lastGreen,
    };
  }

  /**
   * Tests an HSV colour against a threshold record.
   *
   * @param {{h: number, s: number, v: number}} hsv
   * @param {{h: number[], s: number, v: number}} threshold - Inclusive hue
   *   range plus exclusive lower bounds for saturation and value.
   * @returns {boolean}
   */
  matchesThreshold(hsv, threshold) {
    return hsv.h >= threshold.h[0] &&
      hsv.h <= threshold.h[1] &&
      hsv.s > threshold.s &&
      hsv.v > threshold.v;
  }

  /**
   * Builds a per-pixel motion mask by comparing two RGBA frames.
   * One pixel per 4x4 tile is compared; a tile whose summed RGB difference
   * exceeds `motionThreshold` is marked as moving in its entirety.
   *
   * @param {ArrayLike<number>} currentData
   * @param {ArrayLike<number>} previousData
   * @param {number} width
   * @param {number} height
   * @returns {Uint8Array} `width * height` entries, 1 = motion, 0 = still.
   */
  detectMotion(currentData, previousData, width, height) {
    const motionMask = new Uint8Array(width * height);
    const tile = 4;

    for (let y = 0; y < height; y += tile) {
      for (let x = 0; x < width; x += tile) {
        const i = (y * width + x) * 4;

        const totalDiff =
          Math.abs(currentData[i] - previousData[i]) +
          Math.abs(currentData[i + 1] - previousData[i + 1]) +
          Math.abs(currentData[i + 2] - previousData[i + 2]);

        if (totalDiff > this.motionThreshold) {
          // Mark the whole tile, clipped to the image bounds
          const yEnd = Math.min(y + tile, height);
          const xEnd = Math.min(x + tile, width);
          for (let ty = y; ty < yEnd; ty++) {
            motionMask.fill(1, ty * width + x, ty * width + xEnd);
          }
        }
      }
    }

    return motionMask;
  }

  /**
   * Groups candidate points into density-connected clusters (DBSCAN).
   *
   * A point with at least `minPoints` points (itself included) closer than
   * `epsilon` is a core point; a cluster is everything reachable from a core
   * point through other core points. Points reachable from no core point are
   * dropped as noise. Neighbour look-ups go through a uniform grid with cell
   * size `epsilon`, so only the 3x3 surrounding cells are inspected.
   *
   * @param {Array<{x: number, y: number}>} points
   * @param {number} [epsilon=30] - Neighbourhood radius in pixels (exclusive).
   * @param {number} [minPoints=20] - Neighbours required for a core point.
   * @returns {Array<Array<Object>>} Clusters holding the original point objects.
   */
  clusterPoints(points, epsilon = 30, minPoints = 20) {
    const total = points.length;
    if (total < minPoints || !(epsilon > 0)) return [];

    const epsilonSq = epsilon * epsilon;

    // Grid extent; one spare column on each side keeps the 3x3 neighbour
    // keys from wrapping into another row.
    let minX = Infinity;
    let maxX = -Infinity;
    let minY = Infinity;
    for (const p of points) {
      if (p.x < minX) minX = p.x;
      if (p.x > maxX) maxX = p.x;
      if (p.y < minY) minY = p.y;
    }
    const cols = Math.floor((maxX - minX) / epsilon) + 3;

    const cellCol = new Int32Array(total);
    const cellRow = new Int32Array(total);
    const allCells = new Map(); // every point, for core tests
    const openCells = new Map(); // points not yet assigned to a cluster
    for (let i = 0; i < total; i++) {
      cellCol[i] = Math.floor((points[i].x - minX) / epsilon) + 1;
      cellRow[i] = Math.floor((points[i].y - minY) / epsilon) + 1;
      const key = cellRow[i] * cols + cellCol[i];
      if (!allCells.has(key)) {
        allCells.set(key, []);
        openCells.set(key, []);
      }
      allCells.get(key).push(i);
      openCells.get(key).push(i);
    }

    const claimed = new Uint8Array(total);

    // True when point i has at least minPoints neighbours (early exit).
    const isCore = (i) => {
      const { x, y } = points[i];
      let hits = 0;
      for (let dr = -1; dr <= 1; dr++) {
        for (let dc = -1; dc <= 1; dc++) {
          const bucket = allCells.get((cellRow[i] + dr) * cols + cellCol[i] + dc);
          if (!bucket) continue;
          for (const k of bucket) {
            const dx = points[k].x - x;
            const dy = points[k].y - y;
            if (dx * dx + dy * dy < epsilonSq && ++hits >= minPoints) {
              return true;
            }
          }
        }
      }
      return false;
    };

    // Moves every still-unassigned neighbour of point i onto the queue.
    const claimNeighbors = (i, queue) => {
      const { x, y } = points[i];
      for (let dr = -1; dr <= 1; dr++) {
        for (let dc = -1; dc <= 1; dc++) {
          const bucket = openCells.get((cellRow[i] + dr) * cols + cellCol[i] + dc);
          if (!bucket) continue;
          let m = 0;
          while (m < bucket.length) {
            const k = bucket[m];
            const dx = points[k].x - x;
            const dy = points[k].y - y;
            if (dx * dx + dy * dy < epsilonSq) {
              claimed[k] = 1;
              queue.push(k);
              // Swap-remove; re-examine slot m on the next iteration.
              bucket[m] = bucket[bucket.length - 1];
              bucket.pop();
            } else {
              m++;
            }
          }
        }
      }
    };

    const clusters = [];
    for (let seed = 0; seed < total; seed++) {
      if (claimed[seed] || !isCore(seed)) continue;

      const queue = [];
      claimNeighbors(seed, queue); // includes the seed itself

      const cluster = [];
      for (let q = 0; q < queue.length; q++) {
        const j = queue[q];
        cluster.push(points[j]);
        // Only core points extend the cluster; border points just join it.
        if (j !== seed && isCore(j)) {
          claimNeighbors(j, queue);
        }
      }
      clusters.push(cluster);
    }

    return clusters;
  }

  /**
   * Picks the cluster whose bounding box looks most like a hand.
   *
   * Boxes with a side outside `[minHandSize, maxHandSize]` are rejected; the
   * rest are ranked by a weighted sum of size, aspect-ratio, density and mean
   * pixel confidence. The winner is appended to the colour's history.
   *
   * NOTE(review): the density term is not normalised to 0..1 and can outweigh
   * the other three terms; left unchanged here because it only affects
   * ranking between competing clusters — confirm whether that is intended.
   *
   * @param {Array<Array<{x: number, y: number, confidence: number}>>} clusters
   * @param {string} color - `'yellow'` selects `yellowHistory`; anything else
   *   selects `greenHistory`.
   * @returns {?Object} `{x, y, width, height, confidence, color}` or `null`.
   */
  findBestHandCluster(clusters, color) {
    if (clusters.length === 0) return null;

    let bestCluster = null;
    let bestScore = -1;

    for (const cluster of clusters) {
      if (cluster.length === 0) continue;

      // Bounding box and confidence sum in one pass. Spreading a large
      // cluster into Math.min/Math.max can exceed the engine's argument limit.
      let minX = Infinity;
      let maxX = -Infinity;
      let minY = Infinity;
      let maxY = -Infinity;
      let confidenceSum = 0;
      for (const p of cluster) {
        if (p.x < minX) minX = p.x;
        if (p.x > maxX) maxX = p.x;
        if (p.y < minY) minY = p.y;
        if (p.y > maxY) maxY = p.y;
        confidenceSum += p.confidence;
      }

      const box = {
        x: minX,
        y: minY,
        width: maxX - minX,
        height: maxY - minY,
      };

      // Skip if too small or too large
      if (box.width < this.minHandSize || box.height < this.minHandSize ||
          box.width > this.maxHandSize || box.height > this.maxHandSize) {
        continue;
      }

      // Calculate quality score
      const area = box.width * box.height;
      const sizeScore = 1 - Math.abs(area - 5000) / 10000; // Ideal area ~5000px
      const ratioScore = 1 - Math.abs(box.width / box.height - this.idealHandRatio) / 2;
      const densityScore = cluster.length / area * 100;
      const confidenceScore = confidenceSum / cluster.length;

      const totalScore = sizeScore * 0.3 + ratioScore * 0.2 + densityScore * 0.2 + confidenceScore * 0.3;

      if (totalScore > bestScore) {
        bestScore = totalScore;
        bestCluster = box;
        bestCluster.confidence = confidenceScore;
        bestCluster.color = color;
      }
    }

    // Update history for this color
    if (bestCluster) {
      const history = color === 'yellow' ? this.yellowHistory : this.greenHistory;
      history.push(bestCluster);
      if (history.length > this.maxHistory) {
        history.shift();
      }
    }

    return bestCluster;
  }

  /**
   * Exponentially smooths a box towards the new detection.
   *
   * @param {?Object} current - Box detected in this frame.
   * @param {?Object} previous - Smoothed box from the last frame.
   * @returns {?Object} `null` when nothing was detected, `current` itself when
   *   there is no history, otherwise a new interpolated box.
   */
  smoothDetection(current, previous) {
    if (!current) return null;
    if (!previous) return current;

    const blend = (from, to) => from + (to - from) * this.smoothingFactor;

    return {
      x: blend(previous.x, current.x),
      y: blend(previous.y, current.y),
      width: blend(previous.width, current.width),
      height: blend(previous.height, current.height),
      confidence: current.confidence,
      color: current.color,
    };
  }

  /**
   * Draws a bounding box with accented corners, an optional label showing the
   * confidence percentage, and a centre dot.
   *
   * @param {{x: number, y: number, width: number, height: number, confidence: number}} box
   * @param {string} color - CSS colour for strokes and text.
   * @param {string} [label] - Caption; omitted when falsy.
   */
  drawBox(box, color, label) {
    const { ctx } = this;
    const { x, y, width, height } = box;
    const right = x + width;
    const bottom = y + height;

    // Draw bounding box
    ctx.strokeStyle = color;
    ctx.lineWidth = 3;
    ctx.strokeRect(x, y, width, height);

    // Draw corners for style: each entry is start, corner, end
    const cornerLength = 15;
    ctx.lineWidth = 4;
    const corners = [
      [x, y + cornerLength, x, y, x + cornerLength, y], // top-left
      [right - cornerLength, y, right, y, right, y + cornerLength], // top-right
      [x, bottom - cornerLength, x, bottom, x + cornerLength, bottom], // bottom-left
      [right - cornerLength, bottom, right, bottom, right, bottom - cornerLength], // bottom-right
    ];
    for (const [x1, y1, x2, y2, x3, y3] of corners) {
      ctx.beginPath();
      ctx.moveTo(x1, y1);
      ctx.lineTo(x2, y2);
      ctx.lineTo(x3, y3);
      ctx.stroke();
    }

    // Draw label with confidence
    if (label) {
      const text = `${label} ${Math.round(box.confidence * 100)}%`;
      ctx.font = 'bold 14px monospace';
      // Size the backdrop to the text (never narrower than the old fixed
      // 120px) and keep it on the canvas when the box touches the top edge.
      const labelWidth = Math.max(120, ctx.measureText(text).width + 10);
      const labelTop = Math.max(0, y - 25);
      ctx.fillStyle = 'rgba(0, 0, 0, 0.7)';
      ctx.fillRect(x, labelTop, labelWidth, 22);
      ctx.fillStyle = color;
      ctx.fillText(text, x + 5, labelTop + 17);
    }

    // Draw center point
    ctx.fillStyle = color;
    ctx.beginPath();
    ctx.arc(x + width / 2, y + height / 2, 4, 0, 2 * Math.PI);
    ctx.fill();
  }

  /**
   * Converts RGB to HSV.
   *
   * @param {number} r - Red, 0..1.
   * @param {number} g - Green, 0..1.
   * @param {number} b - Blue, 0..1.
   * @returns {{h: number, s: number, v: number}} Hue as a fraction of the
   *   colour wheel (0 for greys), saturation and value.
   */
  rgbToHsv(r, g, b) {
    const max = Math.max(r, g, b);
    const min = Math.min(r, g, b);
    const diff = max - min;

    const v = max;
    const s = max === 0 ? 0 : diff / max;

    let h = 0;
    if (diff !== 0) {
      if (max === r) {
        // Wrap negative hues (g < b) back into 0..1
        h = ((g - b) / diff + (g < b ? 6 : 0)) / 6;
      } else if (max === g) {
        h = ((b - r) / diff + 2) / 6;
      } else {
        h = ((r - g) / diff + 4) / 6;
      }
    }

    return { h, s, v };
  }
}