package expo.modules.facedetection

import android.content.Context
import android.graphics.Bitmap
import android.graphics.Rect
import android.util.Log
import expo.modules.facedetection.core.MyUtil
import expo.modules.facedetection.core.faceantispoofing.FaceAntiSpoofing
import expo.modules.facedetection.core.mobilefacenet.MobileFaceNet
import expo.modules.facedetection.core.mtcnn.Align
import expo.modules.facedetection.core.mtcnn.MTCNN
import java.io.IOException
import kotlin.math.sqrt

/**
 * Singleton manager for face detection models.
 * Loads all TFLite models once and provides detection/matching methods.
 *
 * Obtain the shared instance through [FaceDetectionManager.getInstance]. The constructor
 * loads the three models from the context's assets and throws a [RuntimeException]
 * (wrapping the [IOException]) if any of them cannot be loaded.
 *
 * Debug logging is routed through a helper that is gated on [Log.isLoggable], so the
 * embedding values are only formatted and written to logcat when the tag has been
 * enabled explicitly, e.g. `adb shell setprop log.tag.FACE_MATCH_DEBUG DEBUG`.
 *
 * NOTE(review): the tunable thresholds and [targetEmbedding] are plain mutable fields with
 * no synchronization in this class — confirm that callers use a single thread.
 */
class FaceDetectionManager private constructor(context: Context) {

    private val mtcnn: MTCNN
    private val mobileFaceNet: MobileFaceNet
    private val faceAntiSpoofing: FaceAntiSpoofing

    // Thresholds
    private var minFaceRatio: Float = 0.2f
    private var detectionConfidenceThreshold: Float = 0.6f
    private var livenessThreshold: Float = 0.2f
    private var sharpnessThreshold: Float = 50f
    private var matchThreshold: Float = 1.1f

    // Current target embedding for matching
    private var targetEmbedding: FloatArray? = null

    init {
        try {
            mtcnn = MTCNN(context.assets)
            mobileFaceNet = MobileFaceNet(context.assets)
            faceAntiSpoofing = FaceAntiSpoofing(context.assets)
        } catch (e: IOException) {
            throw RuntimeException("Failed to load face detection models", e)
        }
    }

    // ========== Internal helpers ==========

    /** Emits a debug log line only when [tag] is loggable at DEBUG; [message] is built lazily. */
    private inline fun debugLog(tag: String, message: () -> String) {
        if (Log.isLoggable(tag, Log.DEBUG)) {
            Log.d(tag, message())
        }
    }

    /** Runs MTCNN on [bitmap] with a minimum face size of `bitmap.width * minFaceRatio`. */
    private fun detectBoxes(bitmap: Bitmap) =
        mtcnn.detectFaces(bitmap, (bitmap.width * minFaceRatio).toInt())

    /** Converts a rectangle into the `left/top/right/bottom` map used by all result types. */
    private fun Rect.toBoxMap(): Map<String, Int> = mapOf(
        "left" to left,
        "top" to top,
        "right" to right,
        "bottom" to bottom
    )

    /** Euclidean (L2) norm of the vector. */
    private fun FloatArray.l2Norm(): Float = sqrt(fold(0f) { acc, v -> acc + v * v })

    /** Formats the first (or last, when [fromEnd] is true) five components for log output. */
    private fun FloatArray.preview(fromEnd: Boolean = false): String {
        val slice = if (fromEnd) takeLast(5) else take(5)
        return slice.joinToString(", ") { "%.4f".format(it) }
    }

    /**
     * Maps a liveness score to a confidence in `[0, 1]` relative to [livenessThreshold].
     *
     * The raw ratio divides by the threshold (live case) or by `1 - threshold` (not-live
     * case); the result is clamped, and NaN is mapped to 0, so that a threshold of 0 or 1
     * cannot leak NaN/Infinity into the result.
     */
    private fun livenessConfidence(score: Float, isLive: Boolean): Float {
        val raw = if (isLive) {
            1f - (score / livenessThreshold)
        } else {
            (score - livenessThreshold) / (1f - livenessThreshold)
        }
        return if (raw.isNaN()) 0f else raw.coerceIn(0f, 1f)
    }

    // ========== Face Detection ==========

    /**
     * Detects all faces in [bitmap] whose score is not below the detection confidence
     * threshold.
     *
     * Each accepted box is squared and limited to the bitmap bounds before being converted
     * to a rectangle.
     *
     * @param bitmap frame to analyse.
     * @param cropFaces when true, each [DetectedFace] also carries the cropped face bitmap.
     * @return the detected faces plus the frame size and elapsed wall-clock time.
     */
    fun detectFaces(bitmap: Bitmap, cropFaces: Boolean = false): DetectionResult {
        val startTime = System.currentTimeMillis()
        val faces = mutableListOf<DetectedFace>()

        for (box in detectBoxes(bitmap)) {
            if (box.score < detectionConfidenceThreshold) continue

            box.toSquareShape()
            box.limitSquare(bitmap.width, bitmap.height)
            val rect = box.transform2Rect()

            val landmarks = box.landmark.map { point ->
                point?.let { mapOf("x" to it.x, "y" to it.y) }
            }
            val croppedFace = if (cropFaces) MyUtil.crop(bitmap, rect) else null

            faces.add(
                DetectedFace(
                    box = rect.toBoxMap(),
                    landmarks = landmarks,
                    confidence = box.score,
                    croppedFace = croppedFace
                )
            )
        }

        return DetectionResult(
            faces = faces,
            processingTimeMs = System.currentTimeMillis() - startTime,
            frameWidth = bitmap.width,
            frameHeight = bitmap.height
        )
    }

    /**
     * Returns the detected face with the largest box area, or null when no face passes
     * detection. Faces are not cropped.
     */
    fun detectLargestFace(bitmap: Bitmap): DetectedFace? {
        val result = detectFaces(bitmap, cropFaces = false)
        return result.faces.maxByOrNull { face ->
            val box = face.box
            val width = (box["right"] ?: 0) - (box["left"] ?: 0)
            val height = (box["bottom"] ?: 0) - (box["top"] ?: 0)
            width * height
        }
    }

    // ========== Liveness Detection ==========

    /**
     * Runs the sharpness and anti-spoofing checks on the first face MTCNN returns.
     *
     * The face counts as live when the anti-spoofing score is below the liveness threshold.
     * Frames whose Laplacian sharpness is below the sharpness threshold are rejected with
     * "Image too blurry" before the anti-spoofing model is run.
     *
     * NOTE(review): unlike [detectFaces], the first box is used without applying the
     * detection confidence threshold — confirm this is intended.
     */
    fun checkLiveness(bitmap: Bitmap): LivenessResult {
        val startTime = System.currentTimeMillis()

        val boxes = detectBoxes(bitmap)
        if (boxes.isEmpty()) {
            return LivenessResult(
                faceDetected = false,
                isLive = false,
                livenessScore = 1f,
                sharpness = 0f,
                isSharp = false,
                faceBox = null,
                confidence = 0f,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = "No face detected"
            )
        }

        val box = boxes[0]
        box.toSquareShape()
        box.limitSquare(bitmap.width, bitmap.height)
        val rect = box.transform2Rect()
        val faceBox = rect.toBoxMap()

        // The crop is converted to ARGB_8888 before it is handed to the anti-spoofing code.
        val rawCrop = MyUtil.crop(bitmap, rect)
        val faceCrop = if (rawCrop.config != Bitmap.Config.ARGB_8888) {
            rawCrop.copy(Bitmap.Config.ARGB_8888, false)
        } else {
            rawCrop
        }

        val sharpness = faceAntiSpoofing.laplacian(faceCrop).toFloat()
        if (sharpness < sharpnessThreshold) {
            return LivenessResult(
                faceDetected = true,
                isLive = false,
                livenessScore = 1f,
                sharpness = sharpness,
                isSharp = false,
                faceBox = faceBox,
                confidence = 0f,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = "Image too blurry"
            )
        }

        val livenessScore = faceAntiSpoofing.antiSpoofing(faceCrop)
        val isLive = livenessScore < livenessThreshold

        return LivenessResult(
            faceDetected = true,
            isLive = isLive,
            livenessScore = livenessScore,
            sharpness = sharpness,
            isSharp = true,
            faceBox = faceBox,
            confidence = livenessConfidence(livenessScore, isLive),
            processingTimeMs = System.currentTimeMillis() - startTime,
            errorMessage = null
        )
    }

    // ========== Face Registration (Embedding Extraction) ==========

    /**
     * Extracts a face embedding from the first face MTCNN returns for [bitmap].
     *
     * @return a successful result carrying the embedding and face box, or a failed result
     * with "No face detected" when MTCNN returns no boxes.
     */
    fun extractEmbedding(bitmap: Bitmap): RegistrationResult {
        val startTime = System.currentTimeMillis()

        debugLog(EMBED_TAG) { "=== EXTRACT EMBEDDING START ===" }
        debugLog(EMBED_TAG) { "Input bitmap: ${bitmap.width}x${bitmap.height}, config: ${bitmap.config}" }

        val minFaceSize = (bitmap.width * minFaceRatio).toInt()
        debugLog(EMBED_TAG) { "MinFaceSize: $minFaceSize (ratio: $minFaceRatio)" }

        val boxes = mtcnn.detectFaces(bitmap, minFaceSize)
        debugLog(EMBED_TAG) { "MTCNN detected ${boxes.size} faces" }

        if (boxes.isEmpty()) {
            debugLog(EMBED_TAG) { "=== EXTRACT EMBEDDING FAILED: No face ===" }
            return RegistrationResult(
                success = false,
                embedding = null,
                faceBox = null,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = "No face detected"
            )
        }

        val box = boxes[0]
        debugLog(EMBED_TAG) { "Face box before transform: score=${box.score}" }

        box.toSquareShape()
        box.limitSquare(bitmap.width, bitmap.height)
        val rect = box.transform2Rect()
        debugLog(EMBED_TAG) {
            "Face rect: left=${rect.left}, top=${rect.top}, right=${rect.right}, bottom=${rect.bottom}"
        }

        val faceCrop = MyUtil.crop(bitmap, rect)
        debugLog(EMBED_TAG) { "Face crop: ${faceCrop.width}x${faceCrop.height}" }

        val embedding = mobileFaceNet.getEmbedding(faceCrop)
        debugLog(EMBED_TAG) { "Embedding extracted: size=${embedding.size}" }
        debugLog(EMBED_TAG) { "Embedding first 5: [${embedding.preview()}]" }
        debugLog(EMBED_TAG) { "Embedding last 5: [${embedding.preview(fromEnd = true)}]" }
        debugLog(EMBED_TAG) { "Embedding norm: ${"%.4f".format(embedding.l2Norm())}" }
        debugLog(EMBED_TAG) { "=== EXTRACT EMBEDDING SUCCESS ===" }

        return RegistrationResult(
            success = true,
            embedding = embedding.toList(),
            faceBox = rect.toBoxMap(),
            processingTimeMs = System.currentTimeMillis() - startTime,
            errorMessage = null
        )
    }

    /**
     * Builds one enrollment embedding from three photos.
     *
     * An embedding is extracted from each photo, the three vectors are averaged
     * component-wise, and the average is re-normalized to unit length (skipped when its
     * norm is not above 1e-10). The vector length is taken from the extracted embeddings
     * rather than assumed.
     *
     * @return a successful result with the averaged embedding (no face box), or a failed
     * result naming the first photo that could not be processed or reporting mismatched
     * embedding sizes.
     */
    fun registerFace(frontPhoto: Bitmap, leftPhoto: Bitmap, rightPhoto: Bitmap): RegistrationResult {
        val startTime = System.currentTimeMillis()
        debugLog(EMBED_TAG) { "=== REGISTER FACE (3 photos) START ===" }

        val photos = listOf(frontPhoto, leftPhoto, rightPhoto)
        val photoLabels = listOf("FRONT", "LEFT", "RIGHT")
        val embeddings = mutableListOf<FloatArray>()

        for ((index, photo) in photos.withIndex()) {
            debugLog(EMBED_TAG) { "--- Processing ${photoLabels[index]} photo ---" }
            val result = extractEmbedding(photo)
            val vector = result.embedding
            if (!result.success || vector == null) {
                Log.e(EMBED_TAG, "Failed to extract from ${photoLabels[index]}: ${result.errorMessage}")
                return RegistrationResult(
                    success = false,
                    embedding = null,
                    faceBox = null,
                    processingTimeMs = System.currentTimeMillis() - startTime,
                    errorMessage = "Failed to extract face from photo ${index + 1}: ${result.errorMessage}"
                )
            }
            embeddings.add(vector.toFloatArray())
        }

        // All vectors must share one length, otherwise the component-wise average is undefined.
        val dimension = embeddings[0].size
        if (embeddings.any { it.size != dimension }) {
            Log.e(EMBED_TAG, "Embedding sizes differ: ${embeddings.map { it.size }}")
            return RegistrationResult(
                success = false,
                embedding = null,
                faceBox = null,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = "Embedding sizes differ between photos"
            )
        }

        // Average the embeddings
        debugLog(EMBED_TAG) { "--- Averaging 3 embeddings ---" }
        val avgEmbedding = FloatArray(dimension) { i ->
            (embeddings[0][i] + embeddings[1][i] + embeddings[2][i]) / 3f
        }

        val avgNormBefore = avgEmbedding.l2Norm()
        debugLog(EMBED_TAG) {
            "Averaged embedding norm BEFORE normalization: ${"%.4f".format(avgNormBefore)}"
        }

        // Re-normalize the averaged embedding to unit length.
        if (avgNormBefore > 1e-10) {
            for (i in avgEmbedding.indices) {
                avgEmbedding[i] = avgEmbedding[i] / avgNormBefore
            }
        }

        debugLog(EMBED_TAG) { "Averaged embedding first 5: [${avgEmbedding.preview()}]" }
        debugLog(EMBED_TAG) {
            "Averaged embedding norm AFTER normalization: ${"%.4f".format(avgEmbedding.l2Norm())}"
        }
        debugLog(EMBED_TAG) { "=== REGISTER FACE SUCCESS ===" }

        return RegistrationResult(
            success = true,
            embedding = avgEmbedding.toList(),
            faceBox = null,
            processingTimeMs = System.currentTimeMillis() - startTime,
            errorMessage = null
        )
    }

    // ========== Face Matching ==========

    /**
     * Sets the embedding that [processFrame] compares live frames against.
     *
     * A copy of [embedding] is stored, so later changes to the caller's array do not
     * affect matching.
     */
    fun setTargetEmbedding(embedding: FloatArray) {
        debugLog(MATCH_TAG) { "=== SET TARGET EMBEDDING ===" }
        debugLog(MATCH_TAG) { "Target size: ${embedding.size}" }
        debugLog(MATCH_TAG) { "Target first 5: [${embedding.preview()}]" }
        debugLog(MATCH_TAG) { "Target last 5: [${embedding.preview(fromEnd = true)}]" }
        debugLog(MATCH_TAG) { "Target norm: ${"%.4f".format(embedding.l2Norm())}" }
        this.targetEmbedding = embedding.copyOf()
    }

    /** Returns true when a target embedding is currently set. */
    fun hasTarget(): Boolean = targetEmbedding != null

    /** Removes the current target embedding. */
    fun clearTarget() {
        debugLog(MATCH_TAG) { "=== TARGET CLEARED ===" }
        targetEmbedding = null
    }

    /**
     * Matches the first face MTCNN returns for [bitmap] against the target embedding.
     *
     * A face matches when its L2 distance to the target is below the match threshold; the
     * confidence is `1 - distance / matchThreshold` (floored at 0) for matches and 0
     * otherwise.
     *
     * @return a result with `faceDetected = false` when no target is set (with an error
     * message) or when no face is found (without one); otherwise the match outcome.
     */
    fun processFrame(bitmap: Bitmap): MatchResult {
        val startTime = System.currentTimeMillis()

        val target = targetEmbedding
        if (target == null) {
            Log.w(MATCH_TAG, "processFrame: No target embedding set!")
            return MatchResult(
                faceDetected = false,
                isMatch = false,
                confidence = 0f,
                distance = Float.MAX_VALUE,
                faceBox = null,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = "No target embedding set"
            )
        }

        val boxes = detectBoxes(bitmap)
        if (boxes.isEmpty()) {
            return MatchResult(
                faceDetected = false,
                isMatch = false,
                confidence = 0f,
                distance = Float.MAX_VALUE,
                faceBox = null,
                processingTimeMs = System.currentTimeMillis() - startTime,
                errorMessage = null
            )
        }

        val box = boxes[0]
        box.toSquareShape()
        box.limitSquare(bitmap.width, bitmap.height)
        val rect = box.transform2Rect()
        val faceCrop = MyUtil.crop(bitmap, rect)

        debugLog(MATCH_TAG) { "--- LIVE FRAME PROCESSING ---" }
        debugLog(MATCH_TAG) {
            "Frame: ${bitmap.width}x${bitmap.height}, FaceCrop: ${faceCrop.width}x${faceCrop.height}"
        }

        val embedding = mobileFaceNet.getEmbedding(faceCrop)
        debugLog(MATCH_TAG) { "Live embedding first 5: [${embedding.preview()}]" }
        debugLog(MATCH_TAG) { "Live embedding norm: ${"%.4f".format(embedding.l2Norm())}" }
        debugLog(MATCH_TAG) { "Target embedding first 5: [${target.preview()}]" }
        debugLog(MATCH_TAG) { "Target embedding norm: ${"%.4f".format(target.l2Norm())}" }

        val distance = MobileFaceNet.l2Distance(embedding, target)
        val isMatch = distance < matchThreshold
        debugLog(MATCH_TAG) {
            ">>> L2 Distance: ${"%.4f".format(distance)}, Threshold: $matchThreshold, Match: $isMatch <<<"
        }

        val confidence = if (isMatch) maxOf(0f, 1f - (distance / matchThreshold)) else 0f

        return MatchResult(
            faceDetected = true,
            isMatch = isMatch,
            confidence = confidence,
            distance = distance,
            faceBox = rect.toBoxMap(),
            processingTimeMs = System.currentTimeMillis() - startTime,
            errorMessage = null
        )
    }

    // ========== Threshold Setters/Getters ==========

    /** Sets the minimum face size as a fraction of the frame width, clamped to 0.05..0.5. */
    fun setMinFaceRatio(ratio: Float) {
        this.minFaceRatio = ratio.coerceIn(0.05f, 0.5f)
    }

    fun getMinFaceRatio(): Float = minFaceRatio

    /** Sets the minimum detection score used by [detectFaces], clamped to 0..1. */
    fun setDetectionConfidenceThreshold(threshold: Float) {
        this.detectionConfidenceThreshold = threshold.coerceIn(0f, 1f)
    }

    fun getDetectionConfidenceThreshold(): Float = detectionConfidenceThreshold

    /** Sets the anti-spoofing score below which a face counts as live (stored as given). */
    fun setLivenessThreshold(threshold: Float) {
        this.livenessThreshold = threshold
    }

    fun getLivenessThreshold(): Float = livenessThreshold

    /** Sets the minimum Laplacian sharpness accepted by [checkLiveness] (stored as given). */
    fun setSharpnessThreshold(threshold: Float) {
        this.sharpnessThreshold = threshold
    }

    fun getSharpnessThreshold(): Float = sharpnessThreshold

    /** Sets the L2 distance below which [processFrame] reports a match (stored as given). */
    fun setMatchThreshold(threshold: Float) {
        this.matchThreshold = threshold
    }

    fun getMatchThreshold(): Float = matchThreshold

    companion object {
        private const val EMBED_TAG = "FACE_EMBED_DEBUG"
        private const val MATCH_TAG = "FACE_MATCH_DEBUG"

        @Volatile
        private var instance: FaceDetectionManager? = null

        /**
         * Returns the shared manager, creating it on first use from the application
         * context. Creation is guarded by a lock on the companion object, with the
         * instance re-checked inside the lock.
         */
        fun getInstance(context: Context): FaceDetectionManager {
            return instance ?: synchronized(this) {
                instance ?: FaceDetectionManager(context.applicationContext).also { instance = it }
            }
        }
    }
}

// Data classes for results
//
// NOTE(review): the generic type arguments below were missing from the reviewed copy of this
// file and have been reconstructed from how the values are built. Int is assumed for the
// landmark coordinates (as for android.graphics.Point) — confirm against the MTCNN box type.

/** One detected face: its box, per-landmark `x`/`y` maps, score, and optional crop. */
data class DetectedFace(
    val box: Map<String, Int>,
    val landmarks: List<Map<String, Int>?>,
    val confidence: Float,
    val croppedFace: Bitmap? = null
)

/** Result of a detection pass over one frame. */
data class DetectionResult(
    val faces: List<DetectedFace>,
    val processingTimeMs: Long,
    val frameWidth: Int,
    val frameHeight: Int
) {
    val faceCount: Int get() = faces.size
    val hasFaces: Boolean get() = faces.isNotEmpty()
}

/** Result of a liveness check on one frame. */
data class LivenessResult(
    val faceDetected: Boolean,
    val isLive: Boolean,
    val livenessScore: Float,
    val sharpness: Float,
    val isSharp: Boolean,
    val faceBox: Map<String, Int>?,
    val confidence: Float,
    val processingTimeMs: Long,
    val errorMessage: String?
)

/** Result of extracting or registering a face embedding. */
data class RegistrationResult(
    val success: Boolean,
    val embedding: List<Float>?,
    val faceBox: Map<String, Int>?,
    val processingTimeMs: Long,
    val errorMessage: String?
)

/** Result of matching one frame against the target embedding. */
data class MatchResult(
    val faceDetected: Boolean,
    val isMatch: Boolean,
    val confidence: Float,
    val distance: Float,
    val faceBox: Map<String, Int>?,
    val processingTimeMs: Long,
    val errorMessage: String?
)