import { loadTensorflowModel } from '@get-id/react-native-fast-tflite';
import { SelfieUserHint } from '../types/sdkTypes';
import type { Scores } from './faceScoring';

/** A `finalScore` at or below this value keeps the user on the "hold still" hint. */
const AUTO_CAPTURE_SCORE_THRESHOLD = 0.97;

const MODEL_URL =
  'https://cdn.getid.cloud/face-detection/2024-05-08/face_detection_short_range.tflite';

/** Side length of the model input; raw box values are divided by it to normalize them. */
const MODEL_INPUT_SIZE = 128;

/** Numbers per detection in the raw box tensor: center (2), size (2), six landmarks (12). */
const VALUES_PER_DETECTION = 16;

/** Largest absolute roll/yaw, in degrees, still treated as facing the camera. */
const MAX_HEAD_ANGLE_DEG = 10;

/** The face area must be at least this fraction of the reference area. */
const MIN_FACE_AREA_RATIO = 0.6;

/** Allowed face-center offset from the screen center, as a fraction of `rect.width`. */
const CENTER_TOLERANCE_RATIO = 0.3;

const RAD_TO_DEG = 180 / Math.PI;

/** Parameters that drive anchor generation in {@link generateAnchors}. */
export interface SsdAnchorOptions {
  numLayers: number;
  inputHeight: number;
  inputWidth: number;
  offsetX: number;
  offsetY: number;
  strides: number[];
}

const SSD_OPTIONS_SHORT: SsdAnchorOptions = {
  numLayers: 4,
  inputHeight: MODEL_INPUT_SIZE,
  inputWidth: MODEL_INPUT_SIZE,
  offsetX: 0.5,
  offsetY: 0.5,
  strides: [8, 16, 16, 16],
};

export interface Point {
  x: number;
  y: number;
}

export interface Landmarks {
  leftEye: Point;
  rightEye: Point;
  nose: Point;
  mouth: Point;
  leftEar: Point;
  rightEar: Point;
}

/** Head orientation estimates in degrees. */
interface Angles {
  roll: number; // Z axis
  yaw: number; // Y axis
  pitch: number; // X axis
}

export interface BoundingBox {
  tl: Point;
  br: Point;
  width: number;
  height: number;
  area: number;
  angles: Angles;
  landmarks: Landmarks;
  /** `Date.now()` at the moment the box was built. */
  timestamp: number;
}

export interface FaceData {
  /** Probability of a face being present. */
  score: number;
  /** Top-left corner of the bounding box: [x, y]. */
  topLeft: number[];
  /** Bottom-right corner of the bounding box: [x, y]. */
  bottomRight: number[];
  /**
   * Flat [x, y, ...] list of facial landmarks in the order:
   * left eye, right eye, nose, mouth, right ear, left ear.
   */
  landmarks: number[];
  /** Bounding box corners as [x1, y1, x2, y2] (top-left, then bottom-right). */
  bbox: number[];
}

/** Target rectangle on screen. Only `width` is read by `calculateHint` in this file. */
interface Rect {
  centerW: number;
  centerH: number;
  width: number;
  height: number;
  top: number;
  left: number;
  right: number;
  bottom: number;
}

/**
 * Generates the anchor centers used to decode the detector output.
 *
 * Consecutive layers sharing a stride are merged into one feature map, and
 * every cell of that map receives two anchors per merged layer.
 *
 * @param opts Anchor layout; defaults to the short-range configuration.
 * @returns One `[centerX, centerY]` pair per anchor, normalized to [0, 1].
 */
export function generateAnchors(
  opts: SsdAnchorOptions = SSD_OPTIONS_SHORT
): number[][] {
  'worklet';
  const { numLayers, strides, inputHeight, inputWidth, offsetX, offsetY } =
    opts;
  const anchors: number[][] = [];

  let layerId = 0;
  while (layerId < numLayers) {
    const stride = strides[layerId];
    // Fewer strides than layers: nothing left that can produce anchors.
    if (stride === undefined) break;

    let nextLayerId = layerId;
    let anchorsPerCell = 0;
    while (nextLayerId < numLayers && strides[nextLayerId] === stride) {
      nextLayerId += 1;
      anchorsPerCell += 2;
    }

    const featureMapHeight = inputHeight / stride;
    const featureMapWidth = inputWidth / stride;
    for (let row = 0; row < featureMapHeight; row++) {
      const centerY = (row + offsetY) / featureMapHeight;
      for (let col = 0; col < featureMapWidth; col++) {
        const centerX = (col + offsetX) / featureMapWidth;
        for (let k = 0; k < anchorsPerCell; k++) {
          anchors.push([centerX, centerY]);
        }
      }
    }

    layerId = nextLayerId;
  }

  return anchors;
}

/** Reads the `[x, y]` pair starting at `offset` from a flat coordinate list. */
function pointAt(values: readonly number[], offset: number): Point {
  'worklet';
  return { x: values[offset] as number, y: values[offset + 1] as number };
}

/** Converts the flat landmark list of a detection into named points. */
function getFacialLandmarks(faceData: FaceData): Landmarks {
  'worklet';
  const flat = faceData.landmarks;
  return {
    leftEye: pointAt(flat, 0),
    rightEye: pointAt(flat, 2),
    nose: pointAt(flat, 4),
    mouth: pointAt(flat, 6),
    rightEar: pointAt(flat, 8),
    leftEar: pointAt(flat, 10),
  };
}

/**
 * Estimates head orientation from 2D landmarks.
 *
 * @returns Roll, yaw and pitch in degrees.
 */
function calculateAngles(landmarks: Landmarks): Angles {
  'worklet';
  const { leftEye, rightEye, leftEar, rightEar, nose, mouth } = landmarks;
  const earSpan = Math.abs(rightEar.x - leftEar.x);

  // Roll (Z axis): slope of the line through both eyes.
  const roll =
    Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * RAD_TO_DEG;

  // Yaw (Y axis): horizontal nose offset from the ear midpoint, relative to ear span.
  const faceCenterX = (leftEar.x + rightEar.x) / 2;
  const yaw = Math.atan2(nose.x - faceCenterX, earSpan) * RAD_TO_DEG;

  // Pitch (X axis) - approximate calculation: eye-to-mouth distance relative to ear span.
  const eyeCenterY = (leftEye.y + rightEye.y) / 2;
  const pitch = Math.atan2(mouth.y - eyeCenterY, earSpan) * RAD_TO_DEG;

  return { roll, yaw, pitch };
}

/** Builds the public bounding-box description (geometry, angles, landmarks) of a detection. */
function getBoundingBox(faceData: FaceData): BoundingBox {
  'worklet';
  const x1 = faceData.bbox[0] as number;
  const y1 = faceData.bbox[1] as number;
  const x2 = faceData.bbox[2] as number;
  const y2 = faceData.bbox[3] as number;
  const width = x2 - x1;
  const height = y2 - y1;
  const landmarks = getFacialLandmarks(faceData);

  return {
    tl: { x: x1, y: y1 },
    br: { x: x2, y: y2 },
    width,
    height,
    area: width * height,
    angles: calculateAngles(landmarks),
    landmarks,
    timestamp: Date.now(),
  };
}

/**
 * Intersection-over-union of two detections' bounding boxes.
 *
 * When both boxes have zero area the result is NaN (0 / 0); the caller's
 * `< threshold` comparison is then false, so such a candidate is dropped.
 */
function intersectionOverUnion(first: FaceData, second: FaceData): number {
  'worklet';
  const a = first.bbox as [number, number, number, number];
  const b = second.bbox as [number, number, number, number];

  const overlapWidth = Math.max(Math.min(a[2], b[2]) - Math.max(a[0], b[0]), 0);
  const overlapHeight = Math.max(
    Math.min(a[3], b[3]) - Math.max(a[1], b[1]),
    0
  );
  const interArea = overlapWidth * overlapHeight;

  const firstArea = (a[2] - a[0]) * (a[3] - a[1]);
  const secondArea = (b[2] - b[0]) * (b[3] - b[1]);
  return interArea / (firstArea + secondArea - interArea);
}

/**
 * Non-maximum suppression: repeatedly keeps the best-scoring candidate and
 * discards the remaining ones whose IoU with it reaches `iouThreshold`.
 * The input array is not modified.
 */
function nonMaxSuppression(
  candidates: FaceData[],
  iouThreshold: number
): FaceData[] {
  'worklet';
  let remaining = candidates.slice().sort((a, b) => b.score - a.score);
  const kept: FaceData[] = [];

  while (remaining.length > 0) {
    const best = remaining[0];
    if (best === undefined) break;
    kept.push(best);
    remaining = remaining
      .slice(1)
      .filter(
        (candidate) => intersectionOverUnion(candidate, best) < iouThreshold
      );
  }

  return kept;
}

/**
 * Decodes raw detector tensors into face bounding boxes.
 *
 * Each detection occupies 16 consecutive values of `rawBoxes`: box center
 * (x, y), box size (w, h), then six landmark (x, y) pairs. Centers and
 * landmarks are offsets from the matching anchor. Detections without a score
 * or an anchor, and a trailing incomplete vector, are skipped.
 *
 * @param rawBoxes Flat box tensor, 16 values per detection.
 * @param rawScores One logit per detection; a sigmoid turns it into a probability.
 * @param anchors Anchor centers, index-aligned with the detections.
 * @param imgSize `[width, height]` the normalized coordinates are scaled to.
 * @param minScore Detections scoring at or below this probability are dropped.
 * @param iouThreshold Overlap at or above which a lower-scoring box is suppressed.
 * @param inputSize Divisor that normalizes the raw box values.
 * @returns Surviving faces, best score first.
 */
export function decodeOutput(
  rawBoxes: number[],
  rawScores: number[],
  anchors: number[][],
  imgSize: [number, number],
  minScore: number = 0.5,
  iouThreshold: number = 0.3,
  inputSize: number = MODEL_INPUT_SIZE
): BoundingBox[] {
  'worklet';
  const imgWidth = imgSize[0];
  const imgHeight = imgSize[1];
  const detectionCount = Math.floor(rawBoxes.length / VALUES_PER_DETECTION);
  const candidates: FaceData[] = [];

  for (let i = 0; i < detectionCount; i++) {
    const rawScore = rawScores[i];
    if (rawScore === undefined) continue;
    const score = 1 / (1 + Math.exp(-rawScore));
    if (!score || score <= minScore) continue;

    const anchor = anchors[i];
    if (anchor === undefined) continue;
    const anchorX = anchor[0];
    const anchorY = anchor[1];
    if (anchorX === undefined || anchorY === undefined) continue;

    const base = i * VALUES_PER_DETECTION;
    const centerX = (rawBoxes[base] as number) / inputSize + anchorX;
    const centerY = (rawBoxes[base + 1] as number) / inputSize + anchorY;
    // The size is not anchor-relative; only half of it is needed for the corners.
    const halfWidth = (rawBoxes[base + 2] as number) / inputSize / 2;
    const halfHeight = (rawBoxes[base + 3] as number) / inputSize / 2;

    const x1 = (centerX - halfWidth) * imgWidth;
    const y1 = (centerY - halfHeight) * imgHeight;
    const x2 = (centerX + halfWidth) * imgWidth;
    const y2 = (centerY + halfHeight) * imgHeight;

    const landmarks: number[] = [];
    for (let k = 4; k < VALUES_PER_DETECTION; k += 2) {
      landmarks.push(
        ((rawBoxes[base + k] as number) / inputSize + anchorX) * imgWidth,
        ((rawBoxes[base + k + 1] as number) / inputSize + anchorY) * imgHeight
      );
    }

    candidates.push({
      score,
      topLeft: [x1, y1],
      bottomRight: [x2, y2],
      landmarks,
      bbox: [x1, y1, x2, y2],
    });
  }

  return nonMaxSuppression(candidates, iouThreshold).map((face) =>
    getBoundingBox(face)
  );
}

/** Loads the face detection model from `MODEL_URL`. */
export async function loadFaceDetectionModel() {
  return loadTensorflowModel({ url: MODEL_URL });
}

/** Anchors for the default short-range configuration, computed once at module load. */
const shortRangeAnchors = generateAnchors();

/**
 * Runs the model synchronously on one input and decodes the detected faces.
 *
 * @param model Object exposing `runSync`; its first two outputs are passed to
 *   `decodeOutput` as the raw boxes and raw scores.
 * @param inputData Input handed to `model.runSync` as the only tensor.
 * @param scaleSize Size the normalized coordinates are scaled to.
 */
export function detectFace(
  model: any,
  inputData: any,
  scaleSize: { width: number; height: number } = { width: 1, height: 1 }
): BoundingBox[] {
  'worklet';
  const outputs = model.runSync([inputData]);
  return decodeOutput(outputs[0], outputs[1], shortRangeAnchors, [
    scaleSize.width,
    scaleSize.height,
  ]);
}

/**
 * Chooses the user hint for the current detections.
 *
 * Checks run in this order and the first failing one wins: no face, multiple
 * faces, too big, too small, not facing the camera, not centered, hold still.
 *
 * @param boxes Detected faces, e.g. the result of `detectFace`.
 * @param scaleSize Coordinate space of the boxes; its center is the screen center.
 * @param rect Target rectangle; only its `width` is used here.
 * @param scores Optional scoring result; a numeric `finalScore` at or below
 *   `AUTO_CAPTURE_SCORE_THRESHOLD` yields the "hold still" hint.
 */
export function calculateHint(
  boxes: any,
  scaleSize: { width: number; height: number } = { width: 1, height: 1 },
  rect: Rect,
  scores: Scores | null
) {
  'worklet';
  if (boxes.length < 1 || !boxes[0]) {
    return SelfieUserHint.SELFIE_NO_FACE;
  }
  if (boxes.length > 1) {
    return SelfieUserHint.SELFIE_MULTIPLE_FACES;
  }

  const box: BoundingBox = boxes[0];

  // NOTE(review): the reference area is width squared and the centering
  // tolerance uses `rect.width` on both axes; `rect.height` is never read.
  // Confirm this is intentional before changing it.
  const maxFaceArea = rect.width * rect.width;
  const minFaceArea = maxFaceArea * MIN_FACE_AREA_RATIO;

  if (box.area > maxFaceArea) {
    return SelfieUserHint.SELFIE_TOO_BIG;
  }
  if (box.area < minFaceArea) {
    return SelfieUserHint.SELFIE_TOO_SMALL;
  }

  if (
    Math.abs(box.angles.roll) > MAX_HEAD_ANGLE_DEG ||
    Math.abs(box.angles.yaw) > MAX_HEAD_ANGLE_DEG
  ) {
    return SelfieUserHint.SELFIE_NOT_FACING_CAMERA;
  }

  // min/max keep the horizontal extent ordered even if tl.x and br.x are swapped.
  const leftX = Math.min(box.tl.x, box.br.x);
  const rightX = Math.max(box.tl.x, box.br.x);
  const faceCenter: Point = {
    x: (leftX + rightX) * 0.5,
    y: (box.tl.y + box.br.y) * 0.5,
  };
  const screenCenter: Point = {
    x: scaleSize.width / 2,
    y: scaleSize.height / 2,
  };
  const tolerance = rect.width * CENTER_TOLERANCE_RATIO;

  const isCentered =
    Math.abs(screenCenter.x - faceCenter.x) < tolerance &&
    Math.abs(screenCenter.y - faceCenter.y) < tolerance;
  if (!isCentered) {
    return SelfieUserHint.SELFIE_NOT_CENTERED;
  }

  if (
    typeof scores?.finalScore === 'number' &&
    scores.finalScore <= AUTO_CAPTURE_SCORE_THRESHOLD
  ) {
    return SelfieUserHint.SELFIE_HOLD_STILL;
  }

  return SelfieUserHint.SELFIE_OK;
}