{
  "definitions": {},
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://docs.veritone.com/schemas/vtn-standard/master.json",
  "type": "object",
  "title": "vtn-standard for transcription engines",
  "required": [
    "series"
  ],
  "properties": {
    "schemaId": {
      "type": "string",
      "const": "https://docs.veritone.com/schemas/vtn-standard/master.json"
    },
    "validationContracts": {
      "type": "array",
      "title": "Validation contracts",
      "description": "Engine category contracts to validate this engine output against",
      "contains": {
        "type": "string",
        "description": "The transcription engine category ID",
        "const": "67cd4dd0-2f75-445d-a6f0-2f297d6cd182"
      }
    },
    "series": {
      "type": "array",
      "title": "Time series objects",
      "items": {
        "type": "object",
        "title": "Time series object",
        "required": [
          "startTimeMs",
          "stopTimeMs",
          "words"
        ],
        "properties": {
          "startTimeMs": {
            "type": "integer",
            "title": "Start time (ms)",
            "description": "The time, in milliseconds from the beginning of the file, at which this observation starts.",
            "minimum": 0,
            "examples": [
              817000
            ]
          },
          "stopTimeMs": {
            "type": "integer",
            "title": "Stop time (ms)",
            "description": "The time, in milliseconds from the beginning of the file, at which this observation stops. Must be greater than or equal to startTimeMs; this ordering is not enforced by the schema itself.",
            "minimum": 0,
            "examples": [
              845000
            ]
          },
          "words": {
            "description": "Transcript utterance: all word edges between two time nodes. An array of n objects, one describing each alternative word.",
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "word": {
                  "description": "The word spoken. May be the special value !silence.",
                  "type": "string"
                },
                "confidence": {
                  "description": "The confidence level of the detected spoken word, from 0.00 (0%) to 1.00 (100%).",
                  "type": "number",
                  "minimum": 0,
                  "maximum": 1
                },
                "bestPath": {
                  "description": "Is this word included in the best path through a transcript lattice?",
                  "type": "boolean"
                },
                "utteranceLength": {
                  "description": "Number of consecutive time-slices the utterance spans. Example: for the sequence of->thrones----->of->their-->own->, utteranceLength is 2 for 'thrones' and 1 for each of 'their' and 'own'.",
                  "type": "integer",
                  "minimum": 1
                }
              },
              "required": [ "word" ]
            }
          }
        }
      }
    }
  }
}