{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "effective_vocab_size": 114822,
  "eos_token": "</s>",
  "errors": "replace",
  "fix_mistral_regex": true,
  "is_local": false,
  "local_files_only": false,
  "model_max_length": 131072,
  "model_type": "byte_level_bpe",
  "no_audio_codec_tokens": true,
  "no_dense_timestamp_tokens": true,
  "open_formosa": {
    "required_special_token_count": 157,
    "required_special_tokens_present": true,
    "required_special_tokens_single_id": true,
    "standard_special_tokens": {
      "bos_token": "<s>",
      "eos_token": "</s>",
      "pad_token": "<pad>",
      "unk_token": "<unk>"
    }
  },
  "pad_token": "<pad>",
  "padding_side": "right",
  "rich_transcription": {
    "allow_non_speech_events": true,
    "compact_json": true,
    "default_format": "json_segments",
    "enabled": true,
    "include_content": true,
    "include_speaker": true,
    "include_start_end": true,
    "no_dense_timestamp_tokens": true,
    "timestamp_precision_digits": 2,
    "timestamp_unit": "seconds"
  },
  "special_tokens": [
    "<|pad|>",
    "<|bos|>",
    "<|eos|>",
    "<|unk|>",
    "<|system|>",
    "<|user_channel|>",
    "<|assistant_channel|>",
    "<|task:speech_to_text|>",
    "<|task:text_to_speech|>",
    "<|input_audio_start|>",
    "<|input_audio_end|>",
    "<|audio_ref_start|>",
    "<|audio_ref_end|>",
    "<|audio_start|>",
    "<|audio_end|>",
    "<|speech_start|>",
    "<|speech_end|>",
    "<|transcript_start|>",
    "<|transcript_end|>",
    "<|segment_start|>",
    "<|segment_end|>",
    "<|speaker|>",
    "<|start_time|>",
    "<|end_time|>",
    "<|duration|>",
    "<|content|>",
    "<|non_speech_event|>",
    "<|retrieval_result_start|>",
    "<|retrieval_result_end|>",
    "<|ocr_start|>",
    "<|ocr_end|>",
    "<|image_start|>",
    "<|image_end|>",
    "<|video_start|>",
    "<|video_end|>"
  ],
  "strict_no_dense_timestamp_tokens": true,
  "tokenizer_class": "GPT2Tokenizer",
  "truncation_side": "right",
  "unk_token": "<unk>",
  "vocab_size": 114688
}