Commit: init

Files changed:
- pipeline/kotoba_whisper.py (+1 −1)
- pipeline/push_pipeline.py (+8 −1)

pipeline/kotoba_whisper.py (CHANGED)
@@ -216,7 +216,7 @@ class KotobaWhisperPipeline(AutomaticSpeechRecognitionPipeline):
                     item["is_last"] = m == len(timelines) - 1 and n == len(labels) - 1 and item["is_last"]
                     yield item
             else:
-                if
+                if audio_array.shape[0] > self.feature_extractor.n_samples:
                     processed = self.feature_extractor(
                         audio_array,
                         sampling_rate=self.feature_extractor.sampling_rate,

(NOTE: the removed line at old line 219 is rendered only as a bare `if` in the page extraction — its full original text was truncated and cannot be recovered from this view.)
pipeline/push_pipeline.py (CHANGED)
@@ -13,7 +13,14 @@ PIPELINE_REGISTRY.register_pipeline(
     pt_model=WhisperForConditionalGeneration,
     tf_model=TFWhisperForConditionalGeneration
 )
-pipe = pipeline(task="kotoba-whisper", model="kotoba-tech/kotoba-whisper-v2.0",
+pipe = pipeline(task="kotoba-whisper", model="kotoba-tech/kotoba-whisper-v2.0", batch_size=16)
+# pprint(pipe("fukabori_trimmed_more_more.mp3"))
+pprint(pipe(
+    "fukabori_trimmed_more_more.mp3",
+    add_silence_end=0.5,
+    add_silence_start=0.5,
+    chunk_length_s=15,
+))
 pipe.push_to_hub(model_alias)


(NOTE: the removed line at old line 16 is truncated in the page extraction after the trailing comma; its remaining original arguments cannot be recovered from this view.)