space_10

Sleeping

Frenchizer committed on Jan 26

Commit

c4b718f

verified ·

1 Parent(s): f80fc89

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from transformers import MarianTokenizer
 import gradio as gr
 # Load the tokenizer from the local folder
-tokenizer_path = "./onnx_model"  # Path to the local tokenizer folder
 tokenizer = MarianTokenizer.from_pretrained(tokenizer_path)
 # Load the ONNX model
@@ -20,6 +20,7 @@ def translate(texts, max_length=512):
     # Initialize variables for decoding
     batch_size = input_ids.shape[0]
     decoder_input_ids = np.array([[tokenizer.pad_token_id]] * batch_size, dtype=np.int64)  # Start with pad token
     # Generate output tokens iteratively
     for _ in range(max_length):
@@ -42,8 +43,11 @@ def translate(texts, max_length=512):
         # Append the next tokens to the decoder input for the next iteration
         decoder_input_ids = np.concatenate([decoder_input_ids, next_tokens[:, None]], axis=-1)
         # Stop if all sequences have reached the EOS token
-        if all(tokenizer.eos_token_id in sequence for sequence in decoder_input_ids):
             break
     # Decode the output tokens to text
@@ -64,7 +68,7 @@ interface = gr.Interface(
     inputs=gr.Textbox(lines=5, placeholder="Enter text to translate...", label="Input Text"),
     outputs=gr.Textbox(lines=5, label="Translated Text"),
     title="ONNX English to French Translation",
-    description="Translate English text to French using an ONNX model.",
 )
 # Launch the Gradio app

 import gradio as gr
 # Load the tokenizer from the local folder
+tokenizer_path = "./tokenizer"  # Path to the local tokenizer folder
 tokenizer = MarianTokenizer.from_pretrained(tokenizer_path)
 # Load the ONNX model
     # Initialize variables for decoding
     batch_size = input_ids.shape[0]
     decoder_input_ids = np.array([[tokenizer.pad_token_id]] * batch_size, dtype=np.int64)  # Start with pad token
+    eos_reached = np.zeros(batch_size, dtype=bool)  # Track which sequences have finished
     # Generate output tokens iteratively
     for _ in range(max_length):
         # Append the next tokens to the decoder input for the next iteration
         decoder_input_ids = np.concatenate([decoder_input_ids, next_tokens[:, None]], axis=-1)
+        # Check if the EOS token has been generated for each sequence
+        eos_reached = eos_reached | (next_tokens == tokenizer.eos_token_id)
         # Stop if all sequences have reached the EOS token
+        if all(eos_reached):
             break
     # Decode the output tokens to text
     inputs=gr.Textbox(lines=5, placeholder="Enter text to translate...", label="Input Text"),
     outputs=gr.Textbox(lines=5, label="Translated Text"),
     title="ONNX English to French Translation",
+    description="Translate English text to French using a MarianMT ONNX model.",
 )
 # Launch the Gradio app