Frenchizer committed
Commit 10fd17d · verified · 1 Parent(s): f16b34f

Update app.py

Files changed (1)
  1. app.py +32 -27
app.py CHANGED
@@ -3,35 +3,40 @@ import onnxruntime as ort
 from transformers import AutoTokenizer
 import numpy as np

-# Load ONNX model and tokenizer
 MODEL_FILE = "./model.onnx"
 session = ort.InferenceSession(MODEL_FILE)
 tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-fr")

-# Add this after loading the model
-print("Model inputs:", [input.name for input in session.get_inputs()])
-print("Input shapes:", [input.shape for input in session.get_inputs()])
-
-# Gradio prediction function
 def gradio_predict(input_text):
-    # Tokenize input text
-    tokenized_input = tokenizer(
-        input_text,
-        return_tensors="np",
-        padding='max_length',  # Pad to max length
-        truncation=True,  # Truncate if longer than max length
-        max_length=512  # Ensure the sequence doesn't exceed the model's max length
-    )
-
-    # Convert to correct numpy arrays with explicit types
-    input_ids = tokenized_input["input_ids"].astype(np.int64)
-    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+    try:
+        # Tokenize input text
+        tokenized_input = tokenizer(
+            input_text,
+            return_tensors="np",
+            padding='max_length',
+            truncation=True,
+            max_length=512
+        )

-    # Create decoder input ids (usually it's the same shape as input_ids with initial token)
-    decoder_input_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)
+        # Get shapes from actual input
+        batch_size = tokenized_input["input_ids"].shape[0]  # Should be 1
+        seq_length = tokenized_input["input_ids"].shape[1]  # Should be 512

-    # Perform inference with ONNX model
-    try:
+        # Prepare inputs with correct shapes
+        input_ids = tokenized_input["input_ids"].astype(np.int64)
+        attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+
+        # Create decoder_input_ids with matching shape
+        # Usually starts with pad_token_id or bos_token_id
+        decoder_input_ids = np.full((batch_size, seq_length), tokenizer.pad_token_id, dtype=np.int64)
+        decoder_input_ids[:, 0] = tokenizer.bos_token_id if tokenizer.bos_token_id is not None else tokenizer.pad_token_id
+
+        print("Debug shapes:")
+        print(f"input_ids shape: {input_ids.shape}")
+        print(f"attention_mask shape: {attention_mask.shape}")
+        print(f"decoder_input_ids shape: {decoder_input_ids.shape}")
+
+        # Run inference
         outputs = session.run(
             None,
             {
@@ -40,14 +45,14 @@ def gradio_predict(input_text):
                 "decoder_input_ids": decoder_input_ids
             }
         )
-
-        # Decode output and return translated text
+
+        # Decode output
        translated_text = tokenizer.decode(outputs[0][0], skip_special_tokens=True)
        return translated_text
-
+
     except Exception as e:
-        print(f"Error during inference: {e}")
-        return "An error occurred during inference."
+        print(f"Detailed error: {str(e)}")  # This will show in the Space's logs
+        return f"Error during translation: {str(e)}"

 # Gradio interface for the web app
 gr.Interface(
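
Note: the inspection prints removed at the top of the file are still the quickest way to confirm what the export actually expects. If decoder_input_ids is declared with a dynamic sequence axis, a single start token is as valid a shape as a pad-filled 512-long row. A minimal sketch, assuming the same ./model.onnx used in the diff:

import onnxruntime as ort

session = ort.InferenceSession("./model.onnx")

# Names, shapes, and dtypes the exported graph declares; dynamic axes
# usually print as strings such as 'batch_size' or 'decoder_sequence_length'.
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)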
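
Separately, a single session.run over a pad-filled decoder_input_ids returns logits for every decoder position, not token ids, so tokenizer.decode(outputs[0][0], ...) is being fed float scores. Encoder-decoder models like opus-mt-en-fr are normally decoded autoregressively. A rough sketch of greedy decoding, under two assumptions this export may or may not satisfy (the first output is logits of shape (batch, dec_len, vocab), and the sequence axes are dynamic); greedy_translate is a hypothetical helper, not code from this Space:

import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

session = ort.InferenceSession("./model.onnx")
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-fr")

def greedy_translate(input_text, max_new_tokens=128):
    enc = tokenizer(input_text, return_tensors="np")
    feeds = {
        "input_ids": enc["input_ids"].astype(np.int64),
        "attention_mask": enc["attention_mask"].astype(np.int64),
    }
    # Marian models start decoding from pad_token_id
    # (their decoder_start_token_id), not bos_token_id.
    decoder_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)

    for _ in range(max_new_tokens):
        logits = session.run(
            None, {**feeds, "decoder_input_ids": decoder_ids}
        )[0]                                    # assumed: (batch, dec_len, vocab)
        next_id = int(logits[0, -1].argmax())   # greedy pick at the last position
        decoder_ids = np.concatenate(
            [decoder_ids, [[next_id]]], axis=1
        ).astype(np.int64)
        if next_id == tokenizer.eos_token_id:
            break

    # Drop the start token before turning ids back into text.
    return tokenizer.decode(decoder_ids[0, 1:], skip_special_tokens=True)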
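
The gr.Interface( call is cut off in this view. For orientation only, a typical single-textbox wiring looks like the following; the argument values are illustrative, not the Space's actual code:

import gradio as gr

gr.Interface(
    fn=gradio_predict,                        # the function defined in app.py
    inputs=gr.Textbox(label="English text"),
    outputs=gr.Textbox(label="French translation"),
    title="EN-FR translation (ONNX)",
).launch()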