Frenchizer committed (verified)
Commit: a7c166d · Parent: 65d5469

Update app.py

Files changed (1): app.py (+11, -21)
app.py CHANGED
@@ -19,24 +19,13 @@ def gradio_predict(input_text):
         max_length=512  # Ensure the sequence doesn't exceed the model's max length
     )
 
-    # Check that we have all necessary inputs
-    if "input_ids" not in tokenized_input or "attention_mask" not in tokenized_input:
-        return "Error: Missing required tokenizer outputs."
-
-    # Convert tokenized inputs to numpy arrays and ensure correct shape
-    input_ids = np.array(tokenized_input["input_ids"], dtype=np.int64)  # Shape should be [1, 512]
-    attention_mask = np.array(tokenized_input["attention_mask"], dtype=np.int64)  # Shape should be [1, 512]
-
-    # Ensure input is in the correct shape and format for ONNX
-    input_ids = input_ids.astype(np.int64)
-    attention_mask = attention_mask.astype(np.int64)
-
-    # Prepare decoder input ids if required by your model
-    decoder_input_ids = input_ids  # Adjust as needed based on model requirements
-
-    # Print the shapes to check if they are correct
-    print(f"input_ids shape: {input_ids.shape}, attention_mask shape: {attention_mask.shape}")
-
+    # Convert to correct numpy arrays with explicit types
+    input_ids = tokenized_input["input_ids"].astype(np.int64)
+    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+
+    # Create decoder input ids (usually it's the same shape as input_ids with initial token)
+    decoder_input_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)
+
     # Perform inference with ONNX model
     try:
         outputs = session.run(
@@ -47,14 +36,15 @@ def gradio_predict(input_text):
                 "decoder_input_ids": decoder_input_ids
             }
         )
-
-        # Debugging output for outputs
-        print(f"Outputs: {outputs}, type: {type(outputs)}")
 
         # Decode output and return translated text
         translated_text = tokenizer.decode(outputs[0][0], skip_special_tokens=True)
         return translated_text
 
+        # Add this after loading the model
+        print("Model inputs:", [input.name for input in session.get_inputs()])
+        print("Input shapes:", [input.shape for input in session.get_inputs()])
+
     except Exception as e:
         print(f"Error during inference: {e}")
         return "An error occurred during inference."