Frenchizer committed (verified)
Commit: a7c166d · Parent: 65d5469

Update app.py

Files changed (1): app.py (+11, -21)
app.py CHANGED
@@ -19,24 +19,13 @@ def gradio_predict(input_text):
         max_length=512  # Ensure the sequence doesn't exceed the model's max length
     )
 
-    # Check that we have all necessary inputs
-    if "input_ids" not in tokenized_input or "attention_mask" not in tokenized_input:
-        return "Error: Missing required tokenizer outputs."
-
-    # Convert tokenized inputs to numpy arrays and ensure correct shape
-    input_ids = np.array(tokenized_input["input_ids"], dtype=np.int64)  # Shape should be [1, 512]
-    attention_mask = np.array(tokenized_input["attention_mask"], dtype=np.int64)  # Shape should be [1, 512]
-
-    # Ensure input is in the correct shape and format for ONNX
-    input_ids = input_ids.astype(np.int64)
-    attention_mask = attention_mask.astype(np.int64)
-
-    # Prepare decoder input ids if required by your model
-    decoder_input_ids = input_ids  # Adjust as needed based on model requirements
-
-    # Print the shapes to check if they are correct
-    print(f"input_ids shape: {input_ids.shape}, attention_mask shape: {attention_mask.shape}")
-
+    # Convert to correct numpy arrays with explicit types
+    input_ids = tokenized_input["input_ids"].astype(np.int64)
+    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+
+    # Create decoder input ids (usually it's the same shape as input_ids with initial token)
+    decoder_input_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)
+
     # Perform inference with ONNX model
     try:
         outputs = session.run(
@@ -47,14 +36,15 @@ def gradio_predict(input_text):
                 "decoder_input_ids": decoder_input_ids
             }
         )
-
-        # Debugging output for outputs
-        print(f"Outputs: {outputs}, type: {type(outputs)}")
 
         # Decode output and return translated text
         translated_text = tokenizer.decode(outputs[0][0], skip_special_tokens=True)
         return translated_text
 
+        # Add this after loading the model
+        print("Model inputs:", [input.name for input in session.get_inputs()])
+        print("Input shapes:", [input.shape for input in session.get_inputs()])
+
     except Exception as e:
         print(f"Error during inference: {e}")
         return "An error occurred during inference."