Frenchizer committed
Commit 10fd17d · verified · 1 Parent(s): f16b34f

Update app.py

Files changed (1)
  1. app.py +32 -27
app.py CHANGED
@@ -3,35 +3,40 @@ import onnxruntime as ort
 from transformers import AutoTokenizer
 import numpy as np

-# Load ONNX model and tokenizer
 MODEL_FILE = "./model.onnx"
 session = ort.InferenceSession(MODEL_FILE)
 tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-fr")

-# Add this after loading the model
-print("Model inputs:", [input.name for input in session.get_inputs()])
-print("Input shapes:", [input.shape for input in session.get_inputs()])
-
-# Gradio prediction function
 def gradio_predict(input_text):
-    # Tokenize input text
-    tokenized_input = tokenizer(
-        input_text,
-        return_tensors="np",
-        padding='max_length',  # Pad to max length
-        truncation=True,  # Truncate if longer than max length
-        max_length=512  # Ensure the sequence doesn't exceed the model's max length
-    )
-
-    # Convert to correct numpy arrays with explicit types
-    input_ids = tokenized_input["input_ids"].astype(np.int64)
-    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+    try:
+        # Tokenize input text
+        tokenized_input = tokenizer(
+            input_text,
+            return_tensors="np",
+            padding='max_length',
+            truncation=True,
+            max_length=512
+        )

-    # Create decoder input ids (usually it's the same shape as input_ids with initial token)
-    decoder_input_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)
+        # Get shapes from actual input
+        batch_size = tokenized_input["input_ids"].shape[0]  # Should be 1
+        seq_length = tokenized_input["input_ids"].shape[1]  # Should be 512

-    # Perform inference with ONNX model
-    try:
+        # Prepare inputs with correct shapes
+        input_ids = tokenized_input["input_ids"].astype(np.int64)
+        attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+
+        # Create decoder_input_ids with matching shape
+        # Usually starts with pad_token_id or bos_token_id
+        decoder_input_ids = np.full((batch_size, seq_length), tokenizer.pad_token_id, dtype=np.int64)
+        decoder_input_ids[:, 0] = tokenizer.bos_token_id if tokenizer.bos_token_id is not None else tokenizer.pad_token_id
+
+        print("Debug shapes:")
+        print(f"input_ids shape: {input_ids.shape}")
+        print(f"attention_mask shape: {attention_mask.shape}")
+        print(f"decoder_input_ids shape: {decoder_input_ids.shape}")
+
+        # Run inference
         outputs = session.run(
             None,
             {
@@ -40,14 +45,14 @@ def gradio_predict(input_text):
                 "decoder_input_ids": decoder_input_ids
             }
         )
-
-        # Decode output and return translated text
+
+        # Decode output
        translated_text = tokenizer.decode(outputs[0][0], skip_special_tokens=True)
        return translated_text
-
+
     except Exception as e:
-        print(f"Error during inference: {e}")
-        return "An error occurred during inference."
+        print(f"Detailed error: {str(e)}")  # This will show in the Space's logs
+        return f"Error during translation: {str(e)}"

 # Gradio interface for the web app
 gr.Interface(
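
Note: the inspection prints removed at the top of the file are still the quickest way to confirm what the export actually expects. If decoder_input_ids is declared with a dynamic sequence axis, a single start token is as valid a shape as a pad-filled 512-long row. A minimal sketch, assuming the same ./model.onnx used in the diff:

import onnxruntime as ort

session = ort.InferenceSession("./model.onnx")

# Names, shapes, and dtypes the exported graph declares; dynamic axes
# usually print as strings such as 'batch_size' or 'decoder_sequence_length'.
for inp in session.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)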
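
Separately, a single session.run over a pad-filled decoder_input_ids returns logits for every decoder position, not token ids, so tokenizer.decode(outputs[0][0], ...) is being fed float scores. Encoder-decoder models like opus-mt-en-fr are normally decoded autoregressively. A rough sketch of greedy decoding, under two assumptions this export may or may not satisfy (the first output is logits of shape (batch, dec_len, vocab), and the sequence axes are dynamic); greedy_translate is a hypothetical helper, not code from this Space:

import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

session = ort.InferenceSession("./model.onnx")
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-fr")

def greedy_translate(input_text, max_new_tokens=128):
    enc = tokenizer(input_text, return_tensors="np")
    feeds = {
        "input_ids": enc["input_ids"].astype(np.int64),
        "attention_mask": enc["attention_mask"].astype(np.int64),
    }
    # Marian models start decoding from pad_token_id
    # (their decoder_start_token_id), not bos_token_id.
    decoder_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int64)

    for _ in range(max_new_tokens):
        logits = session.run(
            None, {**feeds, "decoder_input_ids": decoder_ids}
        )[0]                                    # assumed: (batch, dec_len, vocab)
        next_id = int(logits[0, -1].argmax())   # greedy pick at the last position
        decoder_ids = np.concatenate(
            [decoder_ids, [[next_id]]], axis=1
        ).astype(np.int64)
        if next_id == tokenizer.eos_token_id:
            break

    # Drop the start token before turning ids back into text.
    return tokenizer.decode(decoder_ids[0, 1:], skip_special_tokens=True)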
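
The gr.Interface( call is cut off in this view. For orientation only, a typical single-textbox wiring looks like the following; the argument values are illustrative, not the Space's actual code:

import gradio as gr

gr.Interface(
    fn=gradio_predict,                        # the function defined in app.py
    inputs=gr.Textbox(label="English text"),
    outputs=gr.Textbox(label="French translation"),
    title="EN-FR translation (ONNX)",
).launch()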