Frenchizer committed · verified
Commit ce28322 · 1 Parent(s): 6fd7731

Update app.py

Files changed (1): app.py (+5 -13)
app.py CHANGED
```diff
@@ -22,8 +22,9 @@ def gradio_predict(input_text):
     input_ids = tokenized_input["input_ids"].astype(np.int64)
     attention_mask = tokenized_input["attention_mask"].astype(np.int64)
 
-    # Initialize decoder input
-    decoder_input_ids = np.array([[tokenizer.bos_token_id]], dtype=np.int64)
+    # Use a specific token ID for decoder start (for Helsinki-NLP models)
+    decoder_start_token_id = 59513  # This is the typical start token for Helsinki-NLP models
+    decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)
 
     print("Initial shapes:")
     print(f"input_ids shape: {input_ids.shape}")
```
```diff
@@ -40,24 +41,15 @@ def gradio_predict(input_text):
         }
     )
 
-    print("Output information:")
-    print(f"outputs type: {type(outputs)}")
-    print(f"outputs length: {len(outputs)}")
-    print(f"outputs[0] shape: {outputs[0].shape}")
-
     # Get logits and convert to token ids
     logits = outputs[0]
     token_ids = np.argmax(logits[0], axis=-1)
 
-    # Find end of sequence
-    eos_token_id = tokenizer.eos_token_id
-    end_idx = np.where(token_ids == eos_token_id)[0]
+    # Find end of sequence (using pad token since eos might also be None)
+    end_idx = np.where(token_ids == tokenizer.pad_token_id)[0]
     if len(end_idx) > 0:
         token_ids = token_ids[:end_idx[0]]
 
-    print(f"token_ids shape: {token_ids.shape}")
-    print(f"token_ids: {token_ids}")
-
     # Decode the sequence
     translated_text = tokenizer.decode(token_ids, skip_special_tokens=True)
     return translated_text
```
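The decode here is a single greedy pass, so the truncation step simply cuts the argmax sequence at the first pad token. A self-contained numpy sketch of that step, with made-up token values:

```python
import numpy as np

pad_token_id = 59513  # same ID used as decoder start above
token_ids = np.array([212, 9, 4081, 30, 0, 59513, 59513])  # invented example output

# Keep everything before the first pad token, if one is present.
end_idx = np.where(token_ids == pad_token_id)[0]
if len(end_idx) > 0:
    token_ids = token_ids[:end_idx[0]]

print(token_ids)  # [ 212    9 4081   30    0]
```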
 
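For context, here is a minimal end-to-end sketch of the inference path the file appears to implement after this commit. The ONNX model path and the session's input names are assumptions inferred from the fragments above, not confirmed by the source:

```python
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-fr")  # illustrative
session = ort.InferenceSession("model.onnx")  # hypothetical exported seq2seq model

def gradio_predict(input_text):
    tokenized_input = tokenizer(input_text, return_tensors="np")
    input_ids = tokenized_input["input_ids"].astype(np.int64)
    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
    decoder_input_ids = np.array([[59513]], dtype=np.int64)  # Helsinki-NLP decoder start

    outputs = session.run(
        None,
        {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "decoder_input_ids": decoder_input_ids,
        },
    )

    # Single forward pass: take the argmax over the vocabulary at each position.
    logits = outputs[0]
    token_ids = np.argmax(logits[0], axis=-1)

    # Truncate at the first pad token, then decode.
    end_idx = np.where(token_ids == tokenizer.pad_token_id)[0]
    if len(end_idx) > 0:
        token_ids = token_ids[:end_idx[0]]

    return tokenizer.decode(token_ids, skip_special_tokens=True)
```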