Spaces:

Frenchizer
/

space_1

Sleeping

App Files Community

Frenchizer committed on Jan 10

Commit

76720d2

verified ·

1 Parent(s): 10fd17d

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -18,23 +18,18 @@ def gradio_predict(input_text):
             max_length=512
         )
-        # Get shapes from actual input
-        batch_size = tokenized_input["input_ids"].shape[0]  # Should be 1
-        seq_length = tokenized_input["input_ids"].shape[1]  # Should be 512
-        # Prepare inputs with correct shapes
         input_ids = tokenized_input["input_ids"].astype(np.int64)
         attention_mask = tokenized_input["attention_mask"].astype(np.int64)
-        # Create decoder_input_ids with matching shape
-        # Usually starts with pad_token_id or bos_token_id
-        decoder_input_ids = np.full((batch_size, seq_length), tokenizer.pad_token_id, dtype=np.int64)
-        decoder_input_ids[:, 0] = tokenizer.bos_token_id if tokenizer.bos_token_id is not None else tokenizer.pad_token_id
-        print("Debug shapes:")
-        print(f"input_ids shape: {input_ids.shape}")
-        print(f"attention_mask shape: {attention_mask.shape}")
-        print(f"decoder_input_ids shape: {decoder_input_ids.shape}")
         # Run inference
         outputs = session.run(
@@ -46,12 +41,27 @@ def gradio_predict(input_text):
             }
         )
         # Decode output
-        translated_text = tokenizer.decode(outputs[0][0], skip_special_tokens=True)
         return translated_text
     except Exception as e:
-        print(f"Detailed error: {str(e)}")  # This will show in the Space's logs
         return f"Error during translation: {str(e)}"
 # Gradio interface for the web app

             max_length=512
         )
+        # Prepare inputs
         input_ids = tokenized_input["input_ids"].astype(np.int64)
         attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+        # Initialize decoder_input_ids with start token
+        decoder_input_ids = np.zeros((1, 512), dtype=np.int64)
+        decoder_input_ids[:, 0] = tokenizer.bos_token_id or tokenizer.pad_token_id
+        print("Input values:")
+        print(f"First few input_ids: {input_ids[0][:10]}")
+        print(f"First few attention_mask: {attention_mask[0][:10]}")
+        print(f"First few decoder_input_ids: {decoder_input_ids[0][:10]}")
         # Run inference
         outputs = session.run(
             }
         )
+        print("Output shape and type:")
+        print(f"Output type: {type(outputs)}")
+        print(f"Output[0] type: {type(outputs[0])}")
+        print(f"Output[0] shape: {outputs[0].shape}")
+        # Process outputs more carefully
+        output_ids = outputs[0]
+        if isinstance(output_ids, np.ndarray):
+            output_ids = output_ids[0]  # Take first sequence
+            # Convert to list of integers if needed
+            if isinstance(output_ids, np.ndarray):
+                output_ids = output_ids.tolist()
         # Decode output
+        translated_text = tokenizer.decode(output_ids, skip_special_tokens=True)
         return translated_text
     except Exception as e:
+        print(f"Detailed error: {str(e)}")
+        import traceback
+        print(traceback.format_exc())
         return f"Error during translation: {str(e)}"
 # Gradio interface for the web app