Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,71 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
# Compute cosine similarities
|
| 4 |
similarities = cosine_similarity(input_embedding, label_embeddings)[0]
|
|
@@ -24,7 +91,7 @@ def get_translation_client(context):
|
|
| 24 |
Returns the appropriate Hugging Face Space client for the given context.
|
| 25 |
For now, all contexts use the same mock space.
|
| 26 |
"""
|
| 27 |
-
return Client("Frenchizer/
|
| 28 |
|
| 29 |
def translate_text(input_text, context):
|
| 30 |
"""
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer, AutoModel
|
| 3 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
from gradio_client import Client
|
| 7 |
+
from functools import lru_cache
|
| 8 |
+
|
| 9 |
+
# Cache the model and tokenizer using lru_cache
|
| 10 |
+
@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    """Load the tokenizer and encoder model from the local checkpoint.

    The result is memoized with ``lru_cache``, so repeated calls return the
    same ``(tokenizer, model)`` pair instead of reloading from disk.

    Returns:
        A ``(tokenizer, model)`` tuple built with ``AutoTokenizer`` and
        ``AutoModel`` from the same checkpoint path.
    """
    checkpoint = "./all-MiniLM-L6-v2"  # Replace with your Space and model path
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
|
| 16 |
+
|
| 17 |
+
# Load the model and tokenizer
|
| 18 |
+
# Module-level handles used by the embedding helpers in this file; the loader
# is wrapped in lru_cache, so later calls return this same pair.
tokenizer, model = load_model_and_tokenizer()
|
| 19 |
+
|
| 20 |
+
# Candidate domain labels; these are the strings embedded by
# precompute_label_embeddings and compared against the input text.
labels = [
    "aerospace", "anatomy", "anthropology", "art",
    "automotive", "blockchain", "biology", "chemistry",
    "cryptocurrency", "data science", "design", "e-commerce",
    "education", "engineering", "entertainment", "environment",
    "fashion", "finance", "food commerce", "gaming",
    "healthcare", "history", "information technology",
    "legal", "machine learning", "marketing", "medicine",
    "music", "philosophy", "physics", "politics", "real estate", "retail",
    "robotics", "social media", "sports", "technical",
    "tourism", "travel",
]

# Candidate tone labels. Each tone appears exactly once: a repeated entry
# would be scored twice if this list is ever embedded and ranked.
tones = [
    "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
    "informal", "pompous", "sustained", "rude",
    "sophisticated", "playful", "serious", "friendly",
]

# Candidate writing-style / content-format labels.
styles = [
    "poetry", "novel", "theater", "slang", "speech", "keywords", "html", "programming",
]

# Grammatical gender/number combinations.
gender_number = [
    "masculine singular", "masculine plural", "feminine singular", "feminine plural",
]
|
| 47 |
+
|
| 48 |
+
@lru_cache(maxsize=1)
def precompute_label_embeddings():
    """Embed every entry of ``labels`` with attention-mask-aware mean pooling.

    The labels are tokenized as one padded batch. A plain ``mean(dim=1)`` over
    the hidden states would also average the padding positions, so a short
    label's vector would depend on the longest label in the batch. Weighting
    by the attention mask averages only over real tokens, which matches what
    an unpadded single-text encoding produces.

    The result is memoized with ``lru_cache``, so the model runs only once.

    Returns:
        A NumPy array with one pooled embedding row per label.
    """
    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # Broadcast the (batch, seq) mask over the hidden dimension.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero for a fully masked row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).numpy()
|
| 54 |
+
|
| 55 |
+
# Computed once here and reused by detect_context for the cosine-similarity
# comparison against each input embedding.
label_embeddings = precompute_label_embeddings()
|
| 56 |
+
|
| 57 |
+
# Softmax function to convert scores to probabilities
|
| 58 |
+
def softmax(x):
    """Convert a collection of scores into probabilities that sum to 1.

    Args:
        x: Array-like of numeric scores. Normalization is over all elements.

    Returns:
        An array of the same shape as ``x`` holding the softmax
        probabilities. An empty input yields an empty array instead of
        raising from the max-reduction.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # np.max has no identity for zero-size input; empty in, empty out.
        return np.exp(scores)
    # Subtract the max for numerical stability (avoids overflow in exp).
    exp_scores = np.exp(scores - np.max(scores))
    return exp_scores / exp_scores.sum()
|
| 61 |
+
|
| 62 |
+
# Function to detect context
|
| 63 |
+
def detect_context(input_text, threshold=0.03):
|
| 64 |
+
# Encode the input text
|
| 65 |
+
inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
|
| 66 |
+
with torch.no_grad():
|
| 67 |
+
outputs = model(**inputs)
|
| 68 |
+
input_embedding = outputs.last_hidden_state.mean(dim=1).numpy() # Mean pooling for embedding
|
| 69 |
|
| 70 |
# Compute cosine similarities
|
| 71 |
similarities = cosine_similarity(input_embedding, label_embeddings)[0]
|
|
|
|
| 91 |
Returns the appropriate Hugging Face Space client for the given context.
|
| 92 |
For now, all contexts use the same mock space.
|
| 93 |
"""
|
| 94 |
+
return Client("Frenchizer/space_7") # Replace with actual Space paths for each context
|
| 95 |
|
| 96 |
def translate_text(input_text, context):
|
| 97 |
"""
|