Frenchizer committed (verified)
Commit 2022cd7 · Parent: f9220f6

Update app.py

Files changed (1): app.py (+68 -1)
app.py CHANGED
@@ -1,4 +1,71 @@
 import gradio as gr
+from transformers import AutoTokenizer, AutoModel
+from sklearn.metrics.pairwise import cosine_similarity
+import torch
+import numpy as np
+from gradio_client import Client
+from functools import lru_cache
+
+# Cache the model and tokenizer using lru_cache
+@lru_cache(maxsize=1)
+def load_model_and_tokenizer():
+    model_name = "./all-MiniLM-L6-v2"  # Replace with your Space and model path
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+    return tokenizer, model
+
+# Load the model and tokenizer
+tokenizer, model = load_model_and_tokenizer()
+
+# Precompute label embeddings
+labels = [
+    "aerospace", "anatomy", "anthropology", "art",
+    "automotive", "blockchain", "biology", "chemistry",
+    "cryptocurrency", "data science", "design", "e-commerce",
+    "education", "engineering", "entertainment", "environment",
+    "fashion", "finance", "food commerce", "gaming",
+    "healthcare", "history", "information technology",
+    "legal", "machine learning", "marketing", "medicine",
+    "music", "philosophy", "physics", "politics", "real estate", "retail",
+    "robotics", "social media", "sports", "technical",
+    "tourism", "travel"
+]
+
+tones = [
+    "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
+    "informal", "pompous", "sustained", "rude",
+    "sophisticated", "playful", "serious", "friendly"
+]
+
+styles = [
+    "poetry", "novel", "theater", "slang", "speech", "keywords", "html", "programming"
+]
+
+gender_number = [
+    "masculine singular", "masculine plural", "feminine singular", "feminine plural"
+]
+
+@lru_cache(maxsize=1)
+def precompute_label_embeddings():
+    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embeddings
+
+label_embeddings = precompute_label_embeddings()
+
+# Softmax function to convert scores to probabilities
+def softmax(x):
+    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
+    return exp_x / exp_x.sum()
+
+# Function to detect context
+def detect_context(input_text, threshold=0.03):
+    # Encode the input text
+    inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    input_embedding = outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embedding
 
     # Compute cosine similarities
     similarities = cosine_similarity(input_embedding, label_embeddings)[0]
@@ -24,7 +91,7 @@ def get_translation_client(context):
     Returns the appropriate Hugging Face Space client for the given context.
     For now, all contexts use the same mock space.
     """
-    return Client("Frenchizer/space_18")  # Replace with actual Space paths for each context
+    return Client("Frenchizer/space_7")  # Replace with actual Space paths for each context
 
 def translate_text(input_text, context):
     """