ndc8 committed
Commit 4599528 · 1 Parent(s): 385d87b
fix
backend_service.py +3 -40

backend_service.py CHANGED
@@ -2,15 +2,6 @@
 FastAPI Backend AI Service converted from Gradio app
 Provides OpenAI-compatible chat completion endpoints
 """
-# Configure Hugging Face cache directory and authentication before any HF imports
-import os
-_CACHE_DIR = os.path.join(os.getcwd(), ".hf_cache")
-os.makedirs(_CACHE_DIR, exist_ok=True)
-# Set environment variables early to avoid default ~/.cache permission issues
-os.environ.setdefault("HF_HOME", _CACHE_DIR)
-os.environ.setdefault("TRANSFORMERS_CACHE", _CACHE_DIR)
-# Authentication token for private models
-hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
 
 import asyncio
 import logging
@@ -41,9 +32,6 @@ except ImportError:
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Authentication token for private models
-hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
-
 # Pydantic models for multimodal content
 class TextContent(BaseModel):
     type: str = Field(default="text", description="Content type")
@@ -176,35 +164,21 @@ async def lifespan(app: FastAPI):
     """Application lifespan manager for startup and shutdown events"""
     global inference_client, tokenizer, image_text_pipeline
 
-    logger.info(f"Using Hugging Face cache directory: {_CACHE_DIR}")
-    if not hf_token:
-        logger.info("No Hugging Face auth token provided; private models may fail to load.")
-
     # Startup
     logger.info("🚀 Starting AI Backend Service...")
     try:
         # Initialize HuggingFace Inference Client for text generation
-        inference_client = InferenceClient(model=current_model
+        inference_client = InferenceClient(model=current_model)
         logger.info(f"✅ Initialized inference client with model: {current_model}")
 
         # Initialize image-text-to-text pipeline
         if transformers_available and pipeline:
             try:
                 logger.info(f"🖼️ Initializing image captioning pipeline with model: {vision_model}")
-                image_text_pipeline = pipeline(
-                    "image-to-text",
-                    model=vision_model,
-                    use_auth_token=hf_token,
-                    cache_dir=_CACHE_DIR
-                )
+                image_text_pipeline = pipeline("image-to-text", model=vision_model)  # Use image-to-text task
                 logger.info("✅ Image captioning pipeline loaded successfully")
-            except PermissionError as pe:
-                logger.warning(f"⚠️ Permission error while loading image captioning pipeline: {pe}. Check cache directory permissions: {_CACHE_DIR}")
-                image_text_pipeline = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load image captioning pipeline: {e}")
-                if "not a local folder and is not a valid model identifier" in str(e):
-                    logger.warning("Model identifier invalid; ensure model name is correct or you have access.")
                 image_text_pipeline = None
         else:
             logger.warning("⚠️ Transformers not available, image processing disabled")
@@ -213,24 +187,13 @@ async def lifespan(app: FastAPI):
         # Initialize tokenizer for better text handling
         if transformers_available and AutoTokenizer:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    current_model,
-                    use_auth_token=hf_token,
-                    cache_dir=_CACHE_DIR
-                )  # type: ignore
+                tokenizer = AutoTokenizer.from_pretrained(current_model)  # type: ignore
                 logger.info("✅ Tokenizer loaded successfully")
-            except PermissionError as pe:
-                logger.warning(f"⚠️ Permission error while loading tokenizer: {pe}. Check cache directory permissions: {_CACHE_DIR}")
-                tokenizer = None
-            except ValueError as ve:
-                logger.warning(f"⚠️ Could not load tokenizer: {ve}. If this is a private model, set HF_TOKEN env var to access it.")
-                tokenizer = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load tokenizer: {e}")
                 tokenizer = None
         else:
             logger.info("⚠️ Tokenizer initialization skipped")
-            tokenizer = None
 
     except Exception as e:
         logger.error(f"❌ Failed to initialize inference client: {e}")