# SageMaker deployment script for a Hugging Face model.
# (This file was previously flagged with "Runtime error" — see notes below.)
"""Deploy a Hugging Face Hub model to a SageMaker real-time endpoint.

Steps: resolve an IAM execution role, build the Hugging Face LLM (TGI)
container image URI, create a HuggingFaceModel, deploy it to a GPU
endpoint, and run one smoke-test inference.

Running this script requires AWS credentials with SageMaker/IAM access
and creates a billable ml.g5.2xlarge endpoint — see the cleanup note at
the bottom.
"""
import json

import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM execution role. Inside SageMaker Studio/notebooks
# get_execution_role() returns it directly; outside SageMaker it raises
# ValueError, so fall back to looking the role up by name via the IAM API.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Container environment: which Hub model to serve and how many GPUs to
# shard across. Env values must be strings, hence json.dumps for the int.
hub = {
    'HF_MODEL_ID': 'praneethposina/customer_support_bot',
    'SM_NUM_GPUS': json.dumps(1),
}

# Image URI for the Hugging Face LLM (text-generation-inference) container.
# NOTE(review): the pinned version must be one the installed sagemaker SDK
# knows about; if this call raises, drop the version argument to get the
# latest supported image — confirm against the SDK's supported versions.
image_uri = get_huggingface_llm_image_uri(
    backend="huggingface",
    version="3.2.3"
)

# Model object pairing the serving image with the Hub-model environment.
huggingface_model = HuggingFaceModel(
    image_uri=image_uri,
    env=hub,
    role=role
)

# Deploy to a real-time endpoint. Model download + load can be slow, so the
# container startup health-check timeout is extended to 300 seconds.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    container_startup_health_check_timeout=300,
)

# Smoke-test the live endpoint with a single prompt.
response = predictor.predict({
    "inputs": "Hi, what can you help me with?"
})
print("Model Response:", response)

# Cleanup reminder: the endpoint bills while it exists. When finished, run:
#   predictor.delete_endpoint()