import streamlit as st
import pandas as pd
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Page configuration
st.set_page_config(
    page_title="📊 LLM Data Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better UI
st.markdown(""" """, unsafe_allow_html=True)

# Title and description
st.title("📊 LLM Data Analyzer")
st.markdown("""
Analyze your CSV/Excel files and chat with an AI assistant powered by Llama 2.
This app runs on the **free Hugging Face CPU tier**; the model generates roughly
5-10 tokens per second, so longer answers can take up to a minute.
""")


# Cache the model so it is loaded only once per session
@st.cache_resource
def load_llm_model():
    """Load the quantized Llama 2 model from the Hugging Face Hub."""
    st.info("📥 Downloading model (first time only, ~4GB)... This may take 2-3 minutes.")
    try:
        model_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
            filename="llama-2-7b-chat.Q4_K_M.gguf"
        )
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,       # maximum context window, in tokens
            n_threads=4,
            n_gpu_layers=0,   # CPU only (free tier)
            verbose=False
        )
        return llm
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None


# Load model
llm = load_llm_model()
if llm is None:
    st.error("Failed to load model. Please refresh the page.")
    st.stop()

st.success("✅ Model loaded successfully!")

# Create tabs
tab1, tab2, tab3 = st.tabs(["📤 Upload & Analyze", "💬 Chat", "📊 About"])

# ============================================================================
# TAB 1: Upload & Analyze
# ============================================================================
with tab1:
    st.header("📤 Upload and Analyze Data")

    uploaded_file = st.file_uploader(
        "Upload a CSV or Excel file",
        type=["csv", "xlsx", "xls"],
        help="Supported formats: CSV, Excel"
    )

    if uploaded_file is not None:
        st.success(f"✅ File uploaded: {uploaded_file.name}")

        # Read the file
        try:
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            # Display data preview
            st.subheader("📋 Data Preview")
            st.dataframe(df.head(10), use_container_width=True)

            # Display statistics
            st.subheader("📊 Data Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", df.shape[0])
            with col2:
                st.metric("Columns", df.shape[1])
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics
            st.write(df.describe().T)

            # Ask AI about the data
            st.subheader("❓ Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average value in column X?"
            )

            if question:
                with st.spinner("🤔 AI is analyzing your data..."):
                    # Build a prompt from the data summary and the question
                    data_summary = df.describe().to_string()
                    prompt = f"""You are a data analyst expert. You have the following data summary:

{data_summary}

Column names: {', '.join(df.columns.tolist())}

User's question: {question}

Please provide a clear, concise analysis based on the data summary. Focus on actionable insights."""

                    # Generate response
                    response = llm(
                        prompt,
                        max_tokens=300,
                        stop=["\n\nUser:", "Question:"],
                        echo=False,
                        temperature=0.7
                    )

                    answer = response['choices'][0]['text'].strip()
                    st.success("✅ Analysis Complete")
                    st.write(answer)

        except Exception as e:
            st.error(f"Error reading file: {e}")
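
# ----------------------------------------------------------------------------
# Optional prompt-formatting helper (a sketch, not part of the original app).
# The plain-text prompts used in this file do work, but llama-2-7b-chat was
# fine-tuned on the [INST] chat template, so wrapping prompts in it usually
# improves answer quality. The helper name is hypothetical and nothing in the
# app calls it yet.
def format_llama2_prompt(system_msg: str, user_msg: str) -> str:
    """Wrap a system and a user message in the standard Llama 2 chat template."""
    # llama.cpp prepends the BOS token itself, so <s> is omitted here
    return f"[INST] <<SYS>>\n{system_msg}\n<</SYS>>\n\n{user_msg} [/INST]"
# Usage: llm(format_llama2_prompt("You are a data analyst expert.", question), ...)
# ----------------------------------------------------------------------------
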
Ask anything!") # Initialize session state for chat history if "messages" not in st.session_state: st.session_state.messages = [] # Display chat history for message in st.session_state.messages: with st.chat_message(message["role"]): st.markdown(message["content"]) # Chat input user_input = st.chat_input("Type your message here...") if user_input: # Add user message to history st.session_state.messages.append({"role": "user", "content": user_input}) # Display user message with st.chat_message("user"): st.markdown(user_input) # Generate AI response with st.chat_message("assistant"): with st.spinner("âŗ Generating response..."): prompt = f"""You are a helpful AI assistant. The user asks: {user_input} Provide a clear, helpful, and concise response.""" response = llm( prompt, max_tokens=300, stop=["\n\nUser:", "User:"], echo=False, temperature=0.7 ) assistant_message = response['choices'][0]['text'].strip() st.markdown(assistant_message) # Add assistant message to history st.session_state.messages.append({ "role": "assistant", "content": assistant_message }) # ============================================================================ # TAB 3: About # ============================================================================ with tab3: st.header("â„šī¸ About This App") st.markdown(""" ### đŸŽ¯ What is this? **LLM Data Analyzer** is an AI-powered tool for analyzing data and having conversations with an intelligent assistant. ### 🔧 Technology Stack - **Model:** Llama 2 7B (quantized to 4-bit) - **Framework:** Llama.cpp (CPU inference) - **Frontend:** Streamlit - **Hosting:** Hugging Face Spaces (Free Tier) ### ⚡ Performance - **Speed:** ~5-10 tokens per second (free CPU) - **Context:** 2048 tokens max - **Model Size:** 4GB (quantized) - **Hardware:** Free tier CPU ### 💡 Use Cases 1. **Data Analysis**: Upload CSV/Excel and ask questions 2. **Chat**: General conversation with AI 3. **Learning**: Understand your data better ### 🚀 Faster Version Available For **GPU acceleration** (70+ tokens/sec): - Run locally on Apple Silicon Mac using MLX - Upgrade to Hugging Face PRO tier - Deploy on GPU-enabled cloud servers ### 📝 Tips - Keep questions focused and specific for best results - First request takes longer (model loading) - Data is processed locally, not stored on server ### 🔗 Links - [GitHub Repository](#) - Source code - [Hugging Face Hub](#) - Model info - [Llama.cpp](#) - Inference engine --- **Version:** 1.0 | **Last Updated:** Dec 2025 """)