"""Streamlit app for pasting CSV data, viewing statistics and asking keyword-driven questions."""

import io
import re

import pandas as pd
import streamlit as st

# Page configuration
st.set_page_config(
    page_title="📊 LLM Data Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.title("📊 LLM Data Analyzer")
st.write("*Analyze data and chat with AI - Powered by Hugging Face Spaces*")

# Store dataframe in session state globally
if "current_df" not in st.session_state:
    st.session_state.current_df = None

# Keyword patterns per intent. Word boundaries are used on purpose: plain
# substring tests misfire ("hi" in "this", "min" in "determine", "count" in
# "country"), which routed unrelated prompts to the wrong answer.
_INTENT_PATTERNS = {
    "mean": re.compile(r"\b(?:averag\w*|means?)\b"),
    "max": re.compile(r"\b(?:max(?:imum)?|highest)\b"),
    "min": re.compile(r"\b(?:min(?:imum)?|lowest)\b"),
    "count": re.compile(r"\b(?:counts?|rows?|how many)\b"),
    "columns": re.compile(r"\b(?:columns?|fields?|headers?)\b"),
    "summary": re.compile(r"\b(?:summar\w*|overview|describ\w*)\b"),
    "trend": re.compile(r"\b(?:trends?|patterns?)\b"),
    "greeting": re.compile(r"\b(?:hello|hi)\b"),
    "help": re.compile(r"\b(?:what can you do|help\w*)\b"),
    "thanks": re.compile(r"\bthank\w*\b"),
}

# Intents are tested in this order; the first match wins.
_DATA_INTENTS = ("mean", "max", "min", "count", "columns", "summary", "trend")
_NO_DATA_INTENTS = ("greeting", "help", "thanks")


def _first_intent(text, intents):
    """Return the first name in *intents* whose pattern matches *text*, else None."""
    for name in intents:
        if _INTENT_PATTERNS[name].search(text):
            return name
    return None


def _bullet_stats(header, series, value_format):
    """Render one "- column: value" bullet per entry of *series* under *header*."""
    bullets = [
        f"- {name}: {value_format.format(value)}" for name, value in series.items()
    ]
    return header + "\n" + "\n".join(bullets)


def _answer_about_data(prompt_lower, df):
    """Answer a lower-cased prompt using the contents of a non-empty DataFrame."""
    # Column labels are not guaranteed to be strings; str.join needs strings.
    column_names = [str(col) for col in df.columns]
    numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
    intent = _first_intent(prompt_lower, _DATA_INTENTS)

    if intent == "mean":
        if not numeric_cols:
            return "No numeric columns found to show average values."
        return _bullet_stats(
            "📊 **Average values for numeric columns:**",
            df[numeric_cols].mean(),
            "{:.2f}",
        )

    if intent == "max":
        if not numeric_cols:
            return "No numeric columns found to show max values."
        return _bullet_stats(
            "📊 **Maximum values for numeric columns:**",
            df[numeric_cols].max(),
            "{}",
        )

    if intent == "min":
        if not numeric_cols:
            return "No numeric columns found to show min values."
        return _bullet_stats(
            "📊 **Minimum values for numeric columns:**",
            df[numeric_cols].min(),
            "{}",
        )

    if intent == "count":
        return (
            f"📊 Your dataset has **{len(df)} rows** and "
            f"**{len(df.columns)} columns**.\n\n"
            f"Columns: {', '.join(column_names)}"
        )

    if intent == "columns":
        return f"📊 **Dataset Columns ({len(df.columns)}):**\n" + "\n".join(
            f"- {name}" for name in column_names
        )

    if intent == "summary":
        summary_lines = [
            "📊 **Data Summary:**",
            f"- Rows: {len(df)}",
            f"- Columns: {len(df.columns)}",
            f"- Column names: {', '.join(column_names)}",
        ]
        if numeric_cols:
            numeric_names = ", ".join(str(col) for col in numeric_cols)
            summary_lines.append(f"- Numeric columns: {numeric_names}")
        return "\n".join(summary_lines) + "\n"

    if intent == "trend":
        return (
            "The data shows various patterns. Check the statistics above to "
            "identify trends in the numeric columns."
        )

    # Generic response with data context
    return (
        f"I have access to your data with {len(df)} rows and {len(df.columns)} "
        f"columns: {', '.join(column_names)}. Ask me specific questions like: "
        "What's the average? Show me the max values? How many rows?"
    )


def _answer_without_data(prompt_lower):
    """Answer a lower-cased prompt when no usable data has been loaded."""
    intent = _first_intent(prompt_lower, _NO_DATA_INTENTS)

    if intent == "greeting":
        return (
            "Hello! I'm the LLM Data Analyzer. Load some data first, then ask "
            "me questions about it!"
        )
    if intent == "help":
        return (
            "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions "
            "about averages, max, min, columns, etc. 4) Chat about your data!"
        )
    if intent == "thanks":
        return "You're welcome! Ask me anything about your data!"
    return (
        "Please load some data first (click 'Load Demo Data' or paste CSV), "
        "then ask me questions about it!"
    )


# AI response function with data awareness
def get_ai_response(prompt: str, df: "pd.DataFrame | None" = None) -> str:
    """Generate a keyword-driven reply, using *df* when it holds data.

    The prompt is lower-cased and matched against a fixed set of intents
    (average, max, min, row count, columns, summary, trend). When *df* is
    ``None`` or empty, only greeting/help/thanks replies are available and any
    other prompt asks the user to load data first.

    Args:
        prompt: The user's question. ``None`` is treated as an empty prompt.
        df: The currently loaded dataset, if any.

    Returns:
        A Markdown-formatted reply string.
    """
    prompt_lower = (prompt or "").lower()

    # If we have data, provide data-specific responses
    if df is not None and not df.empty:
        return _answer_about_data(prompt_lower, df)

    # Fallback responses (no data)
    return _answer_without_data(prompt_lower)
# Demo dataset offered by the "Load Demo Data" button. Built with join so the
# CSV rows carry no source-code indentation.
DEMO_CSV = "\n".join(
    [
        "Name,Age,Salary,Department,Experience_Years",
        "Alice,25,50000,Sales,2",
        "Bob,30,60000,IT,5",
        "Charlie,35,75000,HR,8",
        "David,28,55000,Sales,3",
        "Eve,32,65000,IT,6",
    ]
)


def _submit_chat_message():
    """Record one chat exchange, then clear the chat box.

    Runs as the ``on_change`` callback of the chat text input. Handling the
    message here (instead of testing the widget value in the script body)
    means an exchange is appended exactly once; the script body re-runs on
    every interaction anywhere in the app and would otherwise re-append the
    same message each time.
    """
    text = st.session_state.get("chat_input", "")
    if not text.strip():
        return

    if "messages" not in st.session_state:
        st.session_state.messages = []

    reply = get_ai_response(text, st.session_state.get("current_df"))
    st.session_state.messages.append({"role": "user", "content": text})
    st.session_state.messages.append({"role": "assistant", "content": reply})

    # Callbacks are allowed to reset a widget's own value.
    st.session_state["chat_input"] = ""


# Create tabs
tab1, tab2, tab3 = st.tabs(["📤 Paste Data", "💬 Chat", "📊 About"])

# ============================================================================
# TAB 1: Paste Data
# ============================================================================
with tab1:
    st.header("📤 Analyze Data")

    st.info("💡 Load demo data or paste your CSV to start analyzing!")

    # Demo mode
    if st.button("📌 Load Demo Data (Click to test)", use_container_width=True):
        # Write straight into the text area's widget key. This must happen
        # before the text area is created in this run; changing the widget's
        # `value=` argument is not a reliable way to update a keyed widget.
        st.session_state["csv_input"] = DEMO_CSV
        # Kept for any code that still reads the legacy key.
        st.session_state.csv_data = DEMO_CSV
        st.success("✅ Demo data loaded! Check the Chat tab to ask questions about it.")

    st.subheader("Or paste your CSV data here:")
    csv_text = st.text_area(
        "Paste CSV content (headers, comma-separated):",
        height=150,
        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
        key="csv_input",
    )

    df = None
    if csv_text.strip():
        # Only the parse itself is guarded, so that rendering problems further
        # down are not misreported as CSV errors.
        try:
            df = pd.read_csv(io.StringIO(csv_text))
        except Exception as exc:  # any parser failure becomes a friendly message
            st.error(f"❌ Error parsing CSV: {str(exc)[:100]}")
            st.info(
                "Make sure your CSV is properly formatted: headers on first "
                "line, comma-separated values."
            )

    # Store in session state for chat to access. This is also reset to None
    # when the box is empty or unparsable, so Chat never reports stale data.
    st.session_state.current_df = df

    if df is not None:
        st.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

        # Display data preview
        st.subheader("📋 Data Preview")
        st.dataframe(df, use_container_width=True)

        # Display statistics
        st.subheader("📊 Data Statistics")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Rows", len(df))
        with col2:
            st.metric("Columns", len(df.columns))
        with col3:
            st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

        # Detailed statistics (best effort: a failure here must not hide the
        # preview above, but it should not swallow KeyboardInterrupt/SystemExit)
        try:
            numeric_df = df.select_dtypes(include=["number"])
            if not numeric_df.empty:
                st.write("### Numeric Columns Summary")
                st.write(numeric_df.describe().T)
            else:
                st.info("No numeric columns found in dataset.")
        except Exception:
            st.info("Could not generate statistics for this data.")

        # Ask AI about the data
        st.subheader("❓ Ask AI About Your Data")
        question = st.text_input(
            "What would you like to know about this data?",
            placeholder="e.g., What is the average salary? How many rows?",
            key="data_question",
        )

        if question:
            response = get_ai_response(question, df)
            st.success("✅ Analysis Complete")
            st.write(response)

# ============================================================================
# TAB 2: Chat
# ============================================================================
with tab2:
    st.header("💬 Chat with AI Assistant")
    st.write("Have a conversation about your data.")

    # Show current data status
    current_df = st.session_state.current_df
    if current_df is not None:
        st.success(
            f"✅ Data loaded: {len(current_df)} rows, {len(current_df.columns)} columns"
        )
        # Column labels may not be strings; str.join needs strings.
        st.write(f"Columns: {', '.join(str(col) for col in current_df.columns)}")
    else:
        st.warning("⚠️ No data loaded yet. Go to 'Paste Data' tab and load data first!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history. The submit callback has already run by the time
    # the script reaches this point, so the newest exchange is included.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input; submission is handled in _submit_chat_message.
    st.text_input(
        "Type your message:",
        placeholder="Ask me about your data...",
        key="chat_input",
        on_change=_submit_chat_message,
    )

# ============================================================================
# TAB 3: About
# ============================================================================
with tab3:
    st.header("ℹ️ About This App")

    st.markdown("""
    ### 🎯 What is this?

    **LLM Data Analyzer** is a tool for analyzing data and having conversations about your datasets.

    ### 🔧 Technology Stack

    - **Framework:** Streamlit
    - **Hosting:** Hugging Face Spaces (Free Tier)
    - **Language:** Python

    ### ⚡ Features

    1. **Data Analysis**: Paste CSV and analyze your data
    2. **Smart Chat**: Chat with AI about your data
    3. **Statistics**: View comprehensive data summaries
    4. **Demo Mode**: Test with sample data instantly

    ### 📝 How to Use

    1. **Load Data** - Click "Load Demo Data" or paste your CSV
    2. **View Preview** - See your data in table format
    3. **Chat** - Go to Chat tab and ask questions about your data
    4. **Get Insights** - AI analyzes and answers questions

    ### 💡 Example Questions

    - "What's the average salary?"
    - "Show me the maximum values"
    - "How many rows do I have?"
    - "What columns are in the data?"
    - "Give me a summary"

    ### 📋 CSV Format Example

    ```
    Name,Age,Salary,Department
    Alice,25,50000,Sales
    Bob,30,60000,IT
    Charlie,35,75000,HR
    ```

    ### 🌐 Powered By

    - [Hugging Face](https://huggingface.co/) - AI platform and hosting
    - [Streamlit](https://streamlit.io/) - Web framework
    - [Pandas](https://pandas.pydata.org/) - Data analysis

    ### 🛠 Troubleshooting

    **Chat can't see my data?**
    - Make sure to load data in the "Paste Data" tab first
    - Then go to "Chat" tab - it will show your data status

    **How do I format CSV?**
    - First line: column headers separated by commas
    - Following lines: data values separated by commas

    ### 🔗 Links

    - [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
    - [Hugging Face Hub](https://huggingface.co/)

    ---

    **Version:** 2.1 | **Last Updated:** Dec 2025

    💡 **Note:** Chat now has access to your data! Load data first, then ask questions.
    """)