
import os
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import io
import hashlib
from datetime import datetime, timedelta
import chardet
import os

def detect_encoding_and_read_csv(file_buffer):
    """
    Detect file encoding and read CSV with proper encoding handling
    """
    # Reset buffer position
    file_buffer.seek(0)
    raw_data = file_buffer.read()
    
    # Detect encoding
    detected = chardet.detect(raw_data)
    encoding = detected.get('encoding', 'utf-8')
    confidence = detected.get('confidence', 0)
    
    # List of encodings to try (most common for Romanian/European files)
    encodings_to_try = [
        encoding,  # Detected encoding first
        'utf-8',
        'windows-1252',  # Common for Excel exports
        'iso-8859-1',    # Latin-1
        'cp1250',        # Central European
        'utf-8-sig'      # UTF-8 with BOM
    ]
    
    # Remove duplicates while preserving order
    encodings_to_try = list(dict.fromkeys(encodings_to_try))
    
    for enc in encodings_to_try:
        try:
            # Create StringIO from decoded bytes
            decoded_content = raw_data.decode(enc)
            csv_buffer = io.StringIO(decoded_content)
            df = pd.read_csv(csv_buffer)
            
            # Return success info
            return df, enc, confidence, None
            
        except (UnicodeDecodeError, UnicodeError, pd.errors.EmptyDataError) as e:
            continue
        except Exception as e:
            continue
    
    # If all encodings fail
    return None, None, 0, f"Could not read file with any common encoding"

def check_password():
    """Bypass authentication for local use."""
    return True

# Set page config
st.set_page_config(
    page_title="📊 Unified Analytics Dashboard",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://github.com/your-username/streamlit-analytics-dashboard',
        'Report a bug': 'https://github.com/your-username/streamlit-analytics-dashboard/issues',
        'About': """
        # 🚀 Unified Analytics Dashboard
        
        **Version:** 1.0.0  
        **Author:** Your Name  
        **Description:** Advanced CSV data analysis and fuel sales analytics platform
        
        ### Features:
        - 🔢 CSV Comparison by ID
        - 👤 CSV Comparison by Name  
        - ⛽ Fuel Sales Analytics
        - 🧹 Data Cleaning Tools
        
        ### Built with:
        - Python 3.9+
        - Streamlit
        - Pandas
        - Plotly
        
        **© 2025 - All rights reserved**
        """
    }
)

# Custom CSS
st.markdown("""
<style>
.metric-container {
    background-color: #f0f2f6;
    padding: 1rem;
    border-radius: 0.5rem;
    margin: 0.5rem 0;
}
.highlight {
    background-color: #e1f5fe;
    padding: 0.5rem;
    border-radius: 0.25rem;
    border-left: 4px solid #1976d2;
}
.upload-section {
    background-color: #f8f9fa;
    padding: 1.5rem;
    border-radius: 0.5rem;
    border: 2px dashed #dee2e6;
    margin: 1rem 0;
}
.tab-header {
    font-size: 1.2rem;
    font-weight: bold;
    margin-bottom: 1rem;
}
.fuel-card {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 1.5rem;
    border-radius: 0.5rem;
    color: white;
    margin: 0.5rem 0;
}
.partner-card {
    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    padding: 1.5rem;
    border-radius: 0.5rem;
    color: white;
    margin: 0.5rem 0;
}
.country-card {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
    padding: 1.5rem;
    border-radius: 0.5rem;
    color: white;
    margin: 0.5rem 0;
}
.stats-card {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin: 0.5rem 0;
}
</style>
""", unsafe_allow_html=True)

# ==================== FILE HANDLING FUNCTIONS ====================

def read_csv_with_encoding_detection(file):
    """
    Read CSV file with automatic encoding detection
    Handles common encodings including Romanian characters
    """
    try:
        # Reset file pointer to beginning
        file.seek(0)
        
        # Read a sample to detect encoding
        sample = file.read(10000)
        file.seek(0)
        
        # Detect encoding
        detected = chardet.detect(sample)
        encoding = detected['encoding'] if detected['encoding'] else 'utf-8'
        
        # Common encodings to try in order
        encodings_to_try = [
            encoding,
            'utf-8',
            'utf-8-sig',  # UTF-8 with BOM
            'iso-8859-1',  # Latin-1
            'cp1252',      # Windows-1252
            'iso-8859-2',  # Latin-2 (Eastern European)
            'cp1250',      # Windows-1250 (Eastern European)
            'utf-16',
            'ascii'
        ]
        
        # Remove duplicates while preserving order
        seen = set()
        unique_encodings = []
        for enc in encodings_to_try:
            if enc and enc not in seen:
                seen.add(enc)
                unique_encodings.append(enc)
        
        # Try each encoding
        for enc in unique_encodings:
            try:
                file.seek(0)
                content = file.read().decode(enc)
                df = pd.read_csv(io.StringIO(content))
                return df, enc
            except (UnicodeDecodeError, UnicodeError):
                continue
            except Exception as e:
                continue
        
        # If all else fails, try with error handling
        file.seek(0)
        content = file.read().decode('utf-8', errors='replace')
        df = pd.read_csv(io.StringIO(content))
        return df, 'utf-8 (with errors replaced)'
        
    except Exception as e:
        raise Exception(f"Could not read file with any encoding. Original error: {str(e)}")

def clean_romanian_characters(text):
    """
    Clean and standardize Romanian characters
    """
    if pd.isna(text) or not isinstance(text, str):
        return text
    
    # Romanian character replacements
    romanian_chars = {
        'ă': 'a', 'â': 'a', 'î': 'i', 'ș': 's', 'ț': 't',
        'Ă': 'A', 'Â': 'A', 'Î': 'I', 'Ș': 'S', 'Ț': 'T',
        # Also handle common encoding issues
        'ş': 's', 'ţ': 't', 'Ş': 'S', 'Ţ': 'T'
    }
    
    result = text
    for old_char, new_char in romanian_chars.items():
        result = result.replace(old_char, new_char)
    
    return result

# ==================== CSV COMPARISON FUNCTIONS ====================

def clean_and_aggregate_data(df, group_col, value_col, operation='sum'):
    """Clean and aggregate data by specified column and operation with improved name matching"""
    # Make a copy to avoid modifying original
    df_clean = df.copy()
    
    # Handle name-based grouping with normalization
    if df_clean[group_col].dtype == 'object':
        # Convert to string and handle NaN values
        df_clean[group_col] = df_clean[group_col].astype(str)
        
        # Create normalized version for matching
        df_clean['_normalized_key'] = df_clean[group_col].str.lower()
        df_clean['_normalized_key'] = df_clean['_normalized_key'].str.strip()
        df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'\s+', ' ', regex=True)
        df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'[^\w\s]', '', regex=True)
        df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'\s+', ' ', regex=True).str.strip()
        
        # Clean display version
        df_clean[group_col] = df_clean[group_col].str.strip()
        df_clean[group_col] = df_clean[group_col].str.replace(r'\s+', ' ', regex=True)
        df_clean[group_col] = df_clean[group_col].str.title()
        
        group_key = '_normalized_key'
    else:
        # For numeric columns, use as-is
        group_key = group_col
    
    # Ensure value column is numeric
    df_clean[value_col] = pd.to_numeric(df_clean[value_col], errors='coerce')
    
    # Remove rows with NaN values
    df_clean = df_clean.dropna(subset=[group_col, value_col])
    
    # Aggregate by operation using normalized key but keep original name
    if operation == 'sum':
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',  # Keep first occurrence of original name
                value_col: 'sum'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]  # Drop the normalized key
        else:
            aggregated = df_clean.groupby(group_key)[value_col].sum().reset_index()
    elif operation == 'mean':
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',
                value_col: 'mean'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]
        else:
            aggregated = df_clean.groupby(group_key)[value_col].mean().reset_index()
    elif operation == 'max':
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',
                value_col: 'max'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]
        else:
            aggregated = df_clean.groupby(group_key)[value_col].max().reset_index()
    elif operation == 'min':
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',
                value_col: 'min'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]
        else:
            aggregated = df_clean.groupby(group_key)[value_col].min().reset_index()
    elif operation == 'count':
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',
                value_col: 'count'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]
        else:
            aggregated = df_clean.groupby(group_key)[value_col].count().reset_index()
    else:
        if group_key == '_normalized_key':
            aggregated = df_clean.groupby(group_key).agg({
                group_col: 'first',
                value_col: 'sum'
            }).reset_index()
            aggregated = aggregated[[group_col, value_col]]
        else:
            aggregated = df_clean.groupby(group_key)[value_col].sum().reset_index()
    
    # Rename columns for clarity
    aggregated.columns = ['ID', f'Aggregated_{operation.title()}']
    
    return aggregated

def perform_comparison(df1, df2, group_col1, value_col1, group_col2, value_col2, operation, file1_name, file2_name):
    """Perform comparison between two datasets"""
    # Aggregate both datasets
    agg1 = clean_and_aggregate_data(df1, group_col1, value_col1, operation)
    agg2 = clean_and_aggregate_data(df2, group_col2, value_col2, operation)
    
    # Rename columns to include file names
    agg1.columns = ['ID', f'{file1_name}_{operation.title()}']
    agg2.columns = ['ID', f'{file2_name}_{operation.title()}']
    
    # Merge datasets
    comparison = pd.merge(agg1, agg2, on='ID', how='outer')
    comparison = comparison.fillna(0)
    
    # Calculate difference
    col1_name = f'{file1_name}_{operation.title()}'
    col2_name = f'{file2_name}_{operation.title()}'
    comparison['Difference'] = comparison[col1_name] - comparison[col2_name]
    comparison['Abs_Difference'] = comparison['Difference'].abs()
    
    # Round values to avoid floating point precision issues
    comparison[col1_name] = comparison[col1_name].round(10)
    comparison[col2_name] = comparison[col2_name].round(10)
    comparison['Difference'] = comparison['Difference'].round(10)
    comparison['Abs_Difference'] = comparison['Abs_Difference'].round(10)
    
    # Add status with tolerance for floating point comparison
    tolerance = 1e-10  # Very small tolerance for floating point comparison
    comparison['Status'] = comparison.apply(lambda row: 
        f'Only in {file1_name}' if row[col2_name] == 0 else
        f'Only in {file2_name}' if row[col1_name] == 0 else
        'Perfect Match' if abs(row['Difference']) <= tolerance else
        'Different Values', axis=1)
    
    # Sort by absolute difference
    comparison = comparison.sort_values('Abs_Difference', ascending=False)
    
    return comparison, col1_name, col2_name

def create_comparison_chart(comparison_df, col1_name, col2_name, file1_name, file2_name):
    """Create interactive scatter plot for comparison"""
    fig = px.scatter(
        comparison_df,
        x=col1_name,
        y=col2_name,
        color='Status',
        hover_data=['ID', 'Difference'],
        title=f"Comparison: {file1_name} vs {file2_name}",
        labels={
            col1_name: f'{file1_name} Values',
            col2_name: f'{file2_name} Values'
        }
    )
    
    # Add diagonal line for perfect matches
    max_val = max(comparison_df[col1_name].max(), comparison_df[col2_name].max())
    if max_val > 0:
        fig.add_shape(
            type="line",
            x0=0, y0=0, x1=max_val, y1=max_val,
            line=dict(color="red", width=2, dash="dash"),
            name="Perfect Match Line"
        )
    
    return fig

def file_upload_section(key_prefix):
    """Create file upload section"""
    st.markdown('<div class="upload-section">', unsafe_allow_html=True)
    st.markdown("### 📁 Upload CSV Files")
    
    col1, col2 = st.columns(2)
    
    with col1:
        file1 = st.file_uploader(
            f"Upload First CSV File",
            type=['csv'],
            key=f"{key_prefix}_file1"
        )
        
    with col2:
        file2 = st.file_uploader(
            f"Upload Second CSV File", 
            type=['csv'],
            key=f"{key_prefix}_file2"
        )
    
    st.markdown('</div>', unsafe_allow_html=True)
    
    return file1, file2

# ==================== FUEL ANALYTICS FUNCTIONS ====================

def load_and_format_fuel_data(df):
    """Load and format fuel sales data"""
    df_clean = df.copy()
    
    # Clean column names (remove extra spaces, normalize)
    df_clean.columns = df_clean.columns.str.strip()
    
    # Convert date column (try different formats)
    date_columns = [col for col in df_clean.columns if 'date' in col.lower() or 'data' in col.lower()]
    if date_columns:
        date_col = date_columns[0]
        try:
            df_clean[date_col] = pd.to_datetime(df_clean[date_col], format='%d.%m.%Y', errors='coerce')
        except:
            try:
                df_clean[date_col] = pd.to_datetime(df_clean[date_col], errors='coerce', dayfirst=True)
            except:
                st.warning(f"Could not parse date column: {date_col}")
    
    # Clean numeric columns and format Qty/Value as float with 2 decimals
    numeric_cols = ['Qty', 'Value', 'Cantitate', 'Valoare']
    for col in df_clean.columns:
        if any(nc.lower() in col.lower() for nc in numeric_cols):
            df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce').round(2)
    
    # Clean text columns
    text_cols = ['Partner', 'Product', 'Country', 'Currency']
    for col in df_clean.columns:
        if any(tc.lower() in col.lower() for tc in text_cols):
            if df_clean[col].dtype == 'object':
                df_clean[col] = df_clean[col].astype(str).str.strip()
    
    return df_clean

def create_fuel_analytics_dashboard(df):
    """Create comprehensive fuel analytics dashboard"""
    
    # KPI Summary
    st.markdown("### 📊 Key Performance Indicators")
    
    # Try to identify key columns dynamically
    qty_col = None
    value_col = None
    partner_col = None
    product_col = None
    country_col = None
    date_col = None
    location_col = None
    
    # Find columns by common names (includes Romanian terms)
    for col in df.columns:
        col_lower = col.lower()
        if any(x in col_lower for x in ['qty', 'cantitate', 'quantity', 'vanzari']):
            if qty_col is None:  # Take first match
                qty_col = col
        elif any(x in col_lower for x in ['value', 'valoare', 'amount', 'suma', 'vanzari']):
            if value_col is None:
                value_col = col
        elif any(x in col_lower for x in ['partner', 'partener', 'client', 'customer']):
            if partner_col is None:
                partner_col = col
        elif any(x in col_lower for x in ['product', 'produs', 'combustibil', 'fuel']):
            if product_col is None:
                product_col = col
        elif any(x in col_lower for x in ['country', 'tara', 'nation']):
            if country_col is None:
                country_col = col
        elif any(x in col_lower for x in ['date', 'data']):
            if date_col is None:
                date_col = col
        elif any(x in col_lower for x in ['location', 'localitate', 'city', 'oras']):
            if location_col is None:
                location_col = col
    
    # Main KPIs
    if value_col:
        col1, col2, col3, col4, col5 = st.columns(5)

        with col1:
            total_value = df[value_col].sum() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
            st.metric("💰 Total Sales", f"{total_value:,.0f}")

        with col2:
            total_qty = df[qty_col].sum() if qty_col and qty_col in df.columns and pd.api.types.is_numeric_dtype(df[qty_col]) else 0
            st.metric("🛢️ Total Quantity", f"{total_qty:,.2f}")

        with col3:
            avg_value = df[value_col].mean() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
            st.metric("📊 Average Sale", f"{avg_value:,.0f}")

        with col4:
            total_records = len(df)
            st.metric("📈 Total Records", f"{total_records:,}")

        with col5:
            unique_partners = df[partner_col].nunique() if partner_col else 0
            st.metric("🏢 Unique Partners", f"{unique_partners:,}")
    
    # **MONTHLY SALES & QUANTITY ANALYSIS** - The key feature you requested!
    if date_col and value_col:
    # st.markdown("### 📅 **Monthly Sales & Quantity Analysis**")
        # Convert date column to datetime
        df_time = df.copy()
        try:
            df_time[date_col] = pd.to_datetime(df_time[date_col], errors='coerce', dayfirst=True)
            df_time = df_time.dropna(subset=[date_col])
            if not df_time.empty:
                # Create month-year column
                df_time['Month_Year'] = df_time[date_col].dt.to_period('M').astype(str)
                # Monthly aggregation (add Qty if available)
                monthly_agg_dict = {
                    value_col: ['sum', 'count', 'mean'],
                }
                if 'ID' in df_time.columns:
                    monthly_agg_dict['ID'] = pd.Series.nunique
                if qty_col:
                    monthly_agg_dict[qty_col] = 'sum'
                monthly_stats = df_time.groupby('Month_Year').agg(monthly_agg_dict).round(2)
                # Flatten column names and rename ID_nunique to Used_cards
                flat_cols = []
                for col in monthly_stats.columns:
                    if isinstance(col, tuple):
                        col_name = '_'.join([str(c) for c in col if c])
                        if col_name == 'ID_nunique':
                            col_name = 'Used_cards'
                        flat_cols.append(col_name)
                    else:
                        flat_cols.append(col)
                monthly_stats.columns = flat_cols
                monthly_stats = monthly_stats.reset_index()
                # Merged full-width graph
                if qty_col and f'{qty_col}_sum' in monthly_stats.columns:
                    import plotly.graph_objects as go
                    # Round values for hover display
                    monthly_stats_rounded = monthly_stats.copy()
                    monthly_stats_rounded[f'{value_col}_sum'] = monthly_stats_rounded[f'{value_col}_sum'].round(2)
                    monthly_stats_rounded[f'{qty_col}_sum'] = monthly_stats_rounded[f'{qty_col}_sum'].round(2)
                    fig = go.Figure()
                    fig.add_trace(go.Bar(
                        x=monthly_stats_rounded['Month_Year'],
                        y=monthly_stats_rounded[f'{qty_col}_sum'],
                        name='Quantity',
                        marker_color='#27ae60',
                        yaxis='y1',
                        opacity=0.7,
                        hovertemplate='Quantity: %{y:.2f}<br>Month: %{x}<extra></extra>'
                    ))
                    fig.add_trace(go.Line(
                        x=monthly_stats_rounded['Month_Year'],
                        y=monthly_stats_rounded[f'{value_col}_sum'],
                        name='Sales',
                        marker_color='#2E86AB',
                        yaxis='y2',
                        line=dict(width=3),
                        hovertemplate='Sales: %{y:.2f}<br>Month: %{x}<extra></extra>'
                    ))
                    fig.update_layout(
                        title="Monthly Quantity (bar) & Sales (line)",
                        xaxis=dict(title='Month'),
                        yaxis=dict(title='Quantity', side='left', showgrid=False),
                        yaxis2=dict(title='Sales', side='right', overlaying='y', showgrid=False),
                        legend=dict(x=0.01, y=0.99, bgcolor='rgba(0,0,0,0)'),
                        hovermode='x unified',
                        bargap=0.2,
                        width=None
                    )
                    st.plotly_chart(fig, use_container_width=True, key="fuel_monthly_merged")
                else:
                    # Only sales available
                    fig_monthly_sales = px.line(
                        monthly_stats, 
                        x='Month_Year', 
                        y=f'{value_col}_sum',
                        title="Total Sales by Month",
                        markers=True,
                        line_shape='spline'
                    )
                    fig_monthly_sales.update_traces(line_color='#2E86AB', line_width=3)
                    fig_monthly_sales.update_layout(
                        xaxis_title="Month",
                        yaxis_title="Total Sales",
                        hovermode='x unified'
                    )
                    st.plotly_chart(fig_monthly_sales, use_container_width=True, key="fuel_monthly_sales")
                # Additional monthly insights
                st.markdown("#### 📋 **Monthly Performance Table**")
                if 'Qty_sum' in monthly_stats.columns:
                    growth = monthly_stats['Qty_sum'].diff() / monthly_stats['Qty_sum'].shift(1) * 100
                    growth = growth.round(2).fillna(0)
                    def format_growth(val):
                        if val > 0:
                            return f"<span style='color:green;'>+{val:.2f}% &#8593;</span>"
                        elif val < 0:
                            return f"<span style='color:red;'>{val:.2f}% &#8595;</span>"
                        else:
                            return f"<span style='color:gray;'>{val:.2f}%</span>"
                    monthly_stats['Growth (%)'] = growth.apply(format_growth)
                    cols = list(monthly_stats.columns)
                    if 'Growth (%)' in cols and 'Qty_sum' in cols:
                        qty_idx = cols.index('Qty_sum')
                        cols.remove('Growth (%)')
                        cols.insert(qty_idx + 1, 'Growth (%)')
                        monthly_stats = monthly_stats[cols]
                    # Rename columns for display
                    display_stats = monthly_stats.rename(columns={
                        'Month_Year': 'Month',
                        f'{value_col}_sum': 'Sales Amount',
                        f'{value_col}_count': 'Transactions',
                        f'{value_col}_mean': 'Average Spent',
                        'Used_cards': 'Used Cards',
                        'Qty_sum': 'Quantity'
                    })
                    html_table = display_stats.to_html(escape=False, index=False)
                    html_table = f"<div style='width:100%; margin-bottom: 100px;'><style>table{{width:100% !important;}}</style>{html_table}</div>"
                    st.write(html_table, unsafe_allow_html=True)
            else:
                st.warning("⚠️ Could not parse date column for monthly analysis")
        except Exception as e:
            st.error(f"❌ Error in monthly analysis: {e}")
    
    # **6-MONTH SALES SPIKE ANALYSIS**
    if date_col and value_col:
        st.markdown("### 🚀 **6-Month Sales Spike Analysis**")
        st.markdown("*Identify sales spikes and best performing days*")
        spike_analysis = create_sales_spike_analysis(df, date_col, value_col, period_months=6)
        if spike_analysis:
            # Summary metrics
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric(
                    "🏆 Best Sales Day",
                    f"{spike_analysis['best_day_sales']['Date']}",
                    f"{spike_analysis['best_day_sales']['Total_Sales']:,.0f}"
                )
            with col2:
                st.metric(
                    "📈 Best Volume Day",
                    f"{spike_analysis['best_day_count']['Date']}",
                    f"{spike_analysis['best_day_count']['Transaction_Count']:.0f} transactions"
                )
            with col3:
                st.metric(
                    "💰 Best Average Day",
                    f"{spike_analysis['best_day_avg']['Date']}",
                    f"{spike_analysis['best_day_avg']['Avg_Sale']:,.0f} avg"
                )
            with col4:
                st.metric(
                    f"📊 Period Total",
                    f"{spike_analysis['period_months']} months",
                    f"{spike_analysis['total_period_sales']:,.0f}"
                )
            # Merge daily sales and transaction count into a dual-axis chart
            daily_data = spike_analysis['daily_sales']
            import plotly.graph_objects as go
            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=daily_data['Date'],
                y=daily_data['Transaction_Count'],
                name='Transactions',
                marker_color='#27ae60',
                yaxis='y1',
                opacity=0.7
            ))
            fig.add_trace(go.Line(
                x=daily_data['Date'],
                y=daily_data['Total_Sales'],
                name='Sales Volume',
                marker_color='#2E86AB',
                yaxis='y2',
                line=dict(width=3)
            ))
            fig.update_layout(
                title="Daily Transactions (bar) & Sales Volume (line) - 6 Months",
                xaxis=dict(title='Date'),
                yaxis=dict(title='Transactions', side='left', showgrid=False),
                yaxis2=dict(title='Sales Volume', side='right', overlaying='y', showgrid=False),
                legend=dict(x=0.01, y=0.99, bgcolor='rgba(0,0,0,0)'),
                hovermode='x unified',
                bargap=0.2,
                width=None
            )
            st.plotly_chart(fig, use_container_width=True, key="fuel_spike_merged")
            # (Removed Sales Spike Insights and Best Days of Week graphs as requested)
        else:
            st.warning("⚠️ Could not perform spike analysis - check date and sales data")
    
    # Partner Performance Analysis
    if partner_col and value_col:
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("#### 📊 **Location Performance Table**")
            if location_col:
                location_stats = df.groupby(location_col).agg({
                    value_col: ['sum', 'count', 'mean'],
                    partner_col: 'nunique' if partner_col else lambda x: 0
                }).round(2)
                location_stats.columns = ['Total_Sales', 'Transactions', 'Avg_Sale', 'Partners']
                location_stats = location_stats.sort_values('Total_Sales', ascending=False)
                st.dataframe(location_stats, use_container_width=True)
            else:
                st.info("No location column found for performance table.")
        with col2:
            st.markdown("#### 🎯 **Partner Market Share**")
            partner_sales = df.groupby(partner_col)[value_col].sum().sort_values(ascending=False).head(10)
            fig_partner_pie = px.pie(
                values=partner_sales.values,
                names=partner_sales.index,
                title="Market Share by Partner"
            )
            st.plotly_chart(fig_partner_pie, use_container_width=True)
    
    # (Removed Geographic Analysis and all subsequent sections as requested)

# ==================== ADVANCED ANALYTICS FUNCTIONS ====================

def extract_partner_and_location(df, partner_col):
    """
    Your brilliant location extraction logic!
    Extract uppercase partner name and location from partner string
    """
    df_processed = df.copy()
    
    # Extract uppercase partner name into new column 'Partner_Name'
    df_processed['Partner_Name'] = df_processed[partner_col].str.extract(r'^([A-Z]+)')
    
    # Extract location by removing the Partner_Name from the original string
    df_processed['Location'] = df_processed.apply(
        lambda row: row[partner_col].replace(row['Partner_Name'], '').strip()
        if pd.notnull(row['Partner_Name']) else row[partner_col],
        axis=1
    )
    
    return df_processed

def create_sales_spike_analysis(df, date_col, value_col, period_months=3):
    """
    Create comprehensive 3-month sales spike analysis
    Identifies best performing days and sales patterns
    """
    try:
        # Prepare data
        df_analysis = df.copy()
        df_analysis[date_col] = pd.to_datetime(df_analysis[date_col], errors='coerce', dayfirst=True)
        df_analysis = df_analysis.dropna(subset=[date_col, value_col])
        
        if df_analysis.empty:
            return None
        
        # Get the most recent 3 months of data
        max_date = df_analysis[date_col].max()
        start_date = max_date - pd.DateOffset(months=period_months)
        
        # Filter to 3-month period
        df_period = df_analysis[df_analysis[date_col] >= start_date].copy()
        
        if df_period.empty:
            return None
        
        # Daily aggregation
        daily_sales = df_period.groupby(df_period[date_col].dt.date).agg({
            value_col: ['sum', 'count', 'mean']
        }).round(2)
        
        # Flatten column names
        daily_sales.columns = ['Total_Sales', 'Transaction_Count', 'Avg_Sale']
        daily_sales = daily_sales.reset_index()
        daily_sales.columns = ['Date', 'Total_Sales', 'Transaction_Count', 'Avg_Sale']
        
        # Find best performing days
        best_day_sales = daily_sales.loc[daily_sales['Total_Sales'].idxmax()]
        best_day_count = daily_sales.loc[daily_sales['Transaction_Count'].idxmax()]
        best_day_avg = daily_sales.loc[daily_sales['Avg_Sale'].idxmax()]
        
        # Calculate period totals
        total_period_sales = daily_sales['Total_Sales'].sum()
        
        return {
            'daily_sales': daily_sales,
            'best_day_sales': best_day_sales,
            'best_day_count': best_day_count,
            'best_day_avg': best_day_avg,
            'total_period_sales': total_period_sales,
            'period_months': period_months,
            'start_date': start_date,
            'end_date': max_date
        }
    except Exception as e:
        st.error(f"Error in spike analysis: {e}")
        return None

def create_business_intelligence_dashboard(df):
    """
    Create comprehensive Business Intelligence dashboard
    """
    st.markdown("#### 🚀 **Comprehensive Business Analysis**")
    
    # Auto-detect key columns
    value_col = None
    partner_col = None
    date_col = None
    location_col = None
    
    for col in df.columns:
        col_lower = col.lower()
        if any(x in col_lower for x in ['value', 'valoare', 'amount', 'suma', 'vanzari', 'sales']):
            if value_col is None:
                value_col = col
        elif any(x in col_lower for x in ['partner', 'partener', 'client', 'customer']):
            if partner_col is None:
                partner_col = col
        elif any(x in col_lower for x in ['date', 'data']):
            if date_col is None:
                date_col = col
        elif any(x in col_lower for x in ['location', 'localitate', 'city', 'oras']):
            if location_col is None:
                location_col = col
    
    # Business Intelligence Tabs
    bi_tab1, bi_tab2, bi_tab3, bi_tab4 = st.tabs([
        "📈 Trend Analysis",
        "🎯 Performance Metrics", 
        "🗺️ Geographic Insights",
        "💡 Strategic Insights"
    ])
    
    with bi_tab1:
        st.markdown("#### 📊 **Trend Analysis & Forecasting**")
        if date_col and value_col:
            # Time-based analysis
            df_trends = df.copy()
            df_trends[date_col] = pd.to_datetime(df_trends[date_col], errors='coerce', dayfirst=True)
            df_trends = df_trends.dropna(subset=[date_col, value_col])
            if not df_trends.empty:
                # Monthly trends
                df_trends['Month_Year'] = df_trends[date_col].dt.to_period('M').astype(str)
                monthly_trends = df_trends.groupby('Month_Year')[value_col].agg(['sum', 'count', 'mean']).round(2)
                monthly_trends.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale']
                monthly_trends = monthly_trends.reset_index()
                col1, col2 = st.columns(2)
                with col1:
                    # Sales trend
                    fig_trend = px.line(
                        monthly_trends, 
                        x='Month_Year', 
                        y='Total_Sales',
                        title="📈 Sales Trend Over Time",
                        markers=True
                    )
                    fig_trend.update_traces(line_color='#1f77b4', line_width=3)
                    st.plotly_chart(fig_trend, use_container_width=True, key="bi_trend_line")
                with col2:
                    # Transaction volume trend
                    fig_volume = px.bar(
                        monthly_trends, 
                        x='Month_Year', 
                        y='Transaction_Count',
                        title="📊 Transaction Volume Trend",
                        color='Transaction_Count',
                        color_continuous_scale='viridis'
                    )
                    st.plotly_chart(fig_volume, use_container_width=True, key="bi_trend_bar")
                
                # **3-MONTH SPIKE ANALYSIS** - Your requested feature!
                st.markdown("#### 🚀 **3-Month Sales Spike Analysis**")
                spike_analysis = create_sales_spike_analysis(df_trends, date_col, value_col, period_months=3)
                
                if spike_analysis:
                    # Key metrics
                    col1, col2, col3, col4 = st.columns(4)
                    
                    with col1:
                        st.metric(
                            "🏆 Best Sales Day", 
                            f"{spike_analysis['best_day_sales']['Date']}",
                            f"{spike_analysis['best_day_sales']['Total_Sales']:,.0f}"
                        )
                    
                    with col2:
                        st.metric(
                            "📈 Best Volume Day", 
                            f"{spike_analysis['best_day_count']['Date']}",
                            f"{spike_analysis['best_day_count']['Transaction_Count']:.0f} transactions"
                        )
                    
                    with col3:
                        st.metric(
                            "💰 Best Average Day", 
                            f"{spike_analysis['best_day_avg']['Date']}",
                            f"{spike_analysis['best_day_avg']['Avg_Sale']:,.0f} avg"
                        )
                    
                    with col4:
                        st.metric(
                            "📊 3-Month Total", 
                            f"{spike_analysis['period_months']} months",
                            f"{spike_analysis['total_period_sales']:,.0f}"
                        )
                    
                    # Daily sales chart with spikes highlighted
                    col1, col2 = st.columns(2)
                    
                    with col1:
                        st.markdown("#### 📅 **Daily Sales Pattern**")
                        daily_data = spike_analysis['daily_sales']
                        
                        # Create line chart with markers for spikes
                        fig_daily = px.line(
                            daily_data, 
                            x='Date', 
                            y='Total_Sales',
                            title="Daily Sales (3-Month Period)",
                            markers=True,
                            line_shape='spline'
                        )
                        
                        # Highlight best day
                        best_day = spike_analysis['best_day_sales']
                        fig_daily.add_scatter(
                            x=[best_day['Date']], 
                            y=[best_day['Total_Sales']],
                            mode='markers',
                            marker=dict(size=15, color='red', symbol='star'),
                            name='🏆 Best Sales Day'
                        )
                        
                        st.plotly_chart(fig_daily, use_container_width=True, key="bi_daily_sales")
                    
                    with col2:
                        st.markdown("#### 📊 **Daily Transaction Volume**")
                        fig_daily_count = px.bar(
                            daily_data, 
                            x='Date', 
                            y='Transaction_Count',
                            title="Daily Transaction Count",
                            color='Transaction_Count',
                            color_continuous_scale='viridis'
                        )
                        fig_daily_count.update_layout(
                            xaxis_title="Date",
                            yaxis_title="Transaction Count",
                            showlegend=False
                        )
                        st.plotly_chart(fig_daily_count, use_container_width=True, key="bi_daily_count")
                    
                    # Performance insights
                    st.markdown("#### 💡 **Sales Spike Insights**")
                    insights_col1, insights_col2 = st.columns(2)
                    
                    with insights_col1:
                        # Calculate spike threshold (mean + 2*std)
                        mean_sales = daily_data['Total_Sales'].mean()
                        std_sales = daily_data['Total_Sales'].std()
                        spike_threshold = mean_sales + (2 * std_sales)
                        
                        spike_days = daily_data[daily_data['Total_Sales'] > spike_threshold]
                        
                        st.markdown("**🔥 Spike Days Analysis:**")
                        if len(spike_days) > 0:
                            st.success(f"Found {len(spike_days)} spike days above threshold ({spike_threshold:,.0f})")
                            st.dataframe(spike_days[['Date', 'Total_Sales']].head(), use_container_width=True)
                        else:
                            st.info("No significant spikes detected in this period")
                    
                    with insights_col2:
                        # Day of week analysis
                        daily_data_dow = daily_data.copy()
                        daily_data_dow['Day_of_Week'] = pd.to_datetime(daily_data_dow['Date']).dt.day_name()
                        
                        dow_sales = daily_data_dow.groupby('Day_of_Week')['Total_Sales'].mean().sort_values(ascending=False)
                        
                        st.markdown("**📅 Best Days of Week:**")
                        fig_dow = px.bar(
                            x=dow_sales.index,
                            y=dow_sales.values,
                            title="Average Sales by Day of Week",
                            color=dow_sales.values,
                            color_continuous_scale='blues'
                        )
                        fig_dow.update_layout(
                            xaxis_title="Day of Week",
                            yaxis_title="Average Sales",
                            showlegend=False
                        )
                        st.plotly_chart(fig_dow, use_container_width=True, key="bi_dow")
                
                else:
                    st.warning("⚠️ Could not perform spike analysis - check date and sales data")
            else:
                st.warning("⚠️ Could not process date data for trend analysis")
        else:
            st.info("💡 Upload data with date and sales columns for trend analysis")
    
    with bi_tab2:
        st.markdown("#### 🎯 **Performance Metrics & KPIs**")
        
        if value_col:
            # Key performance indicators
            col1, col2, col3, col4 = st.columns(4)
            
            with col1:
                total_sales = df[value_col].sum() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
                st.metric("💰 Total Sales", f"{total_sales:,.0f}")
            
            with col2:
                avg_sale = df[value_col].mean() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
                st.metric("📊 Average Sale", f"{avg_sale:,.0f}")
            
            with col3:
                median_sale = df[value_col].median() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
                st.metric("📈 Median Sale", f"{median_sale:,.0f}")
            
            with col4:
                std_sale = df[value_col].std() if pd.api.types.is_numeric_dtype(df[value_col]) else 0
                st.metric("📉 Sales Volatility", f"{std_sale:,.0f}")
            
            # Performance distribution
            st.markdown("#### 📊 **Sales Distribution Analysis**")
            col1, col2 = st.columns(2)
            
            with col1:
                fig_hist = px.histogram(
                    df, 
                    x=value_col, 
                    nbins=30,
                    title="Sales Distribution",
                    color_discrete_sequence=['#636EFA']
                )
                st.plotly_chart(fig_hist, use_container_width=True, key="bi_hist")
            
            with col2:
                fig_box = px.box(
                    df, 
                    y=value_col,
                    title="Sales Outlier Analysis"
                )
                st.plotly_chart(fig_box, use_container_width=True, key="bi_box")
        
        # Partner performance
        if partner_col and value_col:
            st.markdown("#### 🏢 **Partner Performance Analysis**")
            
            partner_stats = df.groupby(partner_col)[value_col].agg(['sum', 'count', 'mean']).round(2)
            partner_stats.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale']
            partner_stats = partner_stats.sort_values('Total_Sales', ascending=False).reset_index()
            
            col1, col2 = st.columns(2)
            
            with col1:
                # Top partners chart
                top_10_partners = partner_stats.head(10)
                fig_top_partners = px.bar(
                    top_10_partners,
                    x='Total_Sales',
                    y=partner_col,
                    orientation='h',
                    title="Top 10 Partners by Sales",
                    color='Total_Sales',
                    color_continuous_scale='viridis'
                )
                st.plotly_chart(fig_top_partners, use_container_width=True, key="bi_top_partners")
            
            with col2:
                # Market share
                fig_market_share = px.pie(
                    partner_stats.head(8),
                    values='Total_Sales',
                    names=partner_col,
                    title="Market Share (Top 8 Partners)"
                )
                st.plotly_chart(fig_market_share, use_container_width=True, key="bi_market_share")
            
            # Partner performance table
            st.markdown("#### 📋 **Detailed Partner Performance**")
            st.dataframe(partner_stats, use_container_width=True)
    
    with bi_tab3:
        st.markdown("#### 🗺️ **Geographic Insights**")
        
        if location_col and value_col:
            # Geographic performance
            location_stats = df.groupby(location_col)[value_col].agg(['sum', 'count', 'mean']).round(2)
            location_stats.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale']
            location_stats = location_stats.sort_values('Total_Sales', ascending=False).reset_index()
            
            col1, col2 = st.columns(2)
            
            with col1:
                # Top locations
                top_locations = location_stats.head(10)
                fig_locations = px.bar(
                    top_locations,
                    x=location_col,
                    y='Total_Sales',
                    title="Sales by Location",
                    color='Total_Sales',
                    color_continuous_scale='reds'
                )
                fig_locations.update_xaxes(tickangle=45)
                st.plotly_chart(fig_locations, use_container_width=True, key="bi_locations")
            
            with col2:
                # Location efficiency (average sale)
                fig_efficiency = px.scatter(
                    location_stats,
                    x='Transaction_Count',
                    y='Average_Sale',
                    size='Total_Sales',
                    hover_name=location_col,
                    title="Location Efficiency Analysis",
                    labels={'Transaction_Count': 'Volume', 'Average_Sale': 'Efficiency'}
                )
                st.plotly_chart(fig_efficiency, use_container_width=True, key="bi_efficiency")
            
            # Geographic insights table
            st.markdown("#### 📊 **Geographic Performance Table**")
            st.dataframe(location_stats, use_container_width=True)
        else:
            st.info("💡 Upload data with location information for geographic analysis")
    
    with bi_tab4:
        st.markdown("#### 💡 **Strategic Business Insights**")
        
        # Generate automated insights
        insights = []
        
        if value_col and partner_col:
            # Top performer
            top_partner = df.groupby(partner_col)[value_col].sum().idxmax()
            top_partner_sales = df.groupby(partner_col)[value_col].sum().max()
            insights.append(f"🏆 **Market Leader**: {top_partner} with {top_partner_sales:,.0f} total sales")
            
            # Market concentration
            total_sales = df[value_col].sum()
            top_3_sales = df.groupby(partner_col)[value_col].sum().nlargest(3).sum()
            concentration = (top_3_sales / total_sales) * 100
            insights.append(f"📊 **Market Concentration**: Top 3 partners control {concentration:.1f}% of market")
            
        if location_col and value_col:
            # Geographic insights
            top_location = df.groupby(location_col)[value_col].sum().idxmax()
            insights.append(f"🗺️ **Top Location**: {top_location} leads in sales performance")
            
        if date_col and value_col:
            # Trend insight
            df_recent = df.copy()
            df_recent[date_col] = pd.to_datetime(df_recent[date_col], errors='coerce', dayfirst=True)
            if not df_recent.empty:
                recent_trend = df_recent.groupby(df_recent[date_col].dt.to_period('M'))[value_col].sum()
                if len(recent_trend) >= 2:
                    trend_direction = "growing" if recent_trend.iloc[-1] > recent_trend.iloc[-2] else "declining"
                    insights.append(f"📈 **Recent Trend**: Sales are {trend_direction} month-over-month")
        
        # Display insights
        st.markdown("#### 🎯 **Key Business Insights**")
        for insight in insights:
            st.markdown(f"• {insight}")
        
        # Recommendations
        st.markdown("#### 🚀 **Strategic Recommendations**")
        recommendations = [
            "📊 **Data Quality**: Ensure consistent data collection across all sources",
            "🎯 **Focus Areas**: Concentrate resources on top-performing locations",
            "📈 **Growth Strategy**: Analyze successful patterns for replication",
            "🔍 **Deep Dive**: Investigate underperforming segments for improvement opportunities",
            "📅 **Regular Reviews**: Schedule monthly performance reviews based on these insights"
        ]
        
        for rec in recommendations:
            st.markdown(f"• {rec}")
        
        # Action items
        st.markdown("#### ✅ **Next Steps**")
        action_items = [
            "🔄 **Regular Monitoring**: Set up automated dashboard updates",
            "📧 **Stakeholder Reports**: Share insights with key decision makers", 
            "📊 **Benchmark Tracking**: Monitor performance against industry standards",
            "🎯 **Goal Setting**: Establish KPIs based on current performance",
            "💡 **Continuous Improvement**: Implement data-driven optimization strategies"
        ]
        
        for action in action_items:
            st.markdown(f"• {action}")

# ==================== MAIN APPLICATION ====================

def main():
    # No authentication required for local use
    run_dashboard()
    
import os


# ==================== FUEL ANALYTICS TAB ====================
# The tab variables (tab1, tab2, tab3, tab4, tab5) are defined inside the dashboard function.
# Move all tab logic (tab3, tab5, etc.) inside the dashboard function and call it at the end.

def run_dashboard():
    # Main Navigation Tabs (reordered: Fuel Analytics, Business Intelligence, Compare by ID, Compare by Name)
    tab_fuel, tab_bi, tab_fin, tab_insights, tab_providers = st.tabs([
        "⛽ Fuel Analytics",
        "💡 Business Process",
        "💵 Financial Data",
        "📈 Amikeco Insights",
        "🏦 Providers"
    ])
    # ==================== TAB: Providers ====================
    with tab_providers:
        st.markdown("<div class='tab-header'>🏦 <b>Providers Analytics</b></div>", unsafe_allow_html=True)
        providers_path = os.path.join("uploaded_data", "providers.xlsx")
        if os.path.exists(providers_path):
            try:
                df_prov = pd.read_excel(providers_path, engine='openpyxl')
                # Standardize column names
                df_prov.columns = [c.strip().lower() for c in df_prov.columns]
                # Rename for convenience
                date_col = next((c for c in df_prov.columns if "data" in c), df_prov.columns[0])
                client_col = next((c for c in df_prov.columns if "nume" in c), df_prov.columns[1])
                cardid_col = next((c for c in df_prov.columns if "cardid" in c), df_prov.columns[2])
                value_col = next((c for c in df_prov.columns if "suma" in c), df_prov.columns[3])
                provider_col = next((c for c in df_prov.columns if "provider" in c), df_prov.columns[4])
                # Parse date and value
                df_prov[date_col] = pd.to_datetime(df_prov[date_col], errors='coerce', dayfirst=True)
                df_prov[value_col] = pd.to_numeric(df_prov[value_col], errors='coerce')
                df_prov = df_prov.dropna(subset=[date_col, value_col, provider_col])
                # Filter only completed months (Feb, Mar 2026)
                df_prov['Month_Year'] = df_prov[date_col].dt.to_period('M').astype(str)
                completed_months = sorted(df_prov['Month_Year'].unique())
                # KPIs
                st.markdown("### 📊 Provider Usage KPIs")
                total_amount = df_prov[value_col].sum()
                revolut_amount = df_prov[df_prov[provider_col].str.lower() == 'revolut'][value_col].sum()
                finqware_amount = df_prov[df_prov[provider_col].str.lower() == 'finqware'][value_col].sum()
                revolut_pct = 100 * revolut_amount / total_amount if total_amount > 0 else 0
                finqware_pct = 100 * finqware_amount / total_amount if total_amount > 0 else 0
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Revolut %", f"{revolut_pct:.1f}%")
                    st.metric("Revolut Total", f"{revolut_amount:,.2f}")
                with col2:
                    st.metric("Finqware %", f"{finqware_pct:.1f}%")
                    st.metric("Finqware Total", f"{finqware_amount:,.2f}")
                with col3:
                    st.metric("Total Transfers", f"{total_amount:,.2f}")
                    st.metric("Unique Clients", f"{df_prov[cardid_col].nunique():,}")
                # Monthly trend for each provider
                st.markdown("### 📅 Monthly Provider Trend")
                monthly = df_prov.groupby(['Month_Year', provider_col])[value_col].sum().reset_index()
                # Set custom colors for providers
                provider_colors = {"Revolut": "#000000", "Finqware": "#FF9900"}
                fig = px.bar(
                    monthly,
                    x='Month_Year',
                    y=value_col,
                    color=provider_col,
                    barmode='group',
                    title="Monthly Transfers by Provider",
                    text_auto='.2s',
                    color_discrete_map=provider_colors
                )
                st.plotly_chart(fig, use_container_width=True)
                # Growth calculation
                if len(completed_months) >= 2:
                    last, prev = completed_months[-1], completed_months[-2]
                    # Calculate growth for both providers
                    finq_last = monthly[(monthly['Month_Year'] == last) & (monthly[provider_col].str.lower() == 'finqware')][value_col].sum()
                    finq_prev = monthly[(monthly['Month_Year'] == prev) & (monthly[provider_col].str.lower() == 'finqware')][value_col].sum()
                    finq_growth = ((finq_last - finq_prev) / finq_prev * 100) if finq_prev > 0 else 0
                    rev_last = monthly[(monthly['Month_Year'] == last) & (monthly[provider_col].str.lower() == 'revolut')][value_col].sum()
                    rev_prev = monthly[(monthly['Month_Year'] == prev) & (monthly[provider_col].str.lower() == 'revolut')][value_col].sum()
                    rev_growth = ((rev_last - rev_prev) / rev_prev * 100) if rev_prev > 0 else 0
                    st.markdown(f"#### 📈 Growth from {prev} to {last}")
                    col_finq, col_rev = st.columns(2)
                    with col_finq:
                        st.markdown(f"**Finqware Growth**<br><span style='font-size:2.2rem;font-weight:bold;'>+{finq_growth:.1f}%</span>", unsafe_allow_html=True)
                        st.markdown(f"<span style='color:#43a047;font-size:1.2rem;'>⬆ {finq_last-finq_prev:,.2f}</span>", unsafe_allow_html=True)
                    with col_rev:
                        st.markdown(f"**Revolut Growth**<br><span style='font-size:2.2rem;font-weight:bold;'>+{rev_growth:.1f}%</span>", unsafe_allow_html=True)
                        st.markdown(f"<span style='color:#43a047;font-size:1.2rem;'>⬆ {rev_last-rev_prev:,.2f}</span>", unsafe_allow_html=True)
                # Useful insights
                st.markdown("### 💡 Insights for Providers")
                avg_transfer = df_prov.groupby(provider_col)[value_col].mean().reset_index()
                st.markdown("**Average Transfer per Provider:**")
                st.dataframe(avg_transfer.rename(columns={value_col: 'Avg Transfer'}), use_container_width=True)
                top_clients = df_prov.groupby([provider_col, cardid_col, client_col])[value_col].sum().reset_index()
                top_clients = top_clients.sort_values(value_col, ascending=False).groupby(provider_col).head(5)
                st.markdown("**Top 5 Clients per Provider:**")
                st.dataframe(top_clients.rename(columns={value_col: 'Total Transferred'}), use_container_width=True)
                # Table: All monthly sums
                st.markdown("### 📋 Monthly Sums Table")
                monthly_sums = monthly.pivot(index='Month_Year', columns=provider_col, values=value_col).fillna(0)
                st.dataframe(monthly_sums, use_container_width=True)
            except Exception as e:
                st.error(f"Error: {e}")
        else:
            st.info("No providers.xlsx found in uploaded_data. Please add your data file.")

    # ==================== TAB: Amikeco Insights (Google Analytics) ====================
    with tab_insights:
        st.markdown("## 📈 Amikeco Insights (Google Analytics 4)")
        try:
            from google.analytics.data_v1beta import BetaAnalyticsDataClient
            from google.analytics.data_v1beta.types import DateRange, Metric, Dimension, RunReportRequest
            from google.oauth2 import service_account
            KEY_PATH = "ga_service_account.json"
            PROPERTY_ID = "435523420"  # Your GA4 property ID
            credentials = service_account.Credentials.from_service_account_file(KEY_PATH)
            client = BetaAnalyticsDataClient(credentials=credentials)

            # --- 1. Summary metrics (no dimensions) ---
            summary_request = RunReportRequest(
                property=f"properties/{PROPERTY_ID}",
                dimensions=[],
                metrics=[
                    Metric(name="activeUsers"),
                    Metric(name="newUsers"),
                    Metric(name="userEngagementDuration")
                ],
                date_ranges=[DateRange(start_date="2024-04-07", end_date="today")],
            )
            summary_response = client.run_report(summary_request)
            summary_metrics = summary_response.rows[0].metric_values
            active_users = int(summary_metrics[0].value)
            new_users = int(summary_metrics[1].value)
            total_engagement = float(summary_metrics[2].value)
            avg_time_sec = total_engagement / active_users if active_users > 0 else 0
            avg_min = int(avg_time_sec // 60)
            avg_sec = int(avg_time_sec % 60)

            st.markdown("### Key Metrics (since 2024-04-07)")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Active Users", active_users)
            with col2:
                st.metric("New Users", new_users)
            with col3:
                st.metric("Avg. Engagement Time", f"{avg_min}m {avg_sec}s")

            # --- 2. Breakdown metrics (with dimensions) ---
            breakdown_request = RunReportRequest(
                property=f"properties/{PROPERTY_ID}",
                dimensions=[
                    Dimension(name="date"),
                    Dimension(name="country"),
                    Dimension(name="city")
                ],
                metrics=[
                    Metric(name="activeUsers"),
                    Metric(name="newUsers"),
                    Metric(name="userEngagementDuration")
                ],
                date_ranges=[DateRange(start_date="2024-04-07", end_date="today")],
            )
            breakdown_response = client.run_report(breakdown_request)
            data = {
                "Date": [],
                "Country": [],
                "City": [],
                "Active Users": [],
                "New Users": [],
                "User Engagement Duration (s)": []
            }
            for row in breakdown_response.rows:
                data["Date"].append(row.dimension_values[0].value)
                data["Country"].append(row.dimension_values[1].value)
                data["City"].append(row.dimension_values[2].value)
                data["Active Users"].append(int(row.metric_values[0].value))
                data["New Users"].append(int(row.metric_values[1].value))
                data["User Engagement Duration (s)"].append(float(row.metric_values[2].value))
            df = pd.DataFrame(data)
            col_country, col_city = st.columns(2)
            with col_country:
                st.markdown("### Users by Country")
                country_df = df.groupby("Country")["Active Users"].sum().reset_index().sort_values("Active Users", ascending=False)
                st.dataframe(country_df, use_container_width=True)
            with col_city:
                st.markdown("### Users by City")
                city_df = df.groupby(["Country", "City"])["Active Users"].sum().reset_index().sort_values("Active Users", ascending=False)
                st.dataframe(city_df, use_container_width=True)
            st.markdown("### Daily Active Users Trend")
            daily_df = df.groupby("Date")["Active Users"].sum().reset_index()

            st.line_chart(daily_df.set_index("Date"))

            # --- Event Summary Table ---
            st.markdown("### Event Summary Table")
            try:
                event_request = RunReportRequest(
                    property=f"properties/{PROPERTY_ID}",
                    dimensions=[Dimension(name="eventName")],
                    metrics=[
                        Metric(name="eventCount"),
                        Metric(name="totalUsers")
                    ],
                    date_ranges=[DateRange(start_date="2024-04-07", end_date="today")],
                )
                event_response = client.run_report(event_request)
                # Get total active users for the period (for per-user calculation)
                summary_request = RunReportRequest(
                    property=f"properties/{PROPERTY_ID}",
                    dimensions=[],
                    metrics=[Metric(name="activeUsers")],
                    date_ranges=[DateRange(start_date="2024-04-07", end_date="today")],
                )
                summary_response = client.run_report(summary_request)
                total_active_users = int(summary_response.rows[0].metric_values[0].value)
                event_data = {
                    "Event Name": [],
                    "Event Count": [],
                    "Total Users": [],
                    "Event Count per Active User": []
                }
                for row in event_response.rows:
                    event_name = row.dimension_values[0].value
                    event_count = int(row.metric_values[0].value)
                    total_users = int(row.metric_values[1].value)
                    per_user = round(event_count / total_active_users, 2) if total_active_users > 0 else 0
                    event_data["Event Name"].append(event_name)
                    event_data["Event Count"].append(event_count)
                    event_data["Total Users"].append(total_users)
                    event_data["Event Count per Active User"].append(per_user)
                event_df = pd.DataFrame(event_data)
                event_df = event_df.sort_values("Event Count", ascending=False)
                # Add totals row
                totals = {
                    "Event Name": "Total",
                    "Event Count": event_df["Event Count"].sum(),
                    "Total Users": event_df["Total Users"].sum(),
                    "Event Count per Active User": round(event_df["Event Count per Active User"].mean(), 2)
                }
                event_df = pd.concat([event_df, pd.DataFrame([totals])], ignore_index=True)
                st.dataframe(event_df, use_container_width=True)
            except Exception as e:
                st.info(f"Could not fetch event summary: {e}")
        except Exception as e:
            st.error(f"Google Analytics error: {e}")

    # ==================== TAB: Financial Data ====================
    with tab_fin:
        col_header, col_refresh = st.columns([6, 1])
        with col_header:
            st.markdown("<div class='tab-header'>💵 <b>Financial Data</b></div>", unsafe_allow_html=True)
        with col_refresh:
            refresh_fin = st.button("🔄 Refresh Data", key="refresh_fin")
        # Load and analyze financial_data.xlsx directly (no upload UI)
        fin_path_xlsx = os.path.join("uploaded_data", "financial_data.xlsx")
        if os.path.exists(fin_path_xlsx):
            try:
                df_fin = pd.read_excel(fin_path_xlsx, engine='openpyxl')
                # Standardize column names
                df_fin.columns = [c.strip().lower() for c in df_fin.columns]
                # Try to auto-detect columns
                date_col = next((c for c in df_fin.columns if "date" in c), df_fin.columns[0])
                client_col = next((c for c in df_fin.columns if "client" in c), df_fin.columns[1])
                value_col = next((c for c in df_fin.columns if "value" in c), df_fin.columns[2])
                # Parse date and value
                df_fin[date_col] = pd.to_datetime(df_fin[date_col], errors='coerce', dayfirst=True)
                df_fin[value_col] = pd.to_numeric(df_fin[value_col], errors='coerce')
                df_fin = df_fin.dropna(subset=[date_col, value_col])
                # KPIs
                st.markdown("### 📊 Key Financial KPIs")
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric("💰 Total Income", f"{df_fin[value_col].sum():,.2f}")
                with col2:
                    st.metric("📊 Average Income", f"{df_fin[value_col].mean():,.2f}")
                with col3:
                    st.metric("📈 Records", f"{len(df_fin):,}")
                # Use Card ID for unique clients if available
                card_id_col = next((c for c in df_fin.columns if "card id" in c or "card_id" in c or "cardid" in c), None)
                if card_id_col:
                    unique_clients = df_fin[card_id_col].nunique()
                    unique_label = "🏢 Unique Clients (Card ID)"
                else:
                    unique_clients = df_fin[client_col].nunique()
                    unique_label = "🏢 Unique Clients (Name)"
                with col4:
                    st.metric(unique_label, f"{unique_clients:,}")
                # Monthly trend
                st.markdown("### 📅 Monthly Income Trend")
                df_fin['Month_Year'] = df_fin[date_col].dt.to_period('M').astype(str)
                monthly = df_fin.groupby('Month_Year')[value_col].sum().reset_index()
                fig_trend = px.line(monthly, x='Month_Year', y=value_col, title="Monthly Income", markers=True)
                st.plotly_chart(fig_trend, use_container_width=True)
                # Top clients
                if card_id_col:
                    # Aggregate total income by Card ID
                    top_income = df_fin.groupby(card_id_col)[value_col].sum().sort_values(ascending=False).head(10)
                    # Get first client name for each Card ID
                    card_to_name = df_fin.drop_duplicates(subset=[card_id_col]).set_index(card_id_col)[client_col].to_dict()
                    # Build table with Card ID, Name, Total Income
                    top_clients = top_income.reset_index()
                    top_clients["Name"] = top_clients[card_id_col].map(card_to_name)
                    top_clients = top_clients[[card_id_col, "Name", value_col]]
                    top_clients.columns = ["Card ID", "Name", "Total Income"]
                    # Creative: Show monthly evolution for the top 10 Card IDs
                    if not top_clients.empty:
                        st.markdown("#### 📈 Monthly Evolution for Top 10 Card IDs")
                        # Prepare month columns (03 to 09)
                        all_months = [f"{m:02d}" for m in range(3, 10)]
                        # Extract month from date
                        df_fin['Month'] = df_fin[date_col].dt.strftime('%m')
                        # Build evolution table
                        evolution_rows = []
                        for _, row in top_clients.iterrows():
                            card_id = row["Card ID"]
                            name = row["Name"]
                            df_card = df_fin[df_fin[card_id_col] == card_id]
                            month_income = df_card.groupby('Month')[value_col].sum().reindex(all_months, fill_value=0)
                            total = month_income.sum()
                            evolution_rows.append([card_id, name] + [float(month_income[m]) for m in all_months] + [float(total)])
                        columns = ["Card ID", "Name"] + all_months + ["Total"]
                        evolution_df = pd.DataFrame(evolution_rows, columns=columns)
                        st.dataframe(evolution_df, use_container_width=True)
                else:
                    st.info("No Card ID column found. Showing by client name instead.")
                    top_clients = df_fin.groupby(client_col)[value_col].sum().sort_values(ascending=False).head(10).reset_index()
                    top_clients.columns = ["Client", "Total Income"]
                    st.dataframe(top_clients, use_container_width=True)
                # Daily income spike analysis (graph only, all time)
                st.markdown("### 🚀 Daily Income Spike Analysis")
                if not df_fin.empty:
                    daily = df_fin.groupby(df_fin[date_col].dt.date)[value_col].sum().reset_index()
                    daily.columns = ['Date', 'Total_Income']
                    mean_income = daily['Total_Income'].mean()
                    std_income = daily['Total_Income'].std()
                    spike_threshold = mean_income + 2 * std_income
                    spike_days = daily[daily['Total_Income'] > spike_threshold]
                    fig_daily = px.line(daily, x='Date', y='Total_Income', title="Daily Income (All Time)", markers=True)
                    if not spike_days.empty:
                        fig_daily.add_scatter(x=spike_days['Date'], y=spike_days['Total_Income'], mode='markers', marker=dict(size=12, color='red', symbol='star'), name='Spike Day')
                    st.plotly_chart(fig_daily, use_container_width=True)
                # Data table
                st.markdown("### 📋 Monthly Financial Sums Table")
                # Aggregate sums by month
                monthly_sums = df_fin.groupby(df_fin[date_col].dt.to_period('M'))[value_col].sum().reset_index()
                monthly_sums.columns = ['Month', 'Total_Amount']
                st.dataframe(monthly_sums, use_container_width=True)
            except Exception as e:
                st.error(f"Error: {e}")
        else:
            st.info("No financial_data.xlsx found in uploaded_data. Please add your data file.")

    # ...existing code...

    # ==================== TAB: Fuel Analytics ====================
    with tab_fuel:
        col_header, col_refresh = st.columns([6, 1])
        with col_header:
            st.markdown("<div class='tab-header'>⛽ <b>Fuel Analytics</b></div>", unsafe_allow_html=True)
        with col_refresh:
            refresh = st.button("🔄 Refresh Data", key="refresh_fuel")
        os.makedirs("uploaded_data", exist_ok=True)
        # Support both CSV and XLSX for fuel data
        fuel_path_csv = os.path.join("uploaded_data", "fuel_data.csv")
        fuel_path_xlsx = os.path.join("uploaded_data", "fuel_data.xlsx")
        file_found = False
        if os.path.exists(fuel_path_xlsx):
            try:
                df = pd.read_excel(fuel_path_xlsx, engine='openpyxl')
                file_found = True
            except Exception as e:
                st.error(f"Error reading XLSX: {e}")
        elif os.path.exists(fuel_path_csv):
            try:
                try:
                    df = pd.read_csv(fuel_path_csv, encoding='utf-8')
                except UnicodeDecodeError:
                    df = pd.read_csv(fuel_path_csv, encoding='latin1')
                file_found = True
            except Exception as e:
                st.error(f"Error reading CSV: {e}")
        if file_found:
            try:
                df_clean = load_and_format_fuel_data(df)
                create_fuel_analytics_dashboard(df_clean)
            except Exception as e:
                st.error(f"Error: {e}")
        else:
            st.info("Upload a CSV or XLSX file to analyze fuel sales data.")

    # ==================== TAB: Business Intelligence ====================
    with tab_bi:
        st.markdown("""
        ### 💡 Business Process Flow
        The diagram below illustrates how funds and information flow through the Amikeco fuel card system:
        """)
        st.graphviz_chart('''
        digraph G {
            rankdir=LR;
            node [shape=box, style="rounded,filled", color="#1976d2", fontcolor=white, fontsize=14, width=2.5, height=1.2, fixedsize=true];
            edge [color="#1976d2", penwidth=2];
            User [label="User\n(adds money)", fillcolor="#42a5f5"];
            AmikecoAPI [label="Amikeco API\n(card management)", fillcolor="#66bb6a"];
            UTA [label="UTA\n(funds transfer)", fillcolor="#ffa726"];
            GasStation [label="Gas Station\n(payment)", fillcolor="#ab47bc"];
            Amikeco [label="Amikeco\n(money management)", fillcolor="#ef5350"];
            User -> AmikecoAPI [label="add funds"];
            AmikecoAPI -> UTA [label="transfer funds"];
            UTA -> GasStation [label="user pays"];
            GasStation -> Amikeco [label="transaction info"];
            Amikeco -> AmikecoAPI [label="manage card balance"];
        }
        ''')
        st.markdown("""
        **Legend:**
        - **User**: Initiates by adding money to the card
        - **Amikeco API**: Handles card management and communicates with UTA
        - **UTA**: Receives funds and enables payment at gas stations
        - **Gas Station**: Where the user pays using the card
        - **Amikeco**: Receives transaction info and manages card money
        """)

    # ...Tabs Compare by ID and Compare by Name removed as requested...

# At the end of the script, call the dashboard runner
if __name__ == "__main__":
    main()
