import os import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go from plotly.subplots import make_subplots import numpy as np import io import hashlib from datetime import datetime, timedelta import chardet import os def detect_encoding_and_read_csv(file_buffer): """ Detect file encoding and read CSV with proper encoding handling """ # Reset buffer position file_buffer.seek(0) raw_data = file_buffer.read() # Detect encoding detected = chardet.detect(raw_data) encoding = detected.get('encoding', 'utf-8') confidence = detected.get('confidence', 0) # List of encodings to try (most common for Romanian/European files) encodings_to_try = [ encoding, # Detected encoding first 'utf-8', 'windows-1252', # Common for Excel exports 'iso-8859-1', # Latin-1 'cp1250', # Central European 'utf-8-sig' # UTF-8 with BOM ] # Remove duplicates while preserving order encodings_to_try = list(dict.fromkeys(encodings_to_try)) for enc in encodings_to_try: try: # Create StringIO from decoded bytes decoded_content = raw_data.decode(enc) csv_buffer = io.StringIO(decoded_content) df = pd.read_csv(csv_buffer) # Return success info return df, enc, confidence, None except (UnicodeDecodeError, UnicodeError, pd.errors.EmptyDataError) as e: continue except Exception as e: continue # If all encodings fail return None, None, 0, f"Could not read file with any common encoding" def check_password(): """Bypass authentication for local use.""" return True # Set page config st.set_page_config( page_title="📊 Unified Analytics Dashboard", page_icon="🚀", layout="wide", initial_sidebar_state="expanded", menu_items={ 'Get Help': 'https://github.com/your-username/streamlit-analytics-dashboard', 'Report a bug': 'https://github.com/your-username/streamlit-analytics-dashboard/issues', 'About': """ # 🚀 Unified Analytics Dashboard **Version:** 1.0.0 **Author:** Your Name **Description:** Advanced CSV data analysis and fuel sales analytics platform ### Features: - 🔢 CSV Comparison by ID - 👤 CSV Comparison by Name - ⛽ Fuel Sales Analytics - 🧹 Data Cleaning Tools ### Built with: - Python 3.9+ - Streamlit - Pandas - Plotly **© 2025 - All rights reserved** """ } ) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) # ==================== FILE HANDLING FUNCTIONS ==================== def read_csv_with_encoding_detection(file): """ Read CSV file with automatic encoding detection Handles common encodings including Romanian characters """ try: # Reset file pointer to beginning file.seek(0) # Read a sample to detect encoding sample = file.read(10000) file.seek(0) # Detect encoding detected = chardet.detect(sample) encoding = detected['encoding'] if detected['encoding'] else 'utf-8' # Common encodings to try in order encodings_to_try = [ encoding, 'utf-8', 'utf-8-sig', # UTF-8 with BOM 'iso-8859-1', # Latin-1 'cp1252', # Windows-1252 'iso-8859-2', # Latin-2 (Eastern European) 'cp1250', # Windows-1250 (Eastern European) 'utf-16', 'ascii' ] # Remove duplicates while preserving order seen = set() unique_encodings = [] for enc in encodings_to_try: if enc and enc not in seen: seen.add(enc) unique_encodings.append(enc) # Try each encoding for enc in unique_encodings: try: file.seek(0) content = file.read().decode(enc) df = pd.read_csv(io.StringIO(content)) return df, enc except (UnicodeDecodeError, UnicodeError): continue except Exception as e: continue # If all else fails, try with error handling file.seek(0) content = file.read().decode('utf-8', errors='replace') df = pd.read_csv(io.StringIO(content)) return df, 'utf-8 (with errors replaced)' except Exception as e: raise Exception(f"Could not read file with any encoding. Original error: {str(e)}") def clean_romanian_characters(text): """ Clean and standardize Romanian characters """ if pd.isna(text) or not isinstance(text, str): return text # Romanian character replacements romanian_chars = { 'ă': 'a', 'â': 'a', 'î': 'i', 'ș': 's', 'ț': 't', 'Ă': 'A', 'Â': 'A', 'Î': 'I', 'Ș': 'S', 'Ț': 'T', # Also handle common encoding issues 'ş': 's', 'ţ': 't', 'Ş': 'S', 'Ţ': 'T' } result = text for old_char, new_char in romanian_chars.items(): result = result.replace(old_char, new_char) return result # ==================== CSV COMPARISON FUNCTIONS ==================== def clean_and_aggregate_data(df, group_col, value_col, operation='sum'): """Clean and aggregate data by specified column and operation with improved name matching""" # Make a copy to avoid modifying original df_clean = df.copy() # Handle name-based grouping with normalization if df_clean[group_col].dtype == 'object': # Convert to string and handle NaN values df_clean[group_col] = df_clean[group_col].astype(str) # Create normalized version for matching df_clean['_normalized_key'] = df_clean[group_col].str.lower() df_clean['_normalized_key'] = df_clean['_normalized_key'].str.strip() df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'\s+', ' ', regex=True) df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'[^\w\s]', '', regex=True) df_clean['_normalized_key'] = df_clean['_normalized_key'].str.replace(r'\s+', ' ', regex=True).str.strip() # Clean display version df_clean[group_col] = df_clean[group_col].str.strip() df_clean[group_col] = df_clean[group_col].str.replace(r'\s+', ' ', regex=True) df_clean[group_col] = df_clean[group_col].str.title() group_key = '_normalized_key' else: # For numeric columns, use as-is group_key = group_col # Ensure value column is numeric df_clean[value_col] = pd.to_numeric(df_clean[value_col], errors='coerce') # Remove rows with NaN values df_clean = df_clean.dropna(subset=[group_col, value_col]) # Aggregate by operation using normalized key but keep original name if operation == 'sum': if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', # Keep first occurrence of original name value_col: 'sum' }).reset_index() aggregated = aggregated[[group_col, value_col]] # Drop the normalized key else: aggregated = df_clean.groupby(group_key)[value_col].sum().reset_index() elif operation == 'mean': if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', value_col: 'mean' }).reset_index() aggregated = aggregated[[group_col, value_col]] else: aggregated = df_clean.groupby(group_key)[value_col].mean().reset_index() elif operation == 'max': if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', value_col: 'max' }).reset_index() aggregated = aggregated[[group_col, value_col]] else: aggregated = df_clean.groupby(group_key)[value_col].max().reset_index() elif operation == 'min': if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', value_col: 'min' }).reset_index() aggregated = aggregated[[group_col, value_col]] else: aggregated = df_clean.groupby(group_key)[value_col].min().reset_index() elif operation == 'count': if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', value_col: 'count' }).reset_index() aggregated = aggregated[[group_col, value_col]] else: aggregated = df_clean.groupby(group_key)[value_col].count().reset_index() else: if group_key == '_normalized_key': aggregated = df_clean.groupby(group_key).agg({ group_col: 'first', value_col: 'sum' }).reset_index() aggregated = aggregated[[group_col, value_col]] else: aggregated = df_clean.groupby(group_key)[value_col].sum().reset_index() # Rename columns for clarity aggregated.columns = ['ID', f'Aggregated_{operation.title()}'] return aggregated def perform_comparison(df1, df2, group_col1, value_col1, group_col2, value_col2, operation, file1_name, file2_name): """Perform comparison between two datasets""" # Aggregate both datasets agg1 = clean_and_aggregate_data(df1, group_col1, value_col1, operation) agg2 = clean_and_aggregate_data(df2, group_col2, value_col2, operation) # Rename columns to include file names agg1.columns = ['ID', f'{file1_name}_{operation.title()}'] agg2.columns = ['ID', f'{file2_name}_{operation.title()}'] # Merge datasets comparison = pd.merge(agg1, agg2, on='ID', how='outer') comparison = comparison.fillna(0) # Calculate difference col1_name = f'{file1_name}_{operation.title()}' col2_name = f'{file2_name}_{operation.title()}' comparison['Difference'] = comparison[col1_name] - comparison[col2_name] comparison['Abs_Difference'] = comparison['Difference'].abs() # Round values to avoid floating point precision issues comparison[col1_name] = comparison[col1_name].round(10) comparison[col2_name] = comparison[col2_name].round(10) comparison['Difference'] = comparison['Difference'].round(10) comparison['Abs_Difference'] = comparison['Abs_Difference'].round(10) # Add status with tolerance for floating point comparison tolerance = 1e-10 # Very small tolerance for floating point comparison comparison['Status'] = comparison.apply(lambda row: f'Only in {file1_name}' if row[col2_name] == 0 else f'Only in {file2_name}' if row[col1_name] == 0 else 'Perfect Match' if abs(row['Difference']) <= tolerance else 'Different Values', axis=1) # Sort by absolute difference comparison = comparison.sort_values('Abs_Difference', ascending=False) return comparison, col1_name, col2_name def create_comparison_chart(comparison_df, col1_name, col2_name, file1_name, file2_name): """Create interactive scatter plot for comparison""" fig = px.scatter( comparison_df, x=col1_name, y=col2_name, color='Status', hover_data=['ID', 'Difference'], title=f"Comparison: {file1_name} vs {file2_name}", labels={ col1_name: f'{file1_name} Values', col2_name: f'{file2_name} Values' } ) # Add diagonal line for perfect matches max_val = max(comparison_df[col1_name].max(), comparison_df[col2_name].max()) if max_val > 0: fig.add_shape( type="line", x0=0, y0=0, x1=max_val, y1=max_val, line=dict(color="red", width=2, dash="dash"), name="Perfect Match Line" ) return fig def file_upload_section(key_prefix): """Create file upload section""" st.markdown('

', unsafe_allow_html=True) st.markdown("### 📁 Upload CSV Files") col1, col2 = st.columns(2) with col1: file1 = st.file_uploader( f"Upload First CSV File", type=['csv'], key=f"{key_prefix}_file1" ) with col2: file2 = st.file_uploader( f"Upload Second CSV File", type=['csv'], key=f"{key_prefix}_file2" ) st.markdown('

', unsafe_allow_html=True) return file1, file2 # ==================== FUEL ANALYTICS FUNCTIONS ==================== def load_and_format_fuel_data(df): """Load and format fuel sales data""" df_clean = df.copy() # Clean column names (remove extra spaces, normalize) df_clean.columns = df_clean.columns.str.strip() # Convert date column (try different formats) date_columns = [col for col in df_clean.columns if 'date' in col.lower() or 'data' in col.lower()] if date_columns: date_col = date_columns[0] try: df_clean[date_col] = pd.to_datetime(df_clean[date_col], format='%d.%m.%Y', errors='coerce') except: try: df_clean[date_col] = pd.to_datetime(df_clean[date_col], errors='coerce', dayfirst=True) except: st.warning(f"Could not parse date column: {date_col}") # Clean numeric columns and format Qty/Value as float with 2 decimals numeric_cols = ['Qty', 'Value', 'Cantitate', 'Valoare'] for col in df_clean.columns: if any(nc.lower() in col.lower() for nc in numeric_cols): df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce').round(2) # Clean text columns text_cols = ['Partner', 'Product', 'Country', 'Currency'] for col in df_clean.columns: if any(tc.lower() in col.lower() for tc in text_cols): if df_clean[col].dtype == 'object': df_clean[col] = df_clean[col].astype(str).str.strip() return df_clean def create_fuel_analytics_dashboard(df): """Create comprehensive fuel analytics dashboard""" # KPI Summary st.markdown("### 📊 Key Performance Indicators") # Try to identify key columns dynamically qty_col = None value_col = None partner_col = None product_col = None country_col = None date_col = None location_col = None # Find columns by common names (includes Romanian terms) for col in df.columns: col_lower = col.lower() if any(x in col_lower for x in ['qty', 'cantitate', 'quantity', 'vanzari']): if qty_col is None: # Take first match qty_col = col elif any(x in col_lower for x in ['value', 'valoare', 'amount', 'suma', 'vanzari']): if value_col is None: value_col = col elif any(x in col_lower for x in ['partner', 'partener', 'client', 'customer']): if partner_col is None: partner_col = col elif any(x in col_lower for x in ['product', 'produs', 'combustibil', 'fuel']): if product_col is None: product_col = col elif any(x in col_lower for x in ['country', 'tara', 'nation']): if country_col is None: country_col = col elif any(x in col_lower for x in ['date', 'data']): if date_col is None: date_col = col elif any(x in col_lower for x in ['location', 'localitate', 'city', 'oras']): if location_col is None: location_col = col # Main KPIs if value_col: col1, col2, col3, col4, col5 = st.columns(5) with col1: total_value = df[value_col].sum() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("💰 Total Sales", f"{total_value:,.0f}") with col2: total_qty = df[qty_col].sum() if qty_col and qty_col in df.columns and pd.api.types.is_numeric_dtype(df[qty_col]) else 0 st.metric("🛢️ Total Quantity", f"{total_qty:,.2f}") with col3: avg_value = df[value_col].mean() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("📊 Average Sale", f"{avg_value:,.0f}") with col4: total_records = len(df) st.metric("📈 Total Records", f"{total_records:,}") with col5: unique_partners = df[partner_col].nunique() if partner_col else 0 st.metric("🏢 Unique Partners", f"{unique_partners:,}") # **MONTHLY SALES & QUANTITY ANALYSIS** - The key feature you requested! if date_col and value_col: # st.markdown("### 📅 **Monthly Sales & Quantity Analysis**") # Convert date column to datetime df_time = df.copy() try: df_time[date_col] = pd.to_datetime(df_time[date_col], errors='coerce', dayfirst=True) df_time = df_time.dropna(subset=[date_col]) if not df_time.empty: # Create month-year column df_time['Month_Year'] = df_time[date_col].dt.to_period('M').astype(str) # Monthly aggregation (add Qty if available) monthly_agg_dict = { value_col: ['sum', 'count', 'mean'], } if 'ID' in df_time.columns: monthly_agg_dict['ID'] = pd.Series.nunique if qty_col: monthly_agg_dict[qty_col] = 'sum' monthly_stats = df_time.groupby('Month_Year').agg(monthly_agg_dict).round(2) # Flatten column names and rename ID_nunique to Used_cards flat_cols = [] for col in monthly_stats.columns: if isinstance(col, tuple): col_name = '_'.join([str(c) for c in col if c]) if col_name == 'ID_nunique': col_name = 'Used_cards' flat_cols.append(col_name) else: flat_cols.append(col) monthly_stats.columns = flat_cols monthly_stats = monthly_stats.reset_index() # Merged full-width graph if qty_col and f'{qty_col}_sum' in monthly_stats.columns: import plotly.graph_objects as go # Round values for hover display monthly_stats_rounded = monthly_stats.copy() monthly_stats_rounded[f'{value_col}_sum'] = monthly_stats_rounded[f'{value_col}_sum'].round(2) monthly_stats_rounded[f'{qty_col}_sum'] = monthly_stats_rounded[f'{qty_col}_sum'].round(2) fig = go.Figure() fig.add_trace(go.Bar( x=monthly_stats_rounded['Month_Year'], y=monthly_stats_rounded[f'{qty_col}_sum'], name='Quantity', marker_color='#27ae60', yaxis='y1', opacity=0.7, hovertemplate='Quantity: %{y:.2f}
Month: %{x}' )) fig.add_trace(go.Line( x=monthly_stats_rounded['Month_Year'], y=monthly_stats_rounded[f'{value_col}_sum'], name='Sales', marker_color='#2E86AB', yaxis='y2', line=dict(width=3), hovertemplate='Sales: %{y:.2f}
Month: %{x}' )) fig.update_layout( title="Monthly Quantity (bar) & Sales (line)", xaxis=dict(title='Month'), yaxis=dict(title='Quantity', side='left', showgrid=False), yaxis2=dict(title='Sales', side='right', overlaying='y', showgrid=False), legend=dict(x=0.01, y=0.99, bgcolor='rgba(0,0,0,0)'), hovermode='x unified', bargap=0.2, width=None ) st.plotly_chart(fig, use_container_width=True, key="fuel_monthly_merged") else: # Only sales available fig_monthly_sales = px.line( monthly_stats, x='Month_Year', y=f'{value_col}_sum', title="Total Sales by Month", markers=True, line_shape='spline' ) fig_monthly_sales.update_traces(line_color='#2E86AB', line_width=3) fig_monthly_sales.update_layout( xaxis_title="Month", yaxis_title="Total Sales", hovermode='x unified' ) st.plotly_chart(fig_monthly_sales, use_container_width=True, key="fuel_monthly_sales") # Additional monthly insights st.markdown("#### 📋 **Monthly Performance Table**") if 'Qty_sum' in monthly_stats.columns: growth = monthly_stats['Qty_sum'].diff() / monthly_stats['Qty_sum'].shift(1) * 100 growth = growth.round(2).fillna(0) def format_growth(val): if val > 0: return f"+{val:.2f}% ↑" elif val < 0: return f"{val:.2f}% ↓" else: return f"{val:.2f}%" monthly_stats['Growth (%)'] = growth.apply(format_growth) cols = list(monthly_stats.columns) if 'Growth (%)' in cols and 'Qty_sum' in cols: qty_idx = cols.index('Qty_sum') cols.remove('Growth (%)') cols.insert(qty_idx + 1, 'Growth (%)') monthly_stats = monthly_stats[cols] # Rename columns for display display_stats = monthly_stats.rename(columns={ 'Month_Year': 'Month', f'{value_col}_sum': 'Sales Amount', f'{value_col}_count': 'Transactions', f'{value_col}_mean': 'Average Spent', 'Used_cards': 'Used Cards', 'Qty_sum': 'Quantity' }) html_table = display_stats.to_html(escape=False, index=False) html_table = f"

{html_table}

" st.write(html_table, unsafe_allow_html=True) else: st.warning("⚠️ Could not parse date column for monthly analysis") except Exception as e: st.error(f"❌ Error in monthly analysis: {e}") # **6-MONTH SALES SPIKE ANALYSIS** if date_col and value_col: st.markdown("### 🚀 **6-Month Sales Spike Analysis**") st.markdown("*Identify sales spikes and best performing days*") spike_analysis = create_sales_spike_analysis(df, date_col, value_col, period_months=6) if spike_analysis: # Summary metrics col1, col2, col3, col4 = st.columns(4) with col1: st.metric( "🏆 Best Sales Day", f"{spike_analysis['best_day_sales']['Date']}", f"{spike_analysis['best_day_sales']['Total_Sales']:,.0f}" ) with col2: st.metric( "📈 Best Volume Day", f"{spike_analysis['best_day_count']['Date']}", f"{spike_analysis['best_day_count']['Transaction_Count']:.0f} transactions" ) with col3: st.metric( "💰 Best Average Day", f"{spike_analysis['best_day_avg']['Date']}", f"{spike_analysis['best_day_avg']['Avg_Sale']:,.0f} avg" ) with col4: st.metric( f"📊 Period Total", f"{spike_analysis['period_months']} months", f"{spike_analysis['total_period_sales']:,.0f}" ) # Merge daily sales and transaction count into a dual-axis chart daily_data = spike_analysis['daily_sales'] import plotly.graph_objects as go fig = go.Figure() fig.add_trace(go.Bar( x=daily_data['Date'], y=daily_data['Transaction_Count'], name='Transactions', marker_color='#27ae60', yaxis='y1', opacity=0.7 )) fig.add_trace(go.Line( x=daily_data['Date'], y=daily_data['Total_Sales'], name='Sales Volume', marker_color='#2E86AB', yaxis='y2', line=dict(width=3) )) fig.update_layout( title="Daily Transactions (bar) & Sales Volume (line) - 6 Months", xaxis=dict(title='Date'), yaxis=dict(title='Transactions', side='left', showgrid=False), yaxis2=dict(title='Sales Volume', side='right', overlaying='y', showgrid=False), legend=dict(x=0.01, y=0.99, bgcolor='rgba(0,0,0,0)'), hovermode='x unified', bargap=0.2, width=None ) st.plotly_chart(fig, use_container_width=True, key="fuel_spike_merged") # (Removed Sales Spike Insights and Best Days of Week graphs as requested) else: st.warning("⚠️ Could not perform spike analysis - check date and sales data") # Partner Performance Analysis if partner_col and value_col: col1, col2 = st.columns(2) with col1: st.markdown("#### 📊 **Location Performance Table**") if location_col: location_stats = df.groupby(location_col).agg({ value_col: ['sum', 'count', 'mean'], partner_col: 'nunique' if partner_col else lambda x: 0 }).round(2) location_stats.columns = ['Total_Sales', 'Transactions', 'Avg_Sale', 'Partners'] location_stats = location_stats.sort_values('Total_Sales', ascending=False) st.dataframe(location_stats, use_container_width=True) else: st.info("No location column found for performance table.") with col2: st.markdown("#### 🎯 **Partner Market Share**") partner_sales = df.groupby(partner_col)[value_col].sum().sort_values(ascending=False).head(10) fig_partner_pie = px.pie( values=partner_sales.values, names=partner_sales.index, title="Market Share by Partner" ) st.plotly_chart(fig_partner_pie, use_container_width=True) # (Removed Geographic Analysis and all subsequent sections as requested) # ==================== ADVANCED ANALYTICS FUNCTIONS ==================== def extract_partner_and_location(df, partner_col): """ Your brilliant location extraction logic! Extract uppercase partner name and location from partner string """ df_processed = df.copy() # Extract uppercase partner name into new column 'Partner_Name' df_processed['Partner_Name'] = df_processed[partner_col].str.extract(r'^([A-Z]+)') # Extract location by removing the Partner_Name from the original string df_processed['Location'] = df_processed.apply( lambda row: row[partner_col].replace(row['Partner_Name'], '').strip() if pd.notnull(row['Partner_Name']) else row[partner_col], axis=1 ) return df_processed def create_sales_spike_analysis(df, date_col, value_col, period_months=3): """ Create comprehensive 3-month sales spike analysis Identifies best performing days and sales patterns """ try: # Prepare data df_analysis = df.copy() df_analysis[date_col] = pd.to_datetime(df_analysis[date_col], errors='coerce', dayfirst=True) df_analysis = df_analysis.dropna(subset=[date_col, value_col]) if df_analysis.empty: return None # Get the most recent 3 months of data max_date = df_analysis[date_col].max() start_date = max_date - pd.DateOffset(months=period_months) # Filter to 3-month period df_period = df_analysis[df_analysis[date_col] >= start_date].copy() if df_period.empty: return None # Daily aggregation daily_sales = df_period.groupby(df_period[date_col].dt.date).agg({ value_col: ['sum', 'count', 'mean'] }).round(2) # Flatten column names daily_sales.columns = ['Total_Sales', 'Transaction_Count', 'Avg_Sale'] daily_sales = daily_sales.reset_index() daily_sales.columns = ['Date', 'Total_Sales', 'Transaction_Count', 'Avg_Sale'] # Find best performing days best_day_sales = daily_sales.loc[daily_sales['Total_Sales'].idxmax()] best_day_count = daily_sales.loc[daily_sales['Transaction_Count'].idxmax()] best_day_avg = daily_sales.loc[daily_sales['Avg_Sale'].idxmax()] # Calculate period totals total_period_sales = daily_sales['Total_Sales'].sum() return { 'daily_sales': daily_sales, 'best_day_sales': best_day_sales, 'best_day_count': best_day_count, 'best_day_avg': best_day_avg, 'total_period_sales': total_period_sales, 'period_months': period_months, 'start_date': start_date, 'end_date': max_date } except Exception as e: st.error(f"Error in spike analysis: {e}") return None def create_business_intelligence_dashboard(df): """ Create comprehensive Business Intelligence dashboard """ st.markdown("#### 🚀 **Comprehensive Business Analysis**") # Auto-detect key columns value_col = None partner_col = None date_col = None location_col = None for col in df.columns: col_lower = col.lower() if any(x in col_lower for x in ['value', 'valoare', 'amount', 'suma', 'vanzari', 'sales']): if value_col is None: value_col = col elif any(x in col_lower for x in ['partner', 'partener', 'client', 'customer']): if partner_col is None: partner_col = col elif any(x in col_lower for x in ['date', 'data']): if date_col is None: date_col = col elif any(x in col_lower for x in ['location', 'localitate', 'city', 'oras']): if location_col is None: location_col = col # Business Intelligence Tabs bi_tab1, bi_tab2, bi_tab3, bi_tab4 = st.tabs([ "📈 Trend Analysis", "🎯 Performance Metrics", "🗺️ Geographic Insights", "💡 Strategic Insights" ]) with bi_tab1: st.markdown("#### 📊 **Trend Analysis & Forecasting**") if date_col and value_col: # Time-based analysis df_trends = df.copy() df_trends[date_col] = pd.to_datetime(df_trends[date_col], errors='coerce', dayfirst=True) df_trends = df_trends.dropna(subset=[date_col, value_col]) if not df_trends.empty: # Monthly trends df_trends['Month_Year'] = df_trends[date_col].dt.to_period('M').astype(str) monthly_trends = df_trends.groupby('Month_Year')[value_col].agg(['sum', 'count', 'mean']).round(2) monthly_trends.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale'] monthly_trends = monthly_trends.reset_index() col1, col2 = st.columns(2) with col1: # Sales trend fig_trend = px.line( monthly_trends, x='Month_Year', y='Total_Sales', title="📈 Sales Trend Over Time", markers=True ) fig_trend.update_traces(line_color='#1f77b4', line_width=3) st.plotly_chart(fig_trend, use_container_width=True, key="bi_trend_line") with col2: # Transaction volume trend fig_volume = px.bar( monthly_trends, x='Month_Year', y='Transaction_Count', title="📊 Transaction Volume Trend", color='Transaction_Count', color_continuous_scale='viridis' ) st.plotly_chart(fig_volume, use_container_width=True, key="bi_trend_bar") # **3-MONTH SPIKE ANALYSIS** - Your requested feature! st.markdown("#### 🚀 **3-Month Sales Spike Analysis**") spike_analysis = create_sales_spike_analysis(df_trends, date_col, value_col, period_months=3) if spike_analysis: # Key metrics col1, col2, col3, col4 = st.columns(4) with col1: st.metric( "🏆 Best Sales Day", f"{spike_analysis['best_day_sales']['Date']}", f"{spike_analysis['best_day_sales']['Total_Sales']:,.0f}" ) with col2: st.metric( "📈 Best Volume Day", f"{spike_analysis['best_day_count']['Date']}", f"{spike_analysis['best_day_count']['Transaction_Count']:.0f} transactions" ) with col3: st.metric( "💰 Best Average Day", f"{spike_analysis['best_day_avg']['Date']}", f"{spike_analysis['best_day_avg']['Avg_Sale']:,.0f} avg" ) with col4: st.metric( "📊 3-Month Total", f"{spike_analysis['period_months']} months", f"{spike_analysis['total_period_sales']:,.0f}" ) # Daily sales chart with spikes highlighted col1, col2 = st.columns(2) with col1: st.markdown("#### 📅 **Daily Sales Pattern**") daily_data = spike_analysis['daily_sales'] # Create line chart with markers for spikes fig_daily = px.line( daily_data, x='Date', y='Total_Sales', title="Daily Sales (3-Month Period)", markers=True, line_shape='spline' ) # Highlight best day best_day = spike_analysis['best_day_sales'] fig_daily.add_scatter( x=[best_day['Date']], y=[best_day['Total_Sales']], mode='markers', marker=dict(size=15, color='red', symbol='star'), name='🏆 Best Sales Day' ) st.plotly_chart(fig_daily, use_container_width=True, key="bi_daily_sales") with col2: st.markdown("#### 📊 **Daily Transaction Volume**") fig_daily_count = px.bar( daily_data, x='Date', y='Transaction_Count', title="Daily Transaction Count", color='Transaction_Count', color_continuous_scale='viridis' ) fig_daily_count.update_layout( xaxis_title="Date", yaxis_title="Transaction Count", showlegend=False ) st.plotly_chart(fig_daily_count, use_container_width=True, key="bi_daily_count") # Performance insights st.markdown("#### 💡 **Sales Spike Insights**") insights_col1, insights_col2 = st.columns(2) with insights_col1: # Calculate spike threshold (mean + 2*std) mean_sales = daily_data['Total_Sales'].mean() std_sales = daily_data['Total_Sales'].std() spike_threshold = mean_sales + (2 * std_sales) spike_days = daily_data[daily_data['Total_Sales'] > spike_threshold] st.markdown("**🔥 Spike Days Analysis:**") if len(spike_days) > 0: st.success(f"Found {len(spike_days)} spike days above threshold ({spike_threshold:,.0f})") st.dataframe(spike_days[['Date', 'Total_Sales']].head(), use_container_width=True) else: st.info("No significant spikes detected in this period") with insights_col2: # Day of week analysis daily_data_dow = daily_data.copy() daily_data_dow['Day_of_Week'] = pd.to_datetime(daily_data_dow['Date']).dt.day_name() dow_sales = daily_data_dow.groupby('Day_of_Week')['Total_Sales'].mean().sort_values(ascending=False) st.markdown("**📅 Best Days of Week:**") fig_dow = px.bar( x=dow_sales.index, y=dow_sales.values, title="Average Sales by Day of Week", color=dow_sales.values, color_continuous_scale='blues' ) fig_dow.update_layout( xaxis_title="Day of Week", yaxis_title="Average Sales", showlegend=False ) st.plotly_chart(fig_dow, use_container_width=True, key="bi_dow") else: st.warning("⚠️ Could not perform spike analysis - check date and sales data") else: st.warning("⚠️ Could not process date data for trend analysis") else: st.info("💡 Upload data with date and sales columns for trend analysis") with bi_tab2: st.markdown("#### 🎯 **Performance Metrics & KPIs**") if value_col: # Key performance indicators col1, col2, col3, col4 = st.columns(4) with col1: total_sales = df[value_col].sum() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("💰 Total Sales", f"{total_sales:,.0f}") with col2: avg_sale = df[value_col].mean() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("📊 Average Sale", f"{avg_sale:,.0f}") with col3: median_sale = df[value_col].median() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("📈 Median Sale", f"{median_sale:,.0f}") with col4: std_sale = df[value_col].std() if pd.api.types.is_numeric_dtype(df[value_col]) else 0 st.metric("📉 Sales Volatility", f"{std_sale:,.0f}") # Performance distribution st.markdown("#### 📊 **Sales Distribution Analysis**") col1, col2 = st.columns(2) with col1: fig_hist = px.histogram( df, x=value_col, nbins=30, title="Sales Distribution", color_discrete_sequence=['#636EFA'] ) st.plotly_chart(fig_hist, use_container_width=True, key="bi_hist") with col2: fig_box = px.box( df, y=value_col, title="Sales Outlier Analysis" ) st.plotly_chart(fig_box, use_container_width=True, key="bi_box") # Partner performance if partner_col and value_col: st.markdown("#### 🏢 **Partner Performance Analysis**") partner_stats = df.groupby(partner_col)[value_col].agg(['sum', 'count', 'mean']).round(2) partner_stats.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale'] partner_stats = partner_stats.sort_values('Total_Sales', ascending=False).reset_index() col1, col2 = st.columns(2) with col1: # Top partners chart top_10_partners = partner_stats.head(10) fig_top_partners = px.bar( top_10_partners, x='Total_Sales', y=partner_col, orientation='h', title="Top 10 Partners by Sales", color='Total_Sales', color_continuous_scale='viridis' ) st.plotly_chart(fig_top_partners, use_container_width=True, key="bi_top_partners") with col2: # Market share fig_market_share = px.pie( partner_stats.head(8), values='Total_Sales', names=partner_col, title="Market Share (Top 8 Partners)" ) st.plotly_chart(fig_market_share, use_container_width=True, key="bi_market_share") # Partner performance table st.markdown("#### 📋 **Detailed Partner Performance**") st.dataframe(partner_stats, use_container_width=True) with bi_tab3: st.markdown("#### 🗺️ **Geographic Insights**") if location_col and value_col: # Geographic performance location_stats = df.groupby(location_col)[value_col].agg(['sum', 'count', 'mean']).round(2) location_stats.columns = ['Total_Sales', 'Transaction_Count', 'Average_Sale'] location_stats = location_stats.sort_values('Total_Sales', ascending=False).reset_index() col1, col2 = st.columns(2) with col1: # Top locations top_locations = location_stats.head(10) fig_locations = px.bar( top_locations, x=location_col, y='Total_Sales', title="Sales by Location", color='Total_Sales', color_continuous_scale='reds' ) fig_locations.update_xaxes(tickangle=45) st.plotly_chart(fig_locations, use_container_width=True, key="bi_locations") with col2: # Location efficiency (average sale) fig_efficiency = px.scatter( location_stats, x='Transaction_Count', y='Average_Sale', size='Total_Sales', hover_name=location_col, title="Location Efficiency Analysis", labels={'Transaction_Count': 'Volume', 'Average_Sale': 'Efficiency'} ) st.plotly_chart(fig_efficiency, use_container_width=True, key="bi_efficiency") # Geographic insights table st.markdown("#### 📊 **Geographic Performance Table**") st.dataframe(location_stats, use_container_width=True) else: st.info("💡 Upload data with location information for geographic analysis") with bi_tab4: st.markdown("#### 💡 **Strategic Business Insights**") # Generate automated insights insights = [] if value_col and partner_col: # Top performer top_partner = df.groupby(partner_col)[value_col].sum().idxmax() top_partner_sales = df.groupby(partner_col)[value_col].sum().max() insights.append(f"🏆 **Market Leader**: {top_partner} with {top_partner_sales:,.0f} total sales") # Market concentration total_sales = df[value_col].sum() top_3_sales = df.groupby(partner_col)[value_col].sum().nlargest(3).sum() concentration = (top_3_sales / total_sales) * 100 insights.append(f"📊 **Market Concentration**: Top 3 partners control {concentration:.1f}% of market") if location_col and value_col: # Geographic insights top_location = df.groupby(location_col)[value_col].sum().idxmax() insights.append(f"🗺️ **Top Location**: {top_location} leads in sales performance") if date_col and value_col: # Trend insight df_recent = df.copy() df_recent[date_col] = pd.to_datetime(df_recent[date_col], errors='coerce', dayfirst=True) if not df_recent.empty: recent_trend = df_recent.groupby(df_recent[date_col].dt.to_period('M'))[value_col].sum() if len(recent_trend) >= 2: trend_direction = "growing" if recent_trend.iloc[-1] > recent_trend.iloc[-2] else "declining" insights.append(f"📈 **Recent Trend**: Sales are {trend_direction} month-over-month") # Display insights st.markdown("#### 🎯 **Key Business Insights**") for insight in insights: st.markdown(f"• {insight}") # Recommendations st.markdown("#### 🚀 **Strategic Recommendations**") recommendations = [ "📊 **Data Quality**: Ensure consistent data collection across all sources", "🎯 **Focus Areas**: Concentrate resources on top-performing locations", "📈 **Growth Strategy**: Analyze successful patterns for replication", "🔍 **Deep Dive**: Investigate underperforming segments for improvement opportunities", "📅 **Regular Reviews**: Schedule monthly performance reviews based on these insights" ] for rec in recommendations: st.markdown(f"• {rec}") # Action items st.markdown("#### ✅ **Next Steps**") action_items = [ "🔄 **Regular Monitoring**: Set up automated dashboard updates", "📧 **Stakeholder Reports**: Share insights with key decision makers", "📊 **Benchmark Tracking**: Monitor performance against industry standards", "🎯 **Goal Setting**: Establish KPIs based on current performance", "💡 **Continuous Improvement**: Implement data-driven optimization strategies" ] for action in action_items: st.markdown(f"• {action}") # ==================== MAIN APPLICATION ==================== def main(): # No authentication required for local use run_dashboard() import os # ==================== FUEL ANALYTICS TAB ==================== # The tab variables (tab1, tab2, tab3, tab4, tab5) are defined inside the dashboard function. # Move all tab logic (tab3, tab5, etc.) inside the dashboard function and call it at the end. def run_dashboard(): # Main Navigation Tabs (reordered: Fuel Analytics, Business Intelligence, Compare by ID, Compare by Name) tab_fuel, tab_bi, tab_fin, tab_insights, tab_providers = st.tabs([ "⛽ Fuel Analytics", "💡 Business Process", "💵 Financial Data", "📈 Amikeco Insights", "🏦 Providers" ]) # ==================== TAB: Providers ==================== with tab_providers: st.markdown("

🏦 Providers Analytics

", unsafe_allow_html=True) providers_path = os.path.join("uploaded_data", "providers.xlsx") if os.path.exists(providers_path): try: df_prov = pd.read_excel(providers_path, engine='openpyxl') # Standardize column names df_prov.columns = [c.strip().lower() for c in df_prov.columns] # Rename for convenience date_col = next((c for c in df_prov.columns if "data" in c), df_prov.columns[0]) client_col = next((c for c in df_prov.columns if "nume" in c), df_prov.columns[1]) cardid_col = next((c for c in df_prov.columns if "cardid" in c), df_prov.columns[2]) value_col = next((c for c in df_prov.columns if "suma" in c), df_prov.columns[3]) provider_col = next((c for c in df_prov.columns if "provider" in c), df_prov.columns[4]) # Parse date and value df_prov[date_col] = pd.to_datetime(df_prov[date_col], errors='coerce', dayfirst=True) df_prov[value_col] = pd.to_numeric(df_prov[value_col], errors='coerce') df_prov = df_prov.dropna(subset=[date_col, value_col, provider_col]) # Filter only completed months (Feb, Mar 2026) df_prov['Month_Year'] = df_prov[date_col].dt.to_period('M').astype(str) completed_months = sorted(df_prov['Month_Year'].unique()) # KPIs st.markdown("### 📊 Provider Usage KPIs") total_amount = df_prov[value_col].sum() revolut_amount = df_prov[df_prov[provider_col].str.lower() == 'revolut'][value_col].sum() finqware_amount = df_prov[df_prov[provider_col].str.lower() == 'finqware'][value_col].sum() revolut_pct = 100 * revolut_amount / total_amount if total_amount > 0 else 0 finqware_pct = 100 * finqware_amount / total_amount if total_amount > 0 else 0 col1, col2, col3 = st.columns(3) with col1: st.metric("Revolut %", f"{revolut_pct:.1f}%") st.metric("Revolut Total", f"{revolut_amount:,.2f}") with col2: st.metric("Finqware %", f"{finqware_pct:.1f}%") st.metric("Finqware Total", f"{finqware_amount:,.2f}") with col3: st.metric("Total Transfers", f"{total_amount:,.2f}") st.metric("Unique Clients", f"{df_prov[cardid_col].nunique():,}") # Monthly trend for each provider st.markdown("### 📅 Monthly Provider Trend") monthly = df_prov.groupby(['Month_Year', provider_col])[value_col].sum().reset_index() # Set custom colors for providers provider_colors = {"Revolut": "#000000", "Finqware": "#FF9900"} fig = px.bar( monthly, x='Month_Year', y=value_col, color=provider_col, barmode='group', title="Monthly Transfers by Provider", text_auto='.2s', color_discrete_map=provider_colors ) st.plotly_chart(fig, use_container_width=True) # Growth calculation if len(completed_months) >= 2: last, prev = completed_months[-1], completed_months[-2] # Calculate growth for both providers finq_last = monthly[(monthly['Month_Year'] == last) & (monthly[provider_col].str.lower() == 'finqware')][value_col].sum() finq_prev = monthly[(monthly['Month_Year'] == prev) & (monthly[provider_col].str.lower() == 'finqware')][value_col].sum() finq_growth = ((finq_last - finq_prev) / finq_prev * 100) if finq_prev > 0 else 0 rev_last = monthly[(monthly['Month_Year'] == last) & (monthly[provider_col].str.lower() == 'revolut')][value_col].sum() rev_prev = monthly[(monthly['Month_Year'] == prev) & (monthly[provider_col].str.lower() == 'revolut')][value_col].sum() rev_growth = ((rev_last - rev_prev) / rev_prev * 100) if rev_prev > 0 else 0 st.markdown(f"#### 📈 Growth from {prev} to {last}") col_finq, col_rev = st.columns(2) with col_finq: st.markdown(f"**Finqware Growth**
+{finq_growth:.1f}%", unsafe_allow_html=True) st.markdown(f"⬆ {finq_last-finq_prev:,.2f}", unsafe_allow_html=True) with col_rev: st.markdown(f"**Revolut Growth**
+{rev_growth:.1f}%", unsafe_allow_html=True) st.markdown(f"⬆ {rev_last-rev_prev:,.2f}", unsafe_allow_html=True) # Useful insights st.markdown("### 💡 Insights for Providers") avg_transfer = df_prov.groupby(provider_col)[value_col].mean().reset_index() st.markdown("**Average Transfer per Provider:**") st.dataframe(avg_transfer.rename(columns={value_col: 'Avg Transfer'}), use_container_width=True) top_clients = df_prov.groupby([provider_col, cardid_col, client_col])[value_col].sum().reset_index() top_clients = top_clients.sort_values(value_col, ascending=False).groupby(provider_col).head(5) st.markdown("**Top 5 Clients per Provider:**") st.dataframe(top_clients.rename(columns={value_col: 'Total Transferred'}), use_container_width=True) # Table: All monthly sums st.markdown("### 📋 Monthly Sums Table") monthly_sums = monthly.pivot(index='Month_Year', columns=provider_col, values=value_col).fillna(0) st.dataframe(monthly_sums, use_container_width=True) except Exception as e: st.error(f"Error: {e}") else: st.info("No providers.xlsx found in uploaded_data. Please add your data file.") # ==================== TAB: Amikeco Insights (Google Analytics) ==================== with tab_insights: st.markdown("## 📈 Amikeco Insights (Google Analytics 4)") try: from google.analytics.data_v1beta import BetaAnalyticsDataClient from google.analytics.data_v1beta.types import DateRange, Metric, Dimension, RunReportRequest from google.oauth2 import service_account KEY_PATH = "ga_service_account.json" PROPERTY_ID = "435523420" # Your GA4 property ID credentials = service_account.Credentials.from_service_account_file(KEY_PATH) client = BetaAnalyticsDataClient(credentials=credentials) # --- 1. Summary metrics (no dimensions) --- summary_request = RunReportRequest( property=f"properties/{PROPERTY_ID}", dimensions=[], metrics=[ Metric(name="activeUsers"), Metric(name="newUsers"), Metric(name="userEngagementDuration") ], date_ranges=[DateRange(start_date="2024-04-07", end_date="today")], ) summary_response = client.run_report(summary_request) summary_metrics = summary_response.rows[0].metric_values active_users = int(summary_metrics[0].value) new_users = int(summary_metrics[1].value) total_engagement = float(summary_metrics[2].value) avg_time_sec = total_engagement / active_users if active_users > 0 else 0 avg_min = int(avg_time_sec // 60) avg_sec = int(avg_time_sec % 60) st.markdown("### Key Metrics (since 2024-04-07)") col1, col2, col3 = st.columns(3) with col1: st.metric("Active Users", active_users) with col2: st.metric("New Users", new_users) with col3: st.metric("Avg. Engagement Time", f"{avg_min}m {avg_sec}s") # --- 2. Breakdown metrics (with dimensions) --- breakdown_request = RunReportRequest( property=f"properties/{PROPERTY_ID}", dimensions=[ Dimension(name="date"), Dimension(name="country"), Dimension(name="city") ], metrics=[ Metric(name="activeUsers"), Metric(name="newUsers"), Metric(name="userEngagementDuration") ], date_ranges=[DateRange(start_date="2024-04-07", end_date="today")], ) breakdown_response = client.run_report(breakdown_request) data = { "Date": [], "Country": [], "City": [], "Active Users": [], "New Users": [], "User Engagement Duration (s)": [] } for row in breakdown_response.rows: data["Date"].append(row.dimension_values[0].value) data["Country"].append(row.dimension_values[1].value) data["City"].append(row.dimension_values[2].value) data["Active Users"].append(int(row.metric_values[0].value)) data["New Users"].append(int(row.metric_values[1].value)) data["User Engagement Duration (s)"].append(float(row.metric_values[2].value)) df = pd.DataFrame(data) col_country, col_city = st.columns(2) with col_country: st.markdown("### Users by Country") country_df = df.groupby("Country")["Active Users"].sum().reset_index().sort_values("Active Users", ascending=False) st.dataframe(country_df, use_container_width=True) with col_city: st.markdown("### Users by City") city_df = df.groupby(["Country", "City"])["Active Users"].sum().reset_index().sort_values("Active Users", ascending=False) st.dataframe(city_df, use_container_width=True) st.markdown("### Daily Active Users Trend") daily_df = df.groupby("Date")["Active Users"].sum().reset_index() st.line_chart(daily_df.set_index("Date")) # --- Event Summary Table --- st.markdown("### Event Summary Table") try: event_request = RunReportRequest( property=f"properties/{PROPERTY_ID}", dimensions=[Dimension(name="eventName")], metrics=[ Metric(name="eventCount"), Metric(name="totalUsers") ], date_ranges=[DateRange(start_date="2024-04-07", end_date="today")], ) event_response = client.run_report(event_request) # Get total active users for the period (for per-user calculation) summary_request = RunReportRequest( property=f"properties/{PROPERTY_ID}", dimensions=[], metrics=[Metric(name="activeUsers")], date_ranges=[DateRange(start_date="2024-04-07", end_date="today")], ) summary_response = client.run_report(summary_request) total_active_users = int(summary_response.rows[0].metric_values[0].value) event_data = { "Event Name": [], "Event Count": [], "Total Users": [], "Event Count per Active User": [] } for row in event_response.rows: event_name = row.dimension_values[0].value event_count = int(row.metric_values[0].value) total_users = int(row.metric_values[1].value) per_user = round(event_count / total_active_users, 2) if total_active_users > 0 else 0 event_data["Event Name"].append(event_name) event_data["Event Count"].append(event_count) event_data["Total Users"].append(total_users) event_data["Event Count per Active User"].append(per_user) event_df = pd.DataFrame(event_data) event_df = event_df.sort_values("Event Count", ascending=False) # Add totals row totals = { "Event Name": "Total", "Event Count": event_df["Event Count"].sum(), "Total Users": event_df["Total Users"].sum(), "Event Count per Active User": round(event_df["Event Count per Active User"].mean(), 2) } event_df = pd.concat([event_df, pd.DataFrame([totals])], ignore_index=True) st.dataframe(event_df, use_container_width=True) except Exception as e: st.info(f"Could not fetch event summary: {e}") except Exception as e: st.error(f"Google Analytics error: {e}") # ==================== TAB: Financial Data ==================== with tab_fin: col_header, col_refresh = st.columns([6, 1]) with col_header: st.markdown("

💵 Financial Data

", unsafe_allow_html=True) with col_refresh: refresh_fin = st.button("🔄 Refresh Data", key="refresh_fin") # Load and analyze financial_data.xlsx directly (no upload UI) fin_path_xlsx = os.path.join("uploaded_data", "financial_data.xlsx") if os.path.exists(fin_path_xlsx): try: df_fin = pd.read_excel(fin_path_xlsx, engine='openpyxl') # Standardize column names df_fin.columns = [c.strip().lower() for c in df_fin.columns] # Try to auto-detect columns date_col = next((c for c in df_fin.columns if "date" in c), df_fin.columns[0]) client_col = next((c for c in df_fin.columns if "client" in c), df_fin.columns[1]) value_col = next((c for c in df_fin.columns if "value" in c), df_fin.columns[2]) # Parse date and value df_fin[date_col] = pd.to_datetime(df_fin[date_col], errors='coerce', dayfirst=True) df_fin[value_col] = pd.to_numeric(df_fin[value_col], errors='coerce') df_fin = df_fin.dropna(subset=[date_col, value_col]) # KPIs st.markdown("### 📊 Key Financial KPIs") col1, col2, col3, col4 = st.columns(4) with col1: st.metric("💰 Total Income", f"{df_fin[value_col].sum():,.2f}") with col2: st.metric("📊 Average Income", f"{df_fin[value_col].mean():,.2f}") with col3: st.metric("📈 Records", f"{len(df_fin):,}") # Use Card ID for unique clients if available card_id_col = next((c for c in df_fin.columns if "card id" in c or "card_id" in c or "cardid" in c), None) if card_id_col: unique_clients = df_fin[card_id_col].nunique() unique_label = "🏢 Unique Clients (Card ID)" else: unique_clients = df_fin[client_col].nunique() unique_label = "🏢 Unique Clients (Name)" with col4: st.metric(unique_label, f"{unique_clients:,}") # Monthly trend st.markdown("### 📅 Monthly Income Trend") df_fin['Month_Year'] = df_fin[date_col].dt.to_period('M').astype(str) monthly = df_fin.groupby('Month_Year')[value_col].sum().reset_index() fig_trend = px.line(monthly, x='Month_Year', y=value_col, title="Monthly Income", markers=True) st.plotly_chart(fig_trend, use_container_width=True) # Top clients if card_id_col: # Aggregate total income by Card ID top_income = df_fin.groupby(card_id_col)[value_col].sum().sort_values(ascending=False).head(10) # Get first client name for each Card ID card_to_name = df_fin.drop_duplicates(subset=[card_id_col]).set_index(card_id_col)[client_col].to_dict() # Build table with Card ID, Name, Total Income top_clients = top_income.reset_index() top_clients["Name"] = top_clients[card_id_col].map(card_to_name) top_clients = top_clients[[card_id_col, "Name", value_col]] top_clients.columns = ["Card ID", "Name", "Total Income"] # Creative: Show monthly evolution for the top 10 Card IDs if not top_clients.empty: st.markdown("#### 📈 Monthly Evolution for Top 10 Card IDs") # Prepare month columns (03 to 09) all_months = [f"{m:02d}" for m in range(3, 10)] # Extract month from date df_fin['Month'] = df_fin[date_col].dt.strftime('%m') # Build evolution table evolution_rows = [] for _, row in top_clients.iterrows(): card_id = row["Card ID"] name = row["Name"] df_card = df_fin[df_fin[card_id_col] == card_id] month_income = df_card.groupby('Month')[value_col].sum().reindex(all_months, fill_value=0) total = month_income.sum() evolution_rows.append([card_id, name] + [float(month_income[m]) for m in all_months] + [float(total)]) columns = ["Card ID", "Name"] + all_months + ["Total"] evolution_df = pd.DataFrame(evolution_rows, columns=columns) st.dataframe(evolution_df, use_container_width=True) else: st.info("No Card ID column found. Showing by client name instead.") top_clients = df_fin.groupby(client_col)[value_col].sum().sort_values(ascending=False).head(10).reset_index() top_clients.columns = ["Client", "Total Income"] st.dataframe(top_clients, use_container_width=True) # Daily income spike analysis (graph only, all time) st.markdown("### 🚀 Daily Income Spike Analysis") if not df_fin.empty: daily = df_fin.groupby(df_fin[date_col].dt.date)[value_col].sum().reset_index() daily.columns = ['Date', 'Total_Income'] mean_income = daily['Total_Income'].mean() std_income = daily['Total_Income'].std() spike_threshold = mean_income + 2 * std_income spike_days = daily[daily['Total_Income'] > spike_threshold] fig_daily = px.line(daily, x='Date', y='Total_Income', title="Daily Income (All Time)", markers=True) if not spike_days.empty: fig_daily.add_scatter(x=spike_days['Date'], y=spike_days['Total_Income'], mode='markers', marker=dict(size=12, color='red', symbol='star'), name='Spike Day') st.plotly_chart(fig_daily, use_container_width=True) # Data table st.markdown("### 📋 Monthly Financial Sums Table") # Aggregate sums by month monthly_sums = df_fin.groupby(df_fin[date_col].dt.to_period('M'))[value_col].sum().reset_index() monthly_sums.columns = ['Month', 'Total_Amount'] st.dataframe(monthly_sums, use_container_width=True) except Exception as e: st.error(f"Error: {e}") else: st.info("No financial_data.xlsx found in uploaded_data. Please add your data file.") # ...existing code... # ==================== TAB: Fuel Analytics ==================== with tab_fuel: col_header, col_refresh = st.columns([6, 1]) with col_header: st.markdown("

⛽ Fuel Analytics

", unsafe_allow_html=True) with col_refresh: refresh = st.button("🔄 Refresh Data", key="refresh_fuel") os.makedirs("uploaded_data", exist_ok=True) # Support both CSV and XLSX for fuel data fuel_path_csv = os.path.join("uploaded_data", "fuel_data.csv") fuel_path_xlsx = os.path.join("uploaded_data", "fuel_data.xlsx") file_found = False if os.path.exists(fuel_path_xlsx): try: df = pd.read_excel(fuel_path_xlsx, engine='openpyxl') file_found = True except Exception as e: st.error(f"Error reading XLSX: {e}") elif os.path.exists(fuel_path_csv): try: try: df = pd.read_csv(fuel_path_csv, encoding='utf-8') except UnicodeDecodeError: df = pd.read_csv(fuel_path_csv, encoding='latin1') file_found = True except Exception as e: st.error(f"Error reading CSV: {e}") if file_found: try: df_clean = load_and_format_fuel_data(df) create_fuel_analytics_dashboard(df_clean) except Exception as e: st.error(f"Error: {e}") else: st.info("Upload a CSV or XLSX file to analyze fuel sales data.") # ==================== TAB: Business Intelligence ==================== with tab_bi: st.markdown(""" ### 💡 Business Process Flow The diagram below illustrates how funds and information flow through the Amikeco fuel card system: """) st.graphviz_chart(''' digraph G { rankdir=LR; node [shape=box, style="rounded,filled", color="#1976d2", fontcolor=white, fontsize=14, width=2.5, height=1.2, fixedsize=true]; edge [color="#1976d2", penwidth=2]; User [label="User\n(adds money)", fillcolor="#42a5f5"]; AmikecoAPI [label="Amikeco API\n(card management)", fillcolor="#66bb6a"]; UTA [label="UTA\n(funds transfer)", fillcolor="#ffa726"]; GasStation [label="Gas Station\n(payment)", fillcolor="#ab47bc"]; Amikeco [label="Amikeco\n(money management)", fillcolor="#ef5350"]; User -> AmikecoAPI [label="add funds"]; AmikecoAPI -> UTA [label="transfer funds"]; UTA -> GasStation [label="user pays"]; GasStation -> Amikeco [label="transaction info"]; Amikeco -> AmikecoAPI [label="manage card balance"]; } ''') st.markdown(""" **Legend:** - **User**: Initiates by adding money to the card - **Amikeco API**: Handles card management and communicates with UTA - **UTA**: Receives funds and enables payment at gas stations - **Gas Station**: Where the user pays using the card - **Amikeco**: Receives transaction info and manages card money """) # ...Tabs Compare by ID and Compare by Name removed as requested... # At the end of the script, call the dashboard runner if __name__ == "__main__": main()