# Spaces: ehagey / AIExperimentsBudgeting (Sleeping)
#
# File size: 7,557 Bytes

import streamlit as st
import pandas as pd

# Page setup: the browser-tab title and the on-page heading share one name.
DASHBOARD_TITLE = "LLM API Budget Dashboard"

st.set_page_config(layout="wide", page_title=DASHBOARD_TITLE)
st.title(DASHBOARD_TITLE)


# Pricing per model, in $ per million tokens: (model name, input, output)
_PRICE_TABLE = (
    ("GPT-4o", 2.50, 10.00),
    ("Claude 3.7 Sonnet", 3.00, 15.00),
    ("Gemini Flash 1.5-8b", 0.038, 0.15),
    ("o3-mini", 1.10, 4.40),
)

# Lookup keyed by model name; each value holds the input and output price
llm_data = {
    name: {"input_cost_per_m": price_in, "output_cost_per_m": price_out}
    for name, price_in, price_out in _PRICE_TABLE
}

# The same prices as a table (one row per model) for display
llm_df = pd.DataFrame(
    {
        "Model": list(llm_data),
        "Input Cost ($/M tokens)": [
            prices["input_cost_per_m"] for prices in llm_data.values()
        ],
        "Output Cost ($/M tokens)": [
            prices["output_cost_per_m"] for prices in llm_data.values()
        ],
    }
)

# Pricing reference table, sized to the width of its container
st.subheader("LLM Cost Information")
st.dataframe(data=llm_df, use_container_width=True)

# Main layout: two side-by-side panes with a 1:3 width ratio
layout_columns = st.columns([1, 3])
left_col = layout_columns[0]
right_col = layout_columns[1]

with left_col:
    # Configuration pane: every widget here collects a user input. The
    # resulting values (token counts, selected models, run counts and
    # stability settings) are read by the cost calculation further down.
    st.header("Configuration")

    # Tokens consumed and produced by a single run
    st.subheader("Token Settings")
    input_tokens = st.number_input("Input Tokens", min_value=1, value=400, step=100)
    output_tokens = st.number_input("Output Tokens", min_value=1, value=200, step=100)

    # Models to include in the budget; all of them are pre-selected
    st.subheader("Select LLMs")
    model_names = list(llm_data.keys())
    selected_llms = st.multiselect(
        "Choose LLMs",
        options=model_names,
        default=list(model_names),
    )

    # Number of runs per model: one shared count, or one count per model
    st.subheader("Run Count Settings")
    uniform_runs = st.checkbox("Run all LLMs the same number of times", value=True)

    if not uniform_runs:
        st.write("Set individual run counts for each LLM:")
        run_counts = {
            llm: st.number_input(f"Runs for {llm}", min_value=1, value=1, step=1)
            for llm in selected_llms
        }
    else:
        uniform_run_count = st.number_input(
            "Number of runs for all LLMs", min_value=1, value=400000, step=1
        )
        run_counts = dict.fromkeys(selected_llms, uniform_run_count)

    # Optional stability testing
    st.subheader("Stability Test Settings")
    stability_test = st.checkbox("Enable stability testing", value=False)

    # Filled in only for models whose own "Test stability" box is ticked:
    #   stability_iterations[llm]       -> number of iterations
    #   stability_data_percentages[llm] -> fraction of the data, 0.05 to 1.0
    stability_iterations = {}
    stability_data_percentages = {}

    if stability_test:
        st.write("Global Stability Settings:")
        use_subset = st.checkbox("Test stability on a subset of data", value=False)

        if use_subset:
            # Starting value offered in each model's own "Data %" field
            default_percent = st.slider(
                "Default data percentage for stability tests",
                min_value=10,
                max_value=100,
                value=50,
                step=5,
                help="Percentage of the input data to use for stability testing",
            )

        st.write("Set stability iterations for selected LLMs:")
        for llm in selected_llms:
            st.markdown(f"**{llm} Stability Settings**")
            toggle_col, percent_col = st.columns(2)

            stability_enabled = toggle_col.checkbox(
                "Test stability", value=False, key=f"stability_{llm}"
            )

            if stability_enabled:
                stability_iterations[llm] = toggle_col.number_input(
                    "Iterations",
                    min_value=2,
                    value=10,
                    step=1,
                    key=f"iterations_{llm}",
                )

                if not use_subset:
                    # No subsetting requested: stability runs use all the data
                    stability_data_percentages[llm] = 1.0
                else:
                    chosen_percent = percent_col.number_input(
                        "Data %",
                        min_value=5,
                        max_value=100,
                        value=default_percent,
                        step=5,
                        key=f"percent_{llm}",
                        help="Percentage of the input data to use",
                    )
                    # Stored as a fraction rather than a percentage
                    stability_data_percentages[llm] = chosen_percent / 100.0

            # Horizontal rule between models, but not after the last one
            is_last_model = llm == selected_llms[-1]
            if not is_last_model:
                st.markdown("---")

def _build_cost_row(model, base_runs, iterations, data_fraction,
                    tokens_in, tokens_out, prices):
    """Return one row of the cost breakdown table for a single model.

    Args:
        model: Display name of the model.
        base_runs: Number of runs configured for the model.
        iterations: Stability-test iterations, or 0 when stability testing
            is not enabled for the model.
        data_fraction: Fraction of the data used for stability testing;
            ignored (treated as 1.0) when ``iterations`` is 0.
        tokens_in: Input tokens per run.
        tokens_out: Output tokens per run.
        prices: Mapping with ``input_cost_per_m`` and ``output_cost_per_m``,
            the price per million tokens.

    Returns:
        A dict keyed by the breakdown table's column names.
    """
    if iterations == 0:
        # No stability testing: plain base runs on the full data
        total_runs = base_runs
        data_fraction = 1.0
    else:
        total_runs = base_runs * iterations

    # Per-run token counts scaled by the share of data in use
    effective_input_tokens = tokens_in * data_fraction
    effective_output_tokens = tokens_out * data_fraction

    total_input_tokens = effective_input_tokens * total_runs
    total_output_tokens = effective_output_tokens * total_runs

    # Prices are quoted per million tokens
    input_cost = (total_input_tokens / 1_000_000) * prices["input_cost_per_m"]
    output_cost = (total_output_tokens / 1_000_000) * prices["output_cost_per_m"]

    # Token counts are rounded, not truncated: the float products can land
    # just below the exact integer (400 * 0.57 == 227.99999999999997), and
    # int() would then report one token too few.
    return {
        "Model": model,
        "Base Runs": base_runs,
        "Stability Test Iterations": iterations,
        "Data Percentage": f"{data_fraction * 100:.0f}%",
        "Effective Input Tokens": round(effective_input_tokens),
        "Effective Output Tokens": round(effective_output_tokens),
        "Total Runs": total_runs,
        "Total Input Tokens": round(total_input_tokens),
        "Total Output Tokens": round(total_output_tokens),
        "Input Cost ($)": input_cost,
        "Output Cost ($)": output_cost,
        "Total Cost ($)": input_cost + output_cost,
    }


with right_col:
    # Results pane: per-model cost breakdown, overall totals and CSV export
    st.header("Cost Results")

    if not selected_llms:
        st.warning("Please select at least one LLM model.")
    else:
        # Models without stability settings fall back to 0 iterations and
        # the full data set.
        results = [
            _build_cost_row(
                model=llm,
                base_runs=run_counts[llm],
                iterations=stability_iterations.get(llm, 0),
                data_fraction=stability_data_percentages.get(llm, 1.0),
                tokens_in=input_tokens,
                tokens_out=output_tokens,
                prices=llm_data[llm],
            )
            for llm in selected_llms
        ]

        # Create DataFrame from results
        results_df = pd.DataFrame(results)

        # Display results
        st.subheader("Cost Breakdown")
        st.dataframe(results_df, use_container_width=True)

        # Overall totals across all selected models
        total_input_cost = results_df["Input Cost ($)"].sum()
        total_output_cost = results_df["Output Cost ($)"].sum()
        total_cost = results_df["Total Cost ($)"].sum()

        # Display totals side by side
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Input Cost", f"${total_input_cost:.2f}")
        col2.metric("Total Output Cost", f"${total_output_cost:.2f}")
        col3.metric("Total API Cost", f"${total_cost:.2f}")

        # Export the breakdown table as a UTF-8 encoded CSV download
        st.subheader("Export Options")
        csv = results_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Results as CSV",
            data=csv,
            file_name='llm_budget_results.csv',
            mime='text/csv',
        )