Update src/streamlit_app.py
src/streamlit_app.py (CHANGED, +69 -8)
@@ -61,14 +61,60 @@ with left_col:
     st.subheader("Stability Test Settings")
     stability_test = st.checkbox("Enable stability testing", value=False)

+    # Global settings for stability testing
     stability_iterations = {}
+    stability_data_percentages = {}
+
     if stability_test:
+        st.write("Global Stability Settings:")
+        use_subset = st.checkbox("Test stability on a subset of data", value=False)
+
+        if use_subset:
+            default_percent = st.slider(
+                "Default data percentage for stability tests",
+                min_value=10,
+                max_value=100,
+                value=50,
+                step=5,
+                help="Percentage of the input data to use for stability testing"
+            )
+
         st.write("Set stability iterations for selected LLMs:")
         for llm in selected_llms:
-
+            st.markdown(f"**{llm} Stability Settings**")
+            col1, col2 = st.columns(2)
+
+            with col1:
+                stability_enabled = st.checkbox(f"Test stability", value=False, key=f"stability_{llm}")
+
             if stability_enabled:
-
-
+                with col1:
+                    iterations = st.number_input(
+                        f"Iterations",
+                        min_value=2,
+                        value=10,
+                        step=1,
+                        key=f"iterations_{llm}"
+                    )
+                    stability_iterations[llm] = iterations
+
+                with col2:
+                    if use_subset:
+                        custom_percent = st.number_input(
+                            f"Data %",
+                            min_value=5,
+                            max_value=100,
+                            value=default_percent,
+                            step=5,
+                            key=f"percent_{llm}",
+                            help="Percentage of the input data to use"
+                        )
+                        stability_data_percentages[llm] = custom_percent / 100.0
+                    else:
+                        stability_data_percentages[llm] = 1.0
+
+                if llm != selected_llms[-1]:
+                    st.markdown("---")

 with right_col:
     # Calculate costs
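The hunk above only collects the per-model choices into two dicts keyed by model name: `stability_iterations` (how many repeat runs) and `stability_data_percentages` (fraction of the input data, 1.0 when no subset is used). The commit itself never subsamples any data; the percentage only scales the cost estimate below. If the app later runs the stability tests, a helper along these lines could apply the same fraction. This is a minimal sketch and not part of the diff; `sample_for_stability`, `records`, and `seed` are invented names.

```python
import random

def sample_for_stability(records, percentage, seed=None):
    """Return a random subset of `records` sized by a stability-test percentage.

    Hypothetical helper, not part of this commit: the diff only uses the
    percentage to scale the token and cost estimate in right_col.
    """
    records = list(records)
    if percentage >= 1.0:
        return records
    rng = random.Random(seed)
    k = max(1, int(len(records) * percentage))
    return rng.sample(records, k)

# e.g. for each stability-tested model:
# subset = sample_for_stability(input_rows, stability_data_percentages.get(llm, 1.0), seed=0)
```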
@@ -82,10 +128,22 @@ with right_col:
     for llm in selected_llms:
         base_runs = run_counts[llm]
         stability_runs = stability_iterations.get(llm, 0)
-
+        data_percentage = stability_data_percentages.get(llm, 1.0)
+
+        # Calculate total runs
+        if stability_runs == 0:
+            total_runs = base_runs
+            effective_data_percentage = 1.0  # No stability testing, use full data
+        else:
+            total_runs = base_runs * stability_runs
+            effective_data_percentage = data_percentage  # Use configured percentage for stability testing
+
+        # Calculate tokens based on data percentage
+        effective_input_tokens = input_tokens * effective_data_percentage
+        effective_output_tokens = output_tokens * effective_data_percentage

-        total_input_tokens =
-        total_output_tokens =
+        total_input_tokens = effective_input_tokens * total_runs
+        total_output_tokens = effective_output_tokens * total_runs

         input_cost = (total_input_tokens / 1_000_000) * llm_data[llm]["input_cost_per_m"]
         output_cost = (total_output_tokens / 1_000_000) * llm_data[llm]["output_cost_per_m"]
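To make the new cost arithmetic concrete, here is the same calculation in plain Python with made-up numbers (token counts and per-million prices are illustrative; in the app they come from the input widgets and `llm_data`, and the `total_cost` definition sits outside this hunk, so treating it as the sum of the two costs is an assumption). Two consequences of the code above are worth noting: base runs are multiplied by the iteration count rather than added to it, and when stability testing is enabled every run is costed at the reduced data percentage.

```python
# Illustrative numbers only; the real values come from the app's inputs.
base_runs = 3
stability_runs = 10                              # stability iterations for this model
data_percentage = 0.5                            # stability tests use 50% of the data
input_tokens, output_tokens = 200_000, 50_000    # tokens for one full-data run
input_price, output_price = 2.50, 10.00          # $ per 1M tokens (made up)

total_runs = base_runs * stability_runs                      # 30
effective_input_tokens = input_tokens * data_percentage      # 100_000 per run
effective_output_tokens = output_tokens * data_percentage    # 25_000 per run

total_input_tokens = effective_input_tokens * total_runs     # 3_000_000
total_output_tokens = effective_output_tokens * total_runs   # 750_000

input_cost = (total_input_tokens / 1_000_000) * input_price      # $7.50
output_cost = (total_output_tokens / 1_000_000) * output_price   # $7.50
total_cost = input_cost + output_cost                            # assumed sum: $15.00
print(f"${total_cost:.2f} across {total_runs} runs")             # $15.00 across 30 runs
```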
@@ -95,9 +153,12 @@ with right_col:
             "Model": llm,
             "Base Runs": base_runs,
             "Stability Test Iterations": stability_iterations.get(llm, 0),
+            "Data Percentage": f"{data_percentage * 100:.0f}%" if stability_runs > 0 else "100%",
+            "Effective Input Tokens": int(effective_input_tokens),
+            "Effective Output Tokens": int(effective_output_tokens),
             "Total Runs": total_runs,
-            "Total Input Tokens": total_input_tokens,
-            "Total Output Tokens": total_output_tokens,
+            "Total Input Tokens": int(total_input_tokens),
+            "Total Output Tokens": int(total_output_tokens),
             "Input Cost ($)": input_cost,
             "Output Cost ($)": output_cost,
             "Total Cost ($)": total_cost
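The dict above is one row of the per-model cost summary; the code that accumulates and renders these rows falls outside the diff. A rough sketch of how such rows are typically shown in Streamlit follows — the `results` list, the example values (reusing the worked numbers above), and the `st.dataframe`/`Styler.format` calls are assumptions, not taken from this commit.

```python
import pandas as pd
import streamlit as st

# Assumed accumulator: one dict per selected model, with the keys built above.
results = [{
    "Model": "example-model",
    "Base Runs": 3,
    "Stability Test Iterations": 10,
    "Data Percentage": "50%",
    "Effective Input Tokens": 100_000,
    "Effective Output Tokens": 25_000,
    "Total Runs": 30,
    "Total Input Tokens": 3_000_000,
    "Total Output Tokens": 750_000,
    "Input Cost ($)": 7.50,
    "Output Cost ($)": 7.50,
    "Total Cost ($)": 15.00,
}]

df = pd.DataFrame(results)
st.dataframe(
    df.style.format({"Input Cost ($)": "{:.2f}",
                     "Output Cost ($)": "{:.2f}",
                     "Total Cost ($)": "{:.2f}"}),
    use_container_width=True,
)
```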