|
import gradio as gr |
|
import pandas as pd |
|
import plotly.express as px |
|
|
|
# Load the environmental-disclosure dataset and tidy it for plotting.
data = pd.read_csv('data/env_disclosure_data.csv')
# 'Unnamed: 0' is presumably a leftover saved index column -- TODO confirm.
data = data.drop(columns=['Unnamed: 0'])
# Rows without a transparency label are grouped under the literal string 'None'.
data['Environmental Transparency'] = data['Environmental Transparency'].fillna('None')

# (raw label or labels, canonical name) pairs, applied in order, that collapse
# the spelling variants of an organization onto the single name used in the UI.
_ORGANIZATION_ALIASES = (
    ('University of Montreal / Université de Montréal', 'University of Montreal'),
    ('University of Washington,Allen Institute for AI', 'Allen Institute for AI'),
    ('Allen Institute for AI,University of Washington', 'Allen Institute for AI'),
    (['Google', 'DeepMind', 'Google DeepMind', 'Google Brain', 'Google Research'], 'Alphabet'),
    (['Meta AI', 'Facebook AI Research', 'Facebook AI', 'Facebook'], 'Meta'),
    (['Microsoft', 'Microsoft Research'], 'Microsoft'),
)
for _raw_labels, _canonical_name in _ORGANIZATION_ALIASES:
    data['Organization'] = data['Organization'].replace(_raw_labels, _canonical_name)
|
|
|
# Organizations offered in the dashboard dropdown, in display order.
# Each name appears exactly once: a repeated entry would be rendered as a
# repeated dropdown option.
# NOTE(review): 'Meta' is normalized in the data but is not offered here --
# confirm whether that omission is intentional.
organizations = [
    'Alphabet',
    'OpenAI',
    'Alibaba',
    'Stanford University',
    'University of Toronto',
    'Microsoft',
    'NVIDIA',
    'Carnegie Mellon University (CMU)',
    'University of Oxford',
    'University of California (UC) Berkeley',
    'Baidu',
    'Anthropic',
    'Salesforce Research',
    'Amazon',
    'University of Montreal',
    'Apple',
    'Mistral AI',
    'DeepSeek',
    'Allen Institute for AI',
]
|
|
|
def generate_figure(org_name):
    """Build the transparency histogram for a single organization.

    Args:
        org_name: Organization label, compared for exact equality against
            the ``Organization`` column of the module-level ``data`` frame.

    Returns:
        A Plotly figure counting the matching rows per ``Year``, colored by
        ``Environmental Transparency``.
    """
    org_data = data[data['Organization'] == org_name]
    fig = px.histogram(org_data, x="Year", color="Environmental Transparency")
    # Years are drawn as discrete categories. Without an explicit order the
    # categories follow their first appearance across the color traces, which
    # can place years out of sequence, so sort them to keep the axis
    # chronological.
    fig.update_layout(
        xaxis_type='category',
        xaxis_categoryorder='category ascending',
    )
    return fig
|
|
|
# Organization pre-selected in the dropdown and drawn when the page first loads.
DEFAULT_ORGANIZATION = "Alphabet"

# Page layout: title and methodology, then the per-organization explorer
# (dropdown and category legend on the left, chart on the right), then the
# chart covering the whole dataset.
with gr.Blocks() as demo:
    gr.Markdown("# Environmental Transparency Explorer Tool")
    gr.Markdown("## Explore the data from 'Misinformation by Omission: The Need for More Environmental Transparency in AI'")
    with gr.Accordion('Methodology', open=False):
        gr.Markdown(
            'We analyzed Epoch AI\'s "Notable AI Models" dataset, which tracks information on “models that were state of the art, highly cited, '
            'or otherwise historically notable” released over time. We selected the time period starting in 2010 as this is the beginning of the modern “deep learning era” '
            '(as defined by Epoch AI), which is representative of the types of AI models currently trained and deployed, including all 754 models from 2010 '
            'to the first quarter of 2025 in our analysis. We examined the level of environmental impact transparency for each model based on key information '
            'from the Epoch AI dataset (e.g., model accessibility, training compute estimation method) as well as from individual model release content '
            '(e.g., paper, model card, announcement).'
        )
    with gr.Row():
        with gr.Column(scale=1):
            org_choice = gr.Dropdown(
                organizations,
                value=DEFAULT_ORGANIZATION,
                label="Organizations",
                info="Pick an organization to explore their environmental disclosures",
                interactive=True,
            )
            gr.Markdown('The 3 transparency categories are:')
            gr.Markdown('**Direct Disclosure**: Developers explicitly reported energy or GHG emissions, e.g., using hardware TDP, country average carbon intensity or measurements.')
            gr.Markdown('**Indirect Disclosure**: Developers provided training compute data or released their model weights, allowing external estimates of training or inference impacts.')
            gr.Markdown('**No Disclosure**: Environmental impact data was not publicly released and estimation approaches (as noted in Indirect Disclosure) were not possible.')
        with gr.Column(scale=4):
            gr.Markdown("### Data by Organization")
            # Build the initial chart from the organization *name*. Passing
            # the Dropdown component itself would match no rows and leave the
            # chart empty until the user makes a selection.
            fig = generate_figure(DEFAULT_ORGANIZATION)
            plt = gr.Plot(fig)
    with gr.Row():
        with gr.Column():
            gr.Markdown("### All Data")
            fig2 = px.histogram(data, x="Year", color="Environmental Transparency")
            # Treat years as discrete categories and sort them so the axis
            # stays chronological regardless of row order in the data.
            fig2.update_layout(
                xaxis_type='category',
                xaxis_categoryorder='category ascending',
            )
            plt2 = gr.Plot(fig2)
    # Redraw the per-organization chart whenever the user picks a dropdown entry.
    org_choice.select(generate_figure, inputs=[org_choice], outputs=[plt])

demo.launch()
|
|