|
import React, { useState, useEffect } from "react"; |
|
import { |
|
Box, |
|
Paper, |
|
Typography, |
|
Table, |
|
TableBody, |
|
TableCell, |
|
TableContainer, |
|
TableHead, |
|
TableRow, |
|
Alert, |
|
LinearProgress, |
|
Card, |
|
CardContent, |
|
Link, |
|
} from "@mui/material"; |
|
import OpenInNewIcon from "@mui/icons-material/OpenInNew"; |
|
|
|
const EvaluationDisplay = ({ sessionId }) => { |
|
const [results, setResults] = useState(null); |
|
const [loading, setLoading] = useState(true); |
|
const [error, setError] = useState(null); |
|
|
|
useEffect(() => { |
|
const fetchEvaluationResults = async () => { |
|
if (!sessionId) { |
|
setError("No session ID provided"); |
|
setLoading(false); |
|
return; |
|
} |
|
|
|
try { |
|
|
|
const response = await fetch( |
|
`http://localhost:3001/evaluation-results/${sessionId}` |
|
); |
|
|
|
if (!response.ok) { |
|
throw new Error(`Failed to fetch results: ${response.status}`); |
|
} |
|
|
|
const data = await response.json(); |
|
|
|
if (!data.success) { |
|
throw new Error(data.message || "Failed to fetch evaluation results"); |
|
} |
|
|
|
setResults(data.results); |
|
} catch (err) { |
|
console.error("Error fetching evaluation results:", err); |
|
setError(err.message); |
|
} finally { |
|
setLoading(false); |
|
} |
|
}; |
|
|
|
fetchEvaluationResults(); |
|
}, [sessionId]); |
|
|
|
|
|
const formatAccuracy = (value) => { |
|
return `${(value * 100).toFixed(2)}%`; |
|
}; |
|
|
|
|
|
const formatTime = (seconds) => { |
|
return `${seconds.toFixed(2)}s`; |
|
}; |
|
|
|
if (loading) { |
|
return ( |
|
<Box sx={{ width: "100%", mt: 4, mb: 4 }}> |
|
<Typography variant="h5" gutterBottom> |
|
Loading Evaluation Results... |
|
</Typography> |
|
<LinearProgress /> |
|
</Box> |
|
); |
|
} |
|
|
|
if (error) { |
|
return ( |
|
<Alert severity="error" sx={{ mt: 4, mb: 4 }}> |
|
{error} |
|
</Alert> |
|
); |
|
} |
|
|
|
if ( |
|
!results || |
|
!results.models_comparison || |
|
results.models_comparison.length === 0 |
|
) { |
|
return ( |
|
<Alert severity="info" sx={{ mt: 4, mb: 4 }}> |
|
No evaluation results found for this benchmark. |
|
</Alert> |
|
); |
|
} |
|
|
|
return ( |
|
<Box sx={{ mt: 4, mb: 6 }}> |
|
<Typography variant="h4" gutterBottom> |
|
Evaluation Results |
|
</Typography> |
|
|
|
<TableContainer |
|
component={Paper} |
|
sx={{ |
|
border: "1px solid rgba(224, 224, 224, 1)", |
|
boxShadow: "0 2px 4px rgba(0,0,0,0.05)", |
|
}} |
|
> |
|
<Table sx={{ minWidth: 650 }}> |
|
<TableHead> |
|
<TableRow> |
|
<TableCell>Rank</TableCell> |
|
<TableCell>Model</TableCell> |
|
<TableCell>Provider</TableCell> |
|
<TableCell align="center">Accuracy</TableCell> |
|
<TableCell align="center">Std Error</TableCell> |
|
<TableCell align="center">Eval Time</TableCell> |
|
<TableCell align="center">Status</TableCell> |
|
</TableRow> |
|
</TableHead> |
|
<TableBody> |
|
{results.models_comparison.map((model, index) => ( |
|
<TableRow |
|
key={`${model.model_name}-${model.provider}`} |
|
sx={{ |
|
"&:last-child td, &:last-child th": { border: 0 }, |
|
backgroundColor: model.success |
|
? "inherit" |
|
: "rgba(0, 0, 0, 0.04)", |
|
}} |
|
> |
|
<TableCell>{index + 1}</TableCell> |
|
<TableCell component="th" scope="row"> |
|
<Link |
|
href={`https://huggingface.co/${model.model_name}`} |
|
target="_blank" |
|
rel="noopener noreferrer" |
|
sx={{ |
|
textDecoration: "none", |
|
"&:hover": { |
|
textDecoration: "underline", |
|
}, |
|
display: "flex", |
|
alignItems: "center", |
|
}} |
|
> |
|
{model.model_name} |
|
<OpenInNewIcon sx={{ ml: 0.5, fontSize: 16 }} /> |
|
</Link> |
|
</TableCell> |
|
<TableCell>{model.provider}</TableCell> |
|
<TableCell align="center"> |
|
{model.success ? formatAccuracy(model.accuracy) : "-"} |
|
</TableCell> |
|
<TableCell align="center"> |
|
{model.success ? formatAccuracy(model.accuracy_stderr) : "-"} |
|
</TableCell> |
|
<TableCell align="center"> |
|
{model.success ? formatTime(model.evaluation_time) : "-"} |
|
</TableCell> |
|
<TableCell align="center"> |
|
{model.success ? ( |
|
<span style={{ color: "green" }}>✓ Success</span> |
|
) : ( |
|
<span style={{ color: "red" }}>✗ Failed</span> |
|
)} |
|
</TableCell> |
|
</TableRow> |
|
))} |
|
</TableBody> |
|
</Table> |
|
</TableContainer> |
|
|
|
<Box sx={{ mt: 4, textAlign: "center" }}> |
|
<Typography variant="body2" color="textSecondary"> |
|
Need larger evaluation?{" "} |
|
<Link |
|
href="https://huggingface.co/spaces/yourbench/yourbench" |
|
target="_blank" |
|
rel="noopener noreferrer" |
|
> |
|
Go to this page |
|
</Link> |
|
</Typography> |
|
</Box> |
|
</Box> |
|
); |
|
}; |
|
|
|
export default EvaluationDisplay; |
|
|