# 11.1_evaluate_binary_v3.py
"""Evaluate the fine-tuned binary classifier on the test split.

The script loads the model stored in ``MODEL_DIR`` and the dataset stored in
``DATASET_DIR``, runs ``Trainer.predict`` on ``ds["test"]`` and writes three
artifacts into ``OUT_DIR``:

* ``classification_report.csv`` - per-class and aggregate metrics,
* ``metrics.json``              - the same report as JSON,
* ``confusion_matrix.png``      - a plot of the confusion matrix.
"""
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datasets import load_from_disk
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    confusion_matrix,
)
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer

# === Paths
MODEL_DIR = Path("models/binary/final")
DATASET_DIR = Path("data/processed/dataset_binary")
OUT_DIR = MODEL_DIR
REPORT_CSV = OUT_DIR / "classification_report.csv"
REPORT_JSON = OUT_DIR / "metrics.json"
CONF_MATRIX_PNG = OUT_DIR / "confusion_matrix.png"

# === Classes
# Display names in class-index order. Pinning the ids explicitly keeps the
# report and the confusion matrix 2x2 even when one class is absent from the
# test split (without `labels=`, scikit-learn raises on the target_names size).
# NOTE(review): assumes the dataset encodes good=0 / bad=1 - confirm.
CLASS_IDS = [0, 1]
CLASS_NAMES = ["good", "bad"]

# === Load model
print("📂 Wczytywanie modelu...")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

# === Load the tokenizer shipped with the model, or fall back to the base one
# The tokenizer is not handed to the Trainer below; the fallback branch only
# makes sure tokenizer files end up next to the model.
tokenizer_files = list(MODEL_DIR.glob("tokenizer*"))
if tokenizer_files:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
else:
    print("⚠️ Brak tokenizera w modelu — pobieram z microsoft/codebert-base")
    tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
    tokenizer.save_pretrained(MODEL_DIR)

# === Load data
ds = load_from_disk(str(DATASET_DIR))
trainer = Trainer(model=model)

# === Prediction
print("🔍 Predykcja na zbiorze testowym...")
predictions = trainer.predict(ds["test"])
# The predicted class is the index of the largest logit in each row.
y_pred = np.argmax(predictions.predictions, axis=1)
y_true = predictions.label_ids

# === Classification report
print("\n📊 Raport klasyfikacji:")
# The dict form feeds the CSV/JSON files, the text form is printed.
report_dict = classification_report(
    y_true,
    y_pred,
    labels=CLASS_IDS,
    target_names=CLASS_NAMES,
    zero_division=0,
    output_dict=True,
)
report_text = classification_report(
    y_true,
    y_pred,
    labels=CLASS_IDS,
    target_names=CLASS_NAMES,
    zero_division=0,
)
print(report_text)

# Save CSV + JSON
df_report = pd.DataFrame(report_dict).transpose()
df_report.to_csv(REPORT_CSV)
with open(REPORT_JSON, "w", encoding="utf-8") as f:
    json.dump(report_dict, f, indent=2)
print(f"💾 Zapisano raport CSV: {REPORT_CSV}")
print(f"💾 Zapisano metryki JSON: {REPORT_JSON}")

# === Confusion matrix + plot
conf_matrix = confusion_matrix(y_true, y_pred, labels=CLASS_IDS)
labels = list(CLASS_NAMES)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=labels)
# Draw on an explicitly created axes: without `ax=`, `disp.plot` opens its own
# figure, so a preceding `plt.figure(figsize=...)` would be ignored and leaked.
fig, ax = plt.subplots(figsize=(5, 4))
disp.plot(ax=ax, cmap="Purples", values_format="d")
ax.set_title("🧱 Confusion Matrix – Binary Classifier")
ax.grid(False)
fig.tight_layout()
fig.savefig(CONF_MATRIX_PNG)
plt.close(fig)
print(f"🖼️ Zapisano confusion matrix jako PNG: {CONF_MATRIX_PNG}")