"""Gradio demo: normal/abnormal heart & lung sound prediction on HeAR embeddings."""

from typing import Optional, Tuple

import gradio as gr
import numpy as np
import torch
from sklearn.linear_model import LogisticRegression
from transformers import AutoFeatureExtractor, AutoModel

# Load HeAR model and feature extractor.
# NOTE(review): this runs at import time and needs network access or a warm
# cache. Confirm that the MODEL_ID repository ships a transformers-compatible
# feature extractor and PyTorch weights.
MODEL_ID = "google/hear"
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModel.from_pretrained(MODEL_ID)

# Clip geometry used when preparing audio for the model.
TARGET_SR = 16000  # Hz
CLIP_SECONDS = 2

# Width of the embedding vector the dummy classifier is sized for.
# NOTE(review): must equal the last dimension returned by extract_embedding();
# 768 is the value the original demo assumed -- confirm against the model.
EMBEDDING_DIM = 768

# Dummy classifier (replace with your trained classifier).
# For demonstration, a trained classifier is simulated with random weights, so
# predictions are meaningless and change on every process start. In real use,
# train a classifier on HeAR embeddings using your labeled dataset.
clf = LogisticRegression()
clf.classes_ = np.array(["Normal", "Abnormal"])
clf.coef_ = np.random.randn(1, EMBEDDING_DIM)
clf.intercept_ = np.random.randn(1)


def _to_mono_float(samples) -> np.ndarray:
    """Return *samples* as a 1-D float32 waveform.

    Integer PCM is rescaled to roughly [-1, 1]; a 2-D ``(samples, channels)``
    array is downmixed by averaging its channels.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        info = np.iinfo(samples.dtype)
        samples = samples.astype(np.float32)
        if info.min < 0:
            # Signed PCM (e.g. int16): divide by the magnitude of the minimum.
            samples /= float(-info.min)
        else:
            # Unsigned PCM (e.g. uint8) is centred on the mid-point.
            half_range = (info.max + 1) / 2.0
            samples = (samples - half_range) / half_range
    else:
        samples = samples.astype(np.float32)
    if samples.ndim == 2:
        # Average channels so later length/pad logic sees a single time axis.
        samples = samples.mean(axis=1)
    return samples


def extract_embedding(audio: Optional[Tuple[int, np.ndarray]]):
    """Compute one embedding for a Gradio audio value.

    Args:
        audio: ``(sample_rate, samples)`` as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was supplied.

    Returns:
        A NumPy array with one row (the time-averaged ``last_hidden_state``),
        or ``None`` when *audio* is ``None`` or contains no samples.
    """
    if audio is None:
        return None
    sr, y = audio

    # Gradio hands over integer PCM, possibly multi-channel. librosa only
    # accepts floating-point audio, and the pad/truncate step below assumes a
    # single time axis, so normalise the waveform first.
    y = _to_mono_float(y)
    if y.size == 0:
        # An empty recording carries no signal; report it like a missing file.
        return None

    # HeAR expects 2-second clips at 16kHz; resample, then pad/truncate.
    if sr != TARGET_SR:
        import librosa  # imported lazily: only needed when resampling

        y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR)

    clip_len = TARGET_SR * CLIP_SECONDS
    if len(y) > clip_len:
        y = y[:clip_len]
    else:
        y = np.pad(y, (0, clip_len - len(y)))

    inputs = feature_extractor(y, sampling_rate=TARGET_SR, return_tensors="pt")
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
    # Mean-pool over dim 1 to get a single vector per clip.
    return hidden.mean(dim=1).cpu().numpy()


def predict(audio: Optional[Tuple[int, np.ndarray]]) -> str:
    """Return a Markdown string with the predicted label and its probability.

    Args:
        audio: Value of the Gradio audio input (see ``extract_embedding``).

    Returns:
        A prompt to upload a file when no usable audio was given; otherwise
        the predicted class and the highest class probability.
    """
    emb = extract_embedding(audio)
    if emb is None:
        return "Please upload a heart or lung sound file."
    # Predict with the dummy classifier.
    pred = clf.predict(emb)[0]
    prob = clf.predict_proba(emb)[0]
    return f"Prediction: **{pred}**\n\nConfidence: {max(prob):.2%}"


description = """
# Heart & Lung Sound Classifier (Demo)

Upload a heart or lung sound (WAV, MP3, etc.). This demo uses the [HeAR model](https://huggingface.co/google/hear) for health acoustic embeddings and a simple classifier for normal/abnormal prediction.

**Note:** For best results, use 2-second clips. For real diagnosis, a classifier trained on labeled heart/lung sound data should be used.
"""

iface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(
        sources=["upload", "microphone"],
        type="numpy",
        label="Upload Heart/Lung Sound",
    ),
    outputs=gr.Markdown(),
    title="Heart & Lung Sound Classifier",
    description=description,
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()