// NOTE(review): confirm that the installed @huggingface/hub release really
// exports an `HfApi` class whose uploadFile/createRepo/datasetInfo methods accept
// the option names used below. The JS package's documentation appears to describe
// standalone functions instead — TODO verify against the pinned version.
const { HfApi } = require("@huggingface/hub");
const fetch = require('node-fetch');

/**
 * Persists the device registry as a JSON file inside a Hugging Face dataset
 * repository.
 *
 * Configuration is read from the environment when the instance is created:
 *   - HF_TOKEN       access token; when it is missing the manager is disabled
 *                    and every operation becomes a logged no-op.
 *   - HF_DATASET_ID  dataset repository id (defaults to "Detomo/houzou-devices").
 *
 * The registry is handled in memory as a `Map` and serialized as an array of
 * `[key, value]` entries.
 */
class HFDatasetManager {
  constructor() {
    this.hfToken = process.env.HF_TOKEN;
    // `||` (not `??`) so that an empty HF_DATASET_ID also falls back to the default.
    this.datasetId = process.env.HF_DATASET_ID || "Detomo/houzou-devices";
    this.fileName = "devices.json";
    this.hfApi = new HfApi({ accessToken: this.hfToken });
    this.isEnabled = !!this.hfToken;

    console.log(`🤗 HF Dataset Manager initialized`);
    console.log(` Dataset: ${this.datasetId}`);
    console.log(` Enabled: ${this.isEnabled}`);
  }

  /**
   * Downloads the devices file and rebuilds the device map from it.
   *
   * Never rejects: when the manager is disabled, the file is missing, the
   * request fails, or the payload is malformed, an empty `Map` is returned.
   * NOTE(review): callers therefore cannot tell "no devices stored" apart from
   * "load failed"; saving the returned map after a failed load would overwrite
   * the stored data — confirm callers account for this.
   *
   * @returns {Promise<Map>} Device map built from the stored entries.
   */
  async loadDevices() {
    if (!this.isEnabled) {
      console.log('⚠️ HF Dataset disabled - no token provided');
      return new Map();
    }

    try {
      console.log('📥 Loading devices from HF dataset...');

      const fileUrl = `https://huggingface.co/datasets/${this.datasetId}/resolve/main/${this.fileName}`;
      const response = await fetch(fileUrl, {
        headers: { 'Authorization': `Bearer ${this.hfToken}` }
      });

      if (response.status === 404) {
        // First run against this dataset: seed it with an empty devices file.
        console.log('📁 No devices file found in dataset, creating new one');
        await this.saveDevices(new Map());
        return new Map();
      }

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const data = await response.json();
      // Fail with a descriptive message instead of the opaque TypeError that
      // `new Map()` raises for a non-iterable payload. Either way the catch
      // below turns it into the empty-map fallback.
      if (!Array.isArray(data)) {
        throw new Error('Unexpected devices file format: expected an array of [key, value] entries');
      }

      const deviceMap = new Map(data);
      console.log(`📥 Loaded ${deviceMap.size} devices from HF dataset`);
      return deviceMap;
    } catch (error) {
      console.error('❌ Error loading devices from HF dataset:', error);
      console.log('⚠️ Falling back to empty device list');
      return new Map();
    }
  }

  /**
   * Serializes the device map and uploads it as the devices file.
   *
   * @param {Map} deviceMap - Devices to persist; stored as an array of entries.
   * @returns {Promise<boolean>} `true` when the upload succeeded, `false` when
   *   the manager is disabled or the upload failed (the error is logged).
   */
  async saveDevices(deviceMap) {
    if (!this.isEnabled) {
      console.log('⚠️ HF Dataset disabled - cannot save devices');
      return false;
    }

    try {
      console.log(`💾 Saving ${deviceMap.size} devices to HF dataset...`);

      const devicesArray = Array.from(deviceMap.entries());
      const jsonData = JSON.stringify(devicesArray, null, 2);

      // NOTE(review): relies on a global `Blob` — presumably available in the
      // target Node runtime; confirm the minimum supported version.
      const blob = new Blob([jsonData], { type: 'application/json' });

      await this.hfApi.uploadFile({
        repo: this.datasetId,
        file: { path: this.fileName, content: blob },
        // Single-line message: the previous template literal contained a stray
        // line break in the middle of the text.
        commitMessage: `Update devices data - ${new Date().toISOString()}`,
        repoType: "dataset"
      });

      console.log(`✅ Successfully saved ${deviceMap.size} devices to HF dataset`);
      return true;
    } catch (error) {
      console.error('❌ Error saving devices to HF dataset:', error);
      return false;
    }
  }

  /**
   * Ensures the dataset repository exists, creating it together with a README
   * and an empty devices file when the lookup fails with status code 404.
   *
   * @returns {Promise<boolean>} `true` when the dataset already exists or was
   *   created; `false` when the manager is disabled or any step threw.
   */
  async createDatasetIfNotExists() {
    if (!this.isEnabled) {
      console.log('⚠️ HF Dataset disabled - cannot create dataset');
      return false;
    }

    try {
      console.log('🔍 Checking if dataset exists...');
      // Only the success or failure of the lookup matters; the result is unused.
      await this.hfApi.datasetInfo({ repo: this.datasetId });
      console.log('✅ Dataset already exists');
      return true;
    } catch (error) {
      if (error.statusCode !== 404) {
        console.error('❌ Error checking dataset:', error);
        return false;
      }

      console.log('📁 Dataset not found, creating new one...');
      try {
        await this.hfApi.createRepo({
          repo: this.datasetId,
          type: "dataset",
          private: false
        });
        console.log('✅ Dataset created successfully');

        // Joined with explicit newlines so the headings and the list item
        // render as separate markdown blocks.
        const readmeContent = [
          '# Houzou Medical Devices Dataset',
          '',
          'This dataset stores FCM tokens and device information for the Houzou Medical app notification system.',
          '',
          '## Files',
          '',
          '- `devices.json`: Contains device tokens and metadata',
          '',
          '## Usage',
          '',
          'This dataset is automatically managed by the Houzou Medical Notification Server.',
          '',
          `Last updated: ${new Date().toISOString()}`,
          ''
        ].join('\n');

        await this.hfApi.uploadFile({
          repo: this.datasetId,
          file: {
            path: "README.md",
            content: new Blob([readmeContent], { type: 'text/markdown' })
          },
          commitMessage: "Initial dataset setup",
          repoType: "dataset"
        });

        // The outcome of this save is not checked; saveDevices logs its own
        // failures and the method still reports the dataset as created.
        await this.saveDevices(new Map());
        return true;
      } catch (createError) {
        console.error('❌ Error creating dataset:', createError);
        return false;
      }
    }
  }

  /**
   * @returns {boolean} Whether a token was provided, i.e. the manager is enabled.
   */
  isReady() {
    return this.isEnabled;
  }

  /**
   * @returns {{enabled: boolean, datasetId: string, hasToken: boolean}}
   *   Snapshot of the manager configuration; the token itself is not included.
   */
  getStatus() {
    return {
      enabled: this.isEnabled,
      datasetId: this.datasetId,
      hasToken: !!this.hfToken
    };
  }
}

module.exports = HFDatasetManager;