# count_authors is believed to be the cause of a FraudScore runtime error that
# appeared spontaneously on 15 June 2025.
# Hugging Face sent the following message:
#   "We have detected the following secret in spaces/TheFrenchDemos/FraudScore
#   at the revision fd81b6050a663bae8e3ce6975c22635eb665c8dd.
#   Flickr (status: active) in count_authors.py
#   We strongly advise you rotate this secret, in particular if your
#   repository is public.
#   Read more about our Secret scanning feature:
#   https://huggingface.co/docs/hub/en/security-secrets"
# However, this module is not needed by FraudScore. It was disabled by
# commenting out line 89 and removing the code on lines 19 to 21 (line numbers
# of the original layout; presumably the script entry point and the Flickr
# credentials/client setup). The removed code can be retrieved from
# "reward simulator" if needed.

import tqdm
from itertools import islice
from multiprocessing import Pool, cpu_count
import signal
import sys
import time
from flickrapi import FlickrAPI

# Flickr configuration.
# The API key, API secret and client construction were removed from this file
# (see the header note). `flickr` stays None until a configured client is
# assigned here; while it is None every lookup returns the "Unknown" record.
flickr = None


def _unknown_author():
    """Return a fresh placeholder record for a photo whose author is unknown."""
    return {
        'username': 'Unknown',
        'realname': 'Unknown',
        'nsid': '',
        'flickr_url': '',
        'license': 'Unknown'
    }


def get_photo_id(url):
    """Extract photo ID from Flickr URL.

    The ID is the part of the last path segment that precedes the first
    underscore. Returns None when `url` is not a string (e.g. None).
    """
    try:
        return url.split('/')[-1].split('_')[0]
    except (AttributeError, TypeError):
        # Non-string input (None, bytes, numbers, ...) has no usable ID.
        return None


def get_other_info(url):
    """Get author information from Flickr.

    Returns a dict with the keys 'username', 'realname', 'nsid', 'flickr_url'
    and 'license'. This is a best-effort lookup: if the photo ID cannot be
    extracted, no Flickr client is configured, or the API call fails, the
    "Unknown" placeholder record is returned instead of raising.
    """
    try:
        photo_id = get_photo_id(url)
        if photo_id and flickr is not None:
            # Wait for 0.1 second before each API call.
            time.sleep(0.1)
            photo_info = flickr.photos.getInfo(photo_id=photo_id)
            photo = photo_info['photo']
            license_id = photo['license']
            owner = photo['owner']
            nsid = owner.get('nsid', '')
            return {
                'username': owner.get('username', ''),
                'realname': owner.get('realname', ''),
                'nsid': nsid,
                'flickr_url': f"https://www.flickr.com/photos/{nsid}/{photo_id}",
                'license': license_id
            }
    except Exception:
        # Deliberately best-effort: any lookup failure maps to "Unknown".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        pass
    return _unknown_author()


def init_worker():
    """Initialize worker process to handle signals.

    Workers ignore SIGINT so that Ctrl-C is handled by the parent process only.
    """
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def process_url(url):
    """Look up one URL, converting any unexpected error into an 'Error' record."""
    try:
        return get_other_info(url)
    except Exception as e:
        return {
            'username': 'Error',
            'realname': str(e),
            'nsid': '',
            'flickr_url': url,
            'license': 'Unknown'
        }


def process_urls_in_chunks(urls, chunk_size=100000):
    """Look up author information for `urls` using a pool of worker processes.

    The URLs are processed `chunk_size` at a time, with one progress bar per
    chunk. Returns the list of author records in the same order as `urls`.
    On Ctrl-C the pool is terminated and the process exits with status 1.
    """
    authors = []
    with Pool(cpu_count(), initializer=init_worker) as pool:
        try:
            # Process URLs in chunks
            for start in range(0, len(urls), chunk_size):
                chunk = urls[start:start + chunk_size]
                chunk_results = list(tqdm.tqdm(
                    pool.imap(process_url, chunk),
                    total=len(chunk),
                    desc=f"Processing chunk {start//chunk_size + 1}"
                ))
                authors.extend(chunk_results)
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            print("\nProcessing interrupted by user")
            sys.exit(1)
    return authors


def main(urls_file="data/openimages_urls.txt", max_urls=100000):
    """Count the unique author usernames among the first `max_urls` URLs.

    Reads one URL per line from `urls_file`, looks each one up and prints the
    number of distinct usernames.
    """
    with open(urls_file) as f:
        # islice stops reading after max_urls lines instead of loading the
        # whole file and slicing afterwards.
        urls = [line.strip() for line in islice(f, max_urls)]

    authors = process_urls_in_chunks(urls)

    # Count unique authors
    unique_authors = len({author['username'] for author in authors})
    print(f"unique_authors: {unique_authors}")
    print(f"Number of unique authors: {unique_authors}")


# Entry point intentionally left disabled (see the header note). To run the
# count as a script, assign a configured client to `flickr` above and
# uncomment the two lines below.
# if __name__ == "__main__":
#     main()