Spaces:
Runtime error
Runtime error
suppression
Browse files- count_authors.py +0 -104
count_authors.py
DELETED
@@ -1,104 +0,0 @@
|
|
1 |
-
#Count_authors serait à l'origine d'un runtime error de Fraudscore, apparu spontanément le 15 juin 2025
|
2 |
-
#HuggingFace envoie le message:
|
3 |
-
# "We have detected the following secret in spaces/TheFrenchDemos/FraudScore at the revision fd81b6050a663bae8e3ce6975c22635eb665c8dd.
|
4 |
-
# "Flickr (status: active) in count_authors.py
|
5 |
-
# "We strongly advise you rotate this secret, in particular if your repository is public.
|
6 |
-
# "Read more about our Secret scanning feature: https://huggingface.co/docs/hub/en/security-secrets
|
7 |
-
#or ce module n'est pas utile pour FraudScore
|
8 |
-
#il est désactivé par la mise en commentaire des lignes 89, et suppression des codes 19 à 21, à retrouver au besoin dans reward simulator
|
9 |
-
|
10 |
-
import tqdm
|
11 |
-
from multiprocessing import Pool, cpu_count
|
12 |
-
import signal
|
13 |
-
import sys
|
14 |
-
import time
|
15 |
-
|
16 |
-
from flickrapi import FlickrAPI
|
17 |
-
|
18 |
-
# Add Flickr configuration
|
19 |
-
#FLICKR_API
|
20 |
-
#FLICKR_API
|
21 |
-
#flickr =
|
22 |
-
|
23 |
-
def get_photo_id(url):
    """Extract the photo ID from a Flickr image URL.

    The ID is the part of the last ``/``-separated segment that precedes the
    first underscore, e.g. ``".../5012345678_abcdef_o.jpg"`` gives
    ``"5012345678"``.

    Args:
        url: URL string to parse.

    Returns:
        The extracted ID as a string (empty if the last segment is empty),
        or ``None`` when ``url`` is not a string.
    """
    try:
        return url.split('/')[-1].split('_')[0]
    except (AttributeError, TypeError):
        # Non-string input (None, bytes, int, ...). Catch only what such
        # input raises, so KeyboardInterrupt/SystemExit are not swallowed
        # the way a bare ``except`` would.
        return None
|
29 |
-
|
30 |
-
def get_other_info(url):
    """Get author and license information for a Flickr photo URL.

    Best-effort lookup: any failure (unparsable URL, API error, unexpected
    response shape) yields a placeholder record instead of raising.

    NOTE(review): the module-level ``flickr`` client is not defined in the
    visible source (its assignment appears to be commented out). If so,
    every call ends in the fallback below -- confirm before re-enabling.

    Args:
        url: Flickr image URL; its photo ID is derived with ``get_photo_id``.

    Returns:
        A dict with the keys ``username``, ``realname``, ``nsid``,
        ``flickr_url`` and ``license``. On failure the values are
        ``'Unknown'`` / empty strings.
    """
    unknown = {
        'username': 'Unknown',
        'realname': 'Unknown',
        'nsid': '',
        'flickr_url': '',
        'license': 'Unknown'
    }
    try:
        photo_id = get_photo_id(url)
        if not photo_id:
            return unknown

        # Wait 0.1 second before each API request.
        time.sleep(0.1)
        photo_info = flickr.photos.getInfo(photo_id=photo_id)
        # assumes a parsed-JSON (dict) response -- TODO confirm client format
        photo = photo_info['photo']
        photo_license = photo['license']  # avoid shadowing builtin ``license``
        owner = photo['owner']
        flickr_url = f"https://www.flickr.com/photos/{owner.get('nsid', '')}/{photo_id}"
        return {
            'username': owner.get('username', ''),
            'realname': owner.get('realname', ''),
            'nsid': owner.get('nsid', ''),
            'flickr_url': flickr_url,
            'license': photo_license
        }
    except Exception:
        # Deliberate best-effort: fall back to the placeholder record, but
        # let KeyboardInterrupt/SystemExit propagate (a bare ``except`` would
        # have swallowed those too).
        return unknown
|
57 |
-
|
58 |
-
def init_worker():
    """Make the calling process ignore SIGINT.

    Passed as the ``initializer`` of the worker pool, so an interrupt is
    handled where the pool is driven rather than inside each worker.
    """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
|
61 |
-
|
62 |
-
def process_url(url):
    """Return the author record for ``url``, or an error record on failure.

    Delegates to ``get_other_info``. If that call raises, the result is a
    record whose ``username`` is ``'Error'``, whose ``realname`` carries the
    exception text, and whose ``flickr_url`` is the input URL.
    """
    try:
        record = get_other_info(url)
    except Exception as exc:
        record = {
            'username': 'Error',
            'realname': str(exc),
            'nsid': '',
            'flickr_url': url,
            'license': 'Unknown'
        }
    return record
|
73 |
-
|
74 |
-
def process_urls_in_chunks(urls, chunk_size=100000):
    """Run ``process_url`` over ``urls`` in a process pool, chunk by chunk.

    The pool has ``cpu_count()`` workers, each initialised with
    ``init_worker``. Every chunk gets its own tqdm progress bar labelled
    with the 1-based chunk number.

    Args:
        urls: Sliceable sequence of URLs.
        chunk_size: Number of URLs handed to the pool per chunk.

    Returns:
        A list with one result per URL, in the order produced by
        ``pool.imap``.

    On ``KeyboardInterrupt`` the pool is terminated and joined, a message
    is printed, and the process exits with status 1.
    """
    collected = []
    with Pool(cpu_count(), initializer=init_worker) as pool:
        try:
            # Walk the chunk start offsets, numbering chunks from 1.
            starts = range(0, len(urls), chunk_size)
            for chunk_no, start in enumerate(starts, start=1):
                batch = urls[start:start + chunk_size]
                progress = tqdm.tqdm(
                    pool.imap(process_url, batch),
                    total=len(batch),
                    desc=f"Processing chunk {chunk_no}"
                )
                collected += list(progress)
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            print("\nProcessing interrupted by user")
            sys.exit(1)
    return collected
|
93 |
-
|
94 |
-
# Entry-point guard restored: without it, importing this module reads the
# URL file and starts a process pool, and under the "spawn" start method each
# worker re-imports the module and would run this script body again.
if __name__ == "__main__":
    urls_file = "data/openimages_urls.txt"
    with open(urls_file) as f:
        # Only the first 100000 URLs are processed.
        urls = [url.strip() for url in f.readlines()][:100000]

    authors = process_urls_in_chunks(urls)

    # Count unique authors (by username).
    unique_authors = len({author['username'] for author in authors})
    print(f"unique_authors: {unique_authors}")
    print(f"Number of unique authors: {unique_authors}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|