TheFrenchDemos committed on
Commit
25dd438
·
verified ·
1 Parent(s): 60f6737

suppression

Browse files
Files changed (1) hide show
  1. count_authors.py +0 -104
count_authors.py DELETED
@@ -1,104 +0,0 @@
1
- # count_authors is believed to be the cause of a FraudScore runtime error that appeared spontaneously on 15 June 2025
2
- # HuggingFace sent the following message:
3
- # "We have detected the following secret in spaces/TheFrenchDemos/FraudScore at the revision fd81b6050a663bae8e3ce6975c22635eb665c8dd.
4
- # "Flickr (status: active) in count_authors.py
5
- # "We strongly advise you rotate this secret, in particular if your repository is public.
6
- # "Read more about our Secret scanning feature: https://huggingface.co/docs/hub/en/security-secrets
7
- # However, this module is not needed by FraudScore
8
- # It is disabled by commenting out line(s) 89 and removing the code on lines 19 to 21, which can be recovered from reward simulator if needed
9
-
10
- import tqdm
11
- from multiprocessing import Pool, cpu_count
12
- import signal
13
- import sys
14
- import time
15
-
16
- from flickrapi import FlickrAPI
17
-
18
- # Add Flickr configuration
19
- #FLICKR_API
20
- #FLICKR_API
21
- #flickr =
22
-
23
def get_photo_id(url):
    """Extract the photo ID from a Flickr image URL.

    The ID is taken as the part of the last ``/``-separated segment that
    precedes the first underscore, e.g. ``.../15611395595_f51465687d_o.jpg``
    yields ``"15611395595"``.

    Args:
        url: The image URL as a string.

    Returns:
        The photo ID string (possibly empty when the last segment is empty
        or starts with an underscore), or ``None`` when ``url`` is not a
        string.
    """
    try:
        return url.split('/')[-1].split('_')[0]
    except (AttributeError, TypeError):
        # Non-string input such as None or bytes. Only these are caught so
        # that KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
29
-
30
def get_other_info(url):
    """Look up author and license details for a Flickr photo URL.

    The lookup is best-effort: any failure (unparseable URL, API error,
    unexpected response shape) yields a placeholder record instead of
    raising.

    NOTE(review): this function uses a module-level ``flickr`` API client.
    Its assignment is commented out in this file, so unless it is defined
    elsewhere the lookup raises NameError and the placeholder is returned.

    Args:
        url: The photo URL to resolve.

    Returns:
        A dict with the keys ``username``, ``realname``, ``nsid``,
        ``flickr_url`` and ``license``. On failure the text fields are
        ``'Unknown'`` and ``nsid`` / ``flickr_url`` are empty strings.
    """
    fallback = {
        'username': 'Unknown',
        'realname': 'Unknown',
        'nsid': '',
        'flickr_url': '',
        'license': 'Unknown',
    }
    try:
        photo_id = get_photo_id(url)
        if not photo_id:
            return fallback
        # Pause 0.1 s before each API call to throttle the request rate.
        time.sleep(0.1)
        photo_info = flickr.photos.getInfo(photo_id=photo_id)
        photo = photo_info['photo']
        photo_license = photo['license']
        owner = photo['owner']
        flickr_url = f"https://www.flickr.com/photos/{owner.get('nsid', '')}/{photo_id}"
        return {
            'username': owner.get('username', ''),
            'realname': owner.get('realname', ''),
            'nsid': owner.get('nsid', ''),
            'flickr_url': flickr_url,
            'license': photo_license,
        }
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return fallback
57
-
58
def init_worker():
    """Make the calling process ignore SIGINT.

    Passed as the ``initializer`` of the worker pool so that Ctrl-C is
    handled only by the parent process, which catches KeyboardInterrupt.
    """
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
61
-
62
def process_url(url):
    """Resolve one URL to an author record, never raising.

    Delegates to ``get_other_info``. If that call raises, an error record
    is returned instead, with the exception text in ``realname`` and the
    offending URL in ``flickr_url``.
    """
    try:
        record = get_other_info(url)
    except Exception as exc:
        record = dict(
            username='Error',
            realname=str(exc),
            nsid='',
            flickr_url=url,
            license='Unknown',
        )
    return record
73
-
74
def process_urls_in_chunks(urls, chunk_size=100000):
    """Resolve author records for ``urls`` using a pool of worker processes.

    The URLs are processed in consecutive slices of ``chunk_size``; each
    slice is mapped over the pool with ``process_url`` and shown with its
    own tqdm progress bar. Results keep the input order.

    Args:
        urls: Sequence of URL strings.
        chunk_size: Number of URLs handled per progress bar / slice.

    Returns:
        A list with one author record per input URL.

    On Ctrl-C the pool is terminated and the process exits with status 1.
    """
    collected = []
    with Pool(cpu_count(), initializer=init_worker) as pool:
        try:
            # Walk the input slice by slice, numbering the slices from 1.
            starts = range(0, len(urls), chunk_size)
            for chunk_no, start in enumerate(starts, start=1):
                batch = urls[start:start + chunk_size]
                progress = tqdm.tqdm(
                    pool.imap(process_url, batch),
                    total=len(batch),
                    desc=f"Processing chunk {chunk_no}"
                )
                collected += list(progress)
        except KeyboardInterrupt:
            # Workers ignore SIGINT, so the parent stops them explicitly.
            pool.terminate()
            pool.join()
            print("\nProcessing interrupted by user")
            sys.exit(1)
    return collected
93
-
94
# Script entry point. The guard is required: without it, importing this
# module reads the URL file and starts a multiprocessing Pool at import
# time, and on spawn-based platforms every worker re-imports the module
# and runs this code again.
if __name__ == "__main__":
    from itertools import islice

    urls_file = "data/openimages_urls.txt"
    with open(urls_file) as f:
        # Only the first 100000 URLs are processed; read no more than that.
        urls = [line.strip() for line in islice(f, 100000)]

    authors = process_urls_in_chunks(urls)

    # Count unique authors by username (placeholder names such as
    # 'Unknown' and 'Error' each count as one author).
    unique_authors = len({author['username'] for author in authors})
    print(f"unique_authors: {unique_authors}")
    print(f"Number of unique authors: {unique_authors}")