Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -139,5 +139,34 @@ async def root(site: str = 'abcnews.go'):
|
|
139 |
return {"RESULTS": ii_list}
|
140 |
except requests.RequestException as e:
|
141 |
return {"error": str(e), "status_code": 500}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
|
|
|
139 |
return {"RESULTS": ii_list}
|
140 |
except requests.RequestException as e:
|
141 |
return {"error": str(e), "status_code": 500}
|
142 |
+
|
143 |
+
|
144 |
+
@app.get("/extract-images-livemint")
async def root(site: str = 'livemint'):
    """Backfill missing image URLs for recent Livemint articles.

    Selects up to 15 rows from the ``news`` table whose ``source`` is
    ``www.livemint.com`` and whose ``image_url`` is null, newest
    ``published_date`` first.  For each row the article page is fetched,
    the ``src`` of one ``<img>`` tag (chosen by fixed position) is taken,
    and ``insert_image`` is called with that URL and the row's ``id``.

    Args:
        site: Accepted as a query parameter but not read by this handler.

    Returns:
        ``{"RESULTS": [...]}`` with the image URLs that were extracted, or
        ``{"error": ..., "status_code": 500}`` if a
        ``requests.RequestException`` escapes the per-article handling.
    """
    # Position of the <img> tag to use among all <img> tags on the page.
    # NOTE(review): presumably tuned to Livemint's current page layout and
    # will need updating if the markup changes -- confirm.
    image_index = 18
    # Seconds to wait on each article fetch; without a timeout a single
    # unresponsive server would stall this handler indefinitely.
    request_timeout = 10

    try:
        ii_list = []
        response = (
            supabase.table('news')
            .select("*")
            .eq('source', 'www.livemint.com')
            .is_('image_url', 'null')
            .order('published_date', desc=True)
            .limit(15)
            .execute()
        )

        for row in response.data:
            article_url = row['article_url']
            try:
                page = requests.get(article_url, timeout=request_timeout)
                soup = BeautifulSoup(page.text, 'html.parser')
                images = soup.find_all('img')

                # Skip explicitly instead of relying on IndexError/KeyError
                # being swallowed by the broad handler below.
                if len(images) <= image_index:
                    print(f"no <img> at position {image_index} for {article_url}")
                    continue
                image_url = images[image_index].get('src')
                if not image_url:
                    print(f"<img> at position {image_index} has no src for {article_url}")
                    continue

                ii_list.append(image_url)
                insert_image(image_url, row['id'])
            except Exception as e:
                # Best-effort per article: one failing page (network error,
                # parse problem, failed insert) must not abort the batch.
                print(e)

        return {"RESULTS": ii_list}
    except requests.RequestException as e:
        # NOTE(review): request errors from the article fetch are already
        # absorbed by the per-article handler above, so this only fires if
        # the Supabase query itself raises a requests exception -- confirm
        # which exception types the client actually raises.
        return {"error": str(e), "status_code": 500}
|
171 |
|
172 |
|