import requests
import json
from datetime import datetime, timedelta
from tradingview_ta import TA_Handler, Interval
from langchain.document_loaders import WebBaseLoader
from langchain.docstore.document import Document
from bs4 import BeautifulSoup
from GoogleNews import GoogleNews
from langchain.prompts import PromptTemplate
from langchain.chains import StuffDocumentsChain, LLMChain

def clean_google_news_url(url: str):
    """Strip Google News tracking parameters and return the bare article URL."""
    for ext in [".html", ".cms"]:
        if ext in url:
            return url.split(ext)[0] + ext
    return url.split("&")[0]

def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
    """Search Google News for `query` and return article bodies as LangChain Documents."""
    googlenews = GoogleNews(lang="en")
    # Restrict results to the last two days.
    end_date = datetime.today()
    start_date = end_date - timedelta(days=2)
    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
    googlenews.search(query)
    articles = googlenews.result()
    documents = []
    for article in articles[:max_articles]:
        url = clean_google_news_url(article.get("link", ""))
        try:
            response = requests.get(url, timeout=timeout, headers={"User-Agent": "Mozilla/5.0"})
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = soup.find_all("p")
            content = "\n".join([p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True)])
            # Keep only articles with a meaningful amount of text.
            if content and len(content) > 200:
                doc = Document(
                    page_content=content,
                    metadata={
                        "source": "Google News",
                        "title": article.get("title", ""),
                        "published": article.get("date", ""),
                        "link": url,
                    },
                )
                documents.append(doc)
        except Exception:
            # Skip articles that fail to download or parse.
            continue
    return documents
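
# Illustrative usage only (the query and article limit below are arbitrary examples,
# not values taken from this module):
#
#   docs = get_google_news_documents("NSE Nifty 50 news", max_articles=5)
#   for d in docs:
#       print(d.metadata["title"], d.metadata["link"])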

def get_market_stock_sentiment(llm):
    """Collect Indian stock-market news and ask the LLM for a structured sentiment summary."""
    # Market-news pages scraped directly via WebBaseLoader.
    urls = [
        "https://economictimes.indiatimes.com/markets/stocks/news",
        "https://www.livemint.com/market/stock-market-news/",
        "https://in.tradingview.com/ideas/editors-picks/?type=trade",
        "https://pulse.zerodha.com/",
        "https://upstox.com/news/market-news/stocks/",
    ]
    loader = WebBaseLoader(urls)
    web_docs = loader.load()
    # Supplement the fixed sources with recent Google News articles.
    google_docs = get_google_news_documents(
        "Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks",
        max_articles=10,
    )
    web_docs.extend(google_docs)
    prompt_template = """You are an expert Stock Market Trader specializing in stock market insights derived from fundamental analysis, analytical trends, profit-based evaluations, news indicators from different sites, and detailed company financials.
You will receive stock market news articles, or stocks in the news, from various websites that carry Indian stock news feeds. For the context/input_documents below, perform the following tasks:

Context:
{input_documents}

1. **Top picks**: After analyzing all provided data, rank the top 5-10 stocks to watch this week, including tickers, current sentiment, and why they made the list.
2. **Identify the stock(s)** mentioned (by ticker and company name).
3. **Sentiment analysis**: classify as Bullish, Bearish, or Neutral.
4. **Extract critical news**: What is the main event or update? (e.g., earnings beat, regulatory approval, management change, major contract, or macro impact).
5. **Summarize impact**: Briefly explain how this news might affect the stock price and investor behavior (e.g., "could boost investor confidence", "signals profit pressure", etc.).
6. **Actionable signal**: Based on the sentiment and news, suggest whether this is a "Buy", "Sell", "Hold", or "Watch" recommendation, and give the rationale.

PROVIDE THE DETAILS based only on the FACTS present in the documents. Do NOT duplicate output or hallucinate.

***Format your output as JSON*** with the following structure:
```json
{{
  "top_picks": [
    {{
      "ticker": "TICKER",
      "company": "Company Name",
      "sentiment": "Bullish|Bearish|Neutral",
      "critical_news": "Brief summary of the key event",
      "impact_summary": "How this may affect the stock",
      "action": "Buy|Sell|Hold|Watch",
      "reason": "Why this stock ranks among top picks"
    }},
    ...
  ]
}}
```"""

    prompt = PromptTemplate.from_template(prompt_template)
    # Stuff every document into a single prompt and run it through the LLM.
    chain = StuffDocumentsChain(
        llm_chain=LLMChain(llm=llm, prompt=prompt),
        document_variable_name="input_documents",
    )
    response = chain.invoke({"input_documents": web_docs})
    raw = response["output_text"].strip()

    # Strip markdown code fences if the model wrapped its JSON in them.
    if raw.startswith("```json"):
        raw = raw[len("```json"):]
    if raw.endswith("```"):
        raw = raw[:-3]

    try:
        return json.loads(raw.strip())
    except json.JSONDecodeError:
        return {"error": "Failed to parse model output", "raw": raw}
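
# Minimal usage sketch. Assumptions: any LangChain-compatible chat model can be passed
# in; ChatOpenAI is used here purely as an example and requires OPENAI_API_KEY to be
# set in the environment.
if __name__ == "__main__":
    from langchain.chat_models import ChatOpenAI  # example model; swap in your own LLM

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    result = get_market_stock_sentiment(llm)
    print(json.dumps(result, indent=2))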