# stock-api / core / market_stock_sentiment.py
# Author: rajat5ranjan
# Last change: "Update core/market_stock_sentiment.py" (commit 1ec1b3d, verified)
import requests
import json
from datetime import datetime, timedelta
from tradingview_ta import TA_Handler, Interval
from langchain.document_loaders import WebBaseLoader
from langchain.docstore.document import Document
from bs4 import BeautifulSoup
from GoogleNews import GoogleNews
from langchain.prompts import PromptTemplate
from langchain.chains import StuffDocumentsChain, LLMChain
def clean_google_news_url(url: str) -> str:
    """Strip Google News tracking junk from an article URL.

    Args:
        url: Raw link taken from a Google News result. May be None/empty
            (some results carry no link), in which case "" is returned
            instead of raising AttributeError.

    Returns:
        The URL truncated at its article extension (".html"/".cms") when
        present, otherwise truncated at the first "&" query separator.
    """
    # Guard: article.get("link") at the call site can legitimately be None.
    if not url:
        return ""
    # Indian news sites (Economic Times etc.) end article paths in
    # ".html"/".cms"; anything after the extension is tracking payload.
    for ext in (".html", ".cms"):
        if ext in url:
            return url.split(ext)[0] + ext
    # Fall back to dropping appended "&ved=..."-style parameters.
    return url.split("&")[0]
def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
    """Search Google News for *query* and return scraped article Documents.

    Searches articles published in the last two days, downloads each page,
    and extracts the visible paragraph text. Articles without a link, that
    fail to download, or whose extracted body is too short (paywall stubs,
    cookie walls) are silently skipped — this is a best-effort collector.

    Args:
        query: Search phrase passed to Google News.
        max_articles: Maximum number of search results to fetch.
        timeout: Per-request HTTP timeout in seconds.

    Returns:
        list[Document]: one LangChain Document per successfully scraped
        article, with source/title/published/link metadata.
    """
    googlenews = GoogleNews(lang="en")
    end_date = datetime.today()
    start_date = end_date - timedelta(days=2)
    # GoogleNews expects MM/DD/YYYY date strings.
    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
    googlenews.search(query)
    articles = googlenews.result()

    documents = []
    for article in articles[:max_articles]:
        link = article.get("link")
        if not link:
            # Some results carry no link; skip rather than crash the batch.
            continue
        url = clean_google_news_url(link)
        try:
            response = requests.get(url, timeout=timeout, headers={"User-Agent": "Mozilla/5.0"})
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = soup.find_all("p")
            content = "\n".join(p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True))
            # Require a minimum body length to filter out near-empty pages.
            if content and len(content) > 200:
                documents.append(
                    Document(
                        page_content=content,
                        metadata={
                            "source": "Google News",
                            "title": article.get("title", ""),
                            "published": article.get("date", ""),
                            "link": url,
                        },
                    )
                )
        except Exception:
            # Best effort: one bad article must not abort the whole batch.
            continue
    return documents
def get_market_stock_sentiment(llm):
    """Scrape Indian stock-market news and ask *llm* for ranked stock picks.

    Loads a fixed set of Indian market-news pages plus recent Google News
    articles, stuffs them all into a single prompt via StuffDocumentsChain,
    and asks the model for a JSON ranking of top stocks with sentiment,
    the key news event, and a Buy/Sell/Hold/Watch signal.

    Args:
        llm: A LangChain-compatible LLM/chat-model instance.

    Returns:
        dict: The parsed ``{"top_picks": [...]}`` structure on success, or
        ``{"error": ..., "raw": ...}`` when the model output is not valid
        JSON (the raw text is preserved for debugging).
    """
    # Plain string literals — no interpolation needed (were needless f-strings).
    urls = [
        "https://economictimes.indiatimes.com/markets/stocks/news",
        "https://www.livemint.com/market/stock-market-news/",
        "https://in.tradingview.com/ideas/editors-picks/?type=trade",
        "https://pulse.zerodha.com/",
        "https://upstox.com/news/market-news/stocks/",
    ]
    loader = WebBaseLoader(urls)
    web_docs = loader.load()
    google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks", max_articles=10)
    web_docs.extend(google_docs)

    # Double braces in the template escape literal JSON braces for LangChain.
    prompt_template = """You are an expert Stock Market Trader specializing in stock market insights derived from fundamental analysis, analytical trends, profit-based evaluations, news indicators from different sites and detailed company financials.
You will receive stock market news articles or stocks in news from various news websites which have India stock news feed. For the below context/input_documents, perform the following tasks:
Context:
{input_documents}
1. **Top picks**: After analyzing all provided data, rank the top 5-10 stocks to look at this week, including tickers, current sentiment, and why they made the list.
2. **Identify the stock(s)** mentioned (by ticker and company name).
3. **Sentiment analysis**: classify as Bullish, Bearish, or Neutral.
4. **Extract critical news**: What is the main event or update? (e.g., earnings beat, regulatory approval, management change, major contract or macro impact).
5. **Summarize impact**: Briefly explain how this news might affect stock price and investor behavior (e.g., “could boost investor confidence”, “sign indicates profit pressure”, etc.).
6. **Actionable signal**: Based on the sentiment and news, suggest whether this is a “Buy”, “Sell”, “Hold”, or “Watch” recommendation, and the rationale.
PROVIDE THE DETAILS based on just the FACTS present in the document. Do NOT DUPLICATE the Output & hallucinate.
***Format your output as JSON*** with the following structure:
```json
{{
"top_picks": [
{{
"ticker": "TICKER",
"company": "Company Name",
"sentiment": "Bullish|Bearish|Neutral",
"critical_news": "Brief summary of the key event",
"impact_summary": "How this may affect the stock",
"action": "Buy|Sell|Hold|Watch",
"reason": "Why this stock ranks among top picks"
}},
...
]
}}
"""
    prompt = PromptTemplate.from_template(prompt_template)
    chain = StuffDocumentsChain(llm_chain=LLMChain(llm=llm, prompt=prompt), document_variable_name="input_documents")
    response = chain.invoke({"input_documents": web_docs})
    raw = response["output_text"].strip()

    # Models often wrap JSON in a fenced code block; strip "```json" AND
    # bare "```" fences (the original only handled the former).
    if raw.startswith("```json"):
        raw = raw[len("```json"):]
    elif raw.startswith("```"):
        raw = raw[3:]
    if raw.endswith("```"):
        raw = raw[:-3]
    try:
        return json.loads(raw.strip())
    except json.JSONDecodeError:
        # Keep the raw text so callers can inspect what the model produced.
        return {"error": "Failed to parse model output", "raw": raw}