# stock-api / core / market_stock_sentiment.py
# Author: rajat5ranjan
# Last change: "Update core/market_stock_sentiment.py" (commit 1ec1b3d, verified)
import requests
import json
from datetime import datetime, timedelta
from tradingview_ta import TA_Handler, Interval
from langchain.document_loaders import WebBaseLoader
from langchain.docstore.document import Document
from bs4 import BeautifulSoup
from GoogleNews import GoogleNews
from langchain.prompts import PromptTemplate
from langchain.chains import StuffDocumentsChain, LLMChain
def clean_google_news_url(url: str) -> str:
    """Strip Google News tracking junk from an article URL.

    Args:
        url: Raw link taken from a Google News result. May be None/empty
            (some results carry no link), in which case "" is returned
            instead of raising AttributeError.

    Returns:
        The URL truncated at its article extension (".html"/".cms") when
        present, otherwise truncated at the first "&" query separator.
    """
    # Guard: article.get("link") at the call site can legitimately be None.
    if not url:
        return ""
    # Indian news sites (Economic Times etc.) end article paths in
    # ".html"/".cms"; anything after the extension is tracking payload.
    for ext in (".html", ".cms"):
        if ext in url:
            return url.split(ext)[0] + ext
    # Fall back to dropping appended "&ved=..."-style parameters.
    return url.split("&")[0]
def get_google_news_documents(query: str, max_articles: int = 10, timeout: int = 10):
    """Search Google News for *query* and return scraped article Documents.

    Searches articles published in the last two days, downloads each page,
    and extracts the visible paragraph text. Articles without a link, that
    fail to download, or whose extracted body is too short (paywall stubs,
    cookie walls) are silently skipped — this is a best-effort collector.

    Args:
        query: Search phrase passed to Google News.
        max_articles: Maximum number of search results to fetch.
        timeout: Per-request HTTP timeout in seconds.

    Returns:
        list[Document]: one LangChain Document per successfully scraped
        article, with source/title/published/link metadata.
    """
    googlenews = GoogleNews(lang="en")
    end_date = datetime.today()
    start_date = end_date - timedelta(days=2)
    # GoogleNews expects MM/DD/YYYY date strings.
    googlenews.set_time_range(start_date.strftime("%m/%d/%Y"), end_date.strftime("%m/%d/%Y"))
    googlenews.search(query)
    articles = googlenews.result()

    documents = []
    for article in articles[:max_articles]:
        link = article.get("link")
        if not link:
            # Some results carry no link; skip rather than crash the batch.
            continue
        url = clean_google_news_url(link)
        try:
            response = requests.get(url, timeout=timeout, headers={"User-Agent": "Mozilla/5.0"})
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = soup.find_all("p")
            content = "\n".join(p.get_text(strip=True) for p in paragraphs if p.get_text(strip=True))
            # Require a minimum body length to filter out near-empty pages.
            if content and len(content) > 200:
                documents.append(
                    Document(
                        page_content=content,
                        metadata={
                            "source": "Google News",
                            "title": article.get("title", ""),
                            "published": article.get("date", ""),
                            "link": url,
                        },
                    )
                )
        except Exception:
            # Best effort: one bad article must not abort the whole batch.
            continue
    return documents
def get_market_stock_sentiment(llm):
    """Scrape Indian stock-market news and ask *llm* for ranked stock picks.

    Loads a fixed set of Indian market-news pages plus recent Google News
    articles, stuffs them all into a single prompt via StuffDocumentsChain,
    and asks the model for a JSON ranking of top stocks with sentiment,
    the key news event, and a Buy/Sell/Hold/Watch signal.

    Args:
        llm: A LangChain-compatible LLM/chat-model instance.

    Returns:
        dict: The parsed ``{"top_picks": [...]}`` structure on success, or
        ``{"error": ..., "raw": ...}`` when the model output is not valid
        JSON (the raw text is preserved for debugging).
    """
    # Plain string literals — no interpolation needed (were needless f-strings).
    urls = [
        "https://economictimes.indiatimes.com/markets/stocks/news",
        "https://www.livemint.com/market/stock-market-news/",
        "https://in.tradingview.com/ideas/editors-picks/?type=trade",
        "https://pulse.zerodha.com/",
        "https://upstox.com/news/market-news/stocks/",
    ]
    loader = WebBaseLoader(urls)
    web_docs = loader.load()
    google_docs = get_google_news_documents("Indian Stock market news NSE, Stocks in Action, Stocks in News, Stocks to Buy in next few weeks", max_articles=10)
    web_docs.extend(google_docs)

    # Double braces in the template escape literal JSON braces for LangChain.
    prompt_template = """You are an expert Stock Market Trader specializing in stock market insights derived from fundamental analysis, analytical trends, profit-based evaluations, news indicators from different sites and detailed company financials.
You will receive stock market news articles or stocks in news from various news websites which have India stock news feed. For the below context/input_documents, perform the following tasks:
Context:
{input_documents}
1. **Top picks**: After analyzing all provided data, rank the top 5-10 stocks to look at this week, including tickers, current sentiment, and why they made the list.
2. **Identify the stock(s)** mentioned (by ticker and company name).
3. **Sentiment analysis**: classify as Bullish, Bearish, or Neutral.
4. **Extract critical news**: What is the main event or update? (e.g., earnings beat, regulatory approval, management change, major contract or macro impact).
5. **Summarize impact**: Briefly explain how this news might affect stock price and investor behavior (e.g., “could boost investor confidence”, “sign indicates profit pressure”, etc.).
6. **Actionable signal**: Based on the sentiment and news, suggest whether this is a “Buy”, “Sell”, “Hold”, or “Watch” recommendation, and the rationale.
PROVIDE THE DETAILS based on just the FACTS present in the document. Do NOT DUPLICATE the Output & hallucinate.
***Format your output as JSON*** with the following structure:
```json
{{
"top_picks": [
{{
"ticker": "TICKER",
"company": "Company Name",
"sentiment": "Bullish|Bearish|Neutral",
"critical_news": "Brief summary of the key event",
"impact_summary": "How this may affect the stock",
"action": "Buy|Sell|Hold|Watch",
"reason": "Why this stock ranks among top picks"
}},
...
]
}}
"""
    prompt = PromptTemplate.from_template(prompt_template)
    chain = StuffDocumentsChain(llm_chain=LLMChain(llm=llm, prompt=prompt), document_variable_name="input_documents")
    response = chain.invoke({"input_documents": web_docs})
    raw = response["output_text"].strip()

    # Models often wrap JSON in a fenced code block; strip "```json" AND
    # bare "```" fences (the original only handled the former).
    if raw.startswith("```json"):
        raw = raw[len("```json"):]
    elif raw.startswith("```"):
        raw = raw[3:]
    if raw.endswith("```"):
        raw = raw[:-3]
    try:
        return json.loads(raw.strip())
    except json.JSONDecodeError:
        # Keep the raw text so callers can inspect what the model produced.
        return {"error": "Failed to parse model output", "raw": raw}