spaces-research-think

Running

App Files Files Community

spaces-research-think / app.py

ginipick

Update app.py

4e8ea60 verified 7 months ago

raw

history blame

20.7 kB

	import os
	import gradio as gr
	from gradio import ChatMessage
	from typing import Iterator, List, Dict, Tuple, Any
	import google.generativeai as genai
	from huggingface_hub import HfApi
	import requests
	import re
	import traceback

	# HuggingFace 관련 API 키 (스페이스 분석 용)
	HF_TOKEN = os.getenv("HF_TOKEN")
	hf_api = HfApi(token=HF_TOKEN)

	# Gemini 2.0 Flash Thinking 모델 관련 API 키 및 클라이언트 (LLM 용)
	GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
	genai.configure(api_key=GEMINI_API_KEY)
	model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-01-21")


	def get_headers():
	if not HF_TOKEN:
	raise ValueError("Hugging Face token not found in environment variables")
	return {"Authorization": f"Bearer {HF_TOKEN}"}


	def get_file_content(space_id: str, file_path: str) -> str:
	file_url = f"https://huggingface.co/spaces/{space_id}/raw/main/{file_path}"
	try:
	response = requests.get(file_url, headers=get_headers())
	if response.status_code == 200:
	return response.text
	else:
	return f"File not found or inaccessible: {file_path}"
	except requests.RequestException:
	return f"Error fetching content for file: {file_path}"


	def get_space_structure(space_id: str) -> Dict:
	try:
	files = hf_api.list_repo_files(repo_id=space_id, repo_type="space")
	tree = {"type": "directory", "path": "", "name": space_id, "children": []}
	for file in files:
	path_parts = file.split('/')
	current = tree
	for i, part in enumerate(path_parts):
	if i == len(path_parts) - 1: # 파일
	current["children"].append({"type": "file", "path": file, "name": part})
	else:
	found = False
	for child in current["children"]:
	if child["type"] == "directory" and child["name"] == part:
	current = child
	found = True
	break
	if not found:
	new_dir = {"type": "directory", "path": '/'.join(path_parts[:i+1]), "name": part, "children": []}
	current["children"].append(new_dir)
	current = new_dir
	return tree
	except Exception as e:
	print(f"Error in get_space_structure: {str(e)}")
	return {"error": f"API request error: {str(e)}"}


	def format_tree_structure(tree_data: Dict, indent: str = "") -> str:
	if "error" in tree_data:
	return tree_data["error"]
	formatted = f"{indent}{'📁' if tree_data.get('type') == 'directory' else '📄'} {tree_data.get('name', 'Unknown')}\n"
	if tree_data.get("type") == "directory":
	# 디렉토리를 먼저, 파일을 나중에 표시하기 위해 정렬 조건 사용
	for child in sorted(tree_data.get("children", []), key=lambda x: (x.get("type", "") != "directory", x.get("name", ""))):
	formatted += format_tree_structure(child, indent + " ")
	return formatted


	def analyze_space(url: str, progress=gr.Progress()):
	"""
	HuggingFace Space의 app.py와 파일구조 등을 불러와서:
	1) 코드 요약
	2) 코드 분석
	3) 사용법
	등을 반환합니다.
	"""
	try:
	space_id = url.split('spaces/')[-1]
	if not re.match(r'^[\w.-]+/[\w.-]+$', space_id):
	raise ValueError(f"Invalid Space ID format: {space_id}")

	progress(0.1, desc="파일 구조 분석 중...")
	tree_structure = get_space_structure(space_id)
	if "error" in tree_structure:
	raise ValueError(tree_structure["error"])
	tree_view = format_tree_structure(tree_structure)

	progress(0.3, desc="app.py 내용 가져오는 중...")
	app_content = get_file_content(space_id, "app.py")

	progress(0.5, desc="코드 요약 중...")
	summary = summarize_code(app_content)

	progress(0.7, desc="코드 분석 중...")
	analysis = analyze_code(app_content)

	progress(0.9, desc="사용법 설명 생성 중...")
	usage = explain_usage(app_content)

	# lines 수 조정
	lines_for_app_py = adjust_lines_for_code(app_content)

	progress(1.0, desc="완료")
	return app_content, tree_view, tree_structure, space_id, summary, analysis, usage, lines_for_app_py

	except Exception as e:
	print(f"Error in analyze_space: {str(e)}")
	print(traceback.format_exc())
	return f"오류가 발생했습니다: {str(e)}", "", None, "", "", "", "", 10


	def adjust_lines_for_code(code_content: str, min_lines: int = 10, max_lines: int = 100) -> int:
	"""
	코드의 줄 수에 맞춰 표시할 lines 수를 동적으로 조정합니다.
	"""
	num_lines = len(code_content.split('\n'))
	return min(max(num_lines, min_lines), max_lines)


	# --------------------------------------------------
	# Gemini 2.0 Flash Thinking 모델 관련 (LLM) 함수들
	# --------------------------------------------------
	from gradio import ChatMessage

	def format_chat_history(messages: List[ChatMessage]) -> List[Dict]:
	"""
	ChatMessage 목록을 Gemini 모델이 이해할 수 있는 형식으로 변환
	(Thinking 메타데이터 포함 메시지는 무시)
	"""
	formatted = []
	for m in messages:
	# 'Thinking' metadata가 있으면 무시
	if hasattr(m, "metadata") and m.metadata:
	continue
	role = "assistant" if m.role == "assistant" else "user"
	formatted.append({"role": role, "parts": [m.content or ""]})
	return formatted


	def gemini_chat_completion(system_message: str, user_message: str, max_tokens: int = 200, temperature: float = 0.7) -> str:
	"""
	시스템 메시지와 유저 메시지를 받아 Gemini에 스트리밍 요청,
	최종 응답 텍스트를 반환합니다.
	"""
	init_msgs = [
	ChatMessage(role="system", content=system_message),
	ChatMessage(role="user", content=user_message)
	]
	chat_history = format_chat_history(init_msgs)
	chat = model.start_chat(history=chat_history)
	final = ""
	try:
	for chunk in chat.send_message(user_message, stream=True):
	parts = chunk.candidates[0].content.parts
	if len(parts) == 2:
	# Thinking + 최종응답
	final += parts[1].text
	else:
	final += parts[0].text
	return final.strip()
	except Exception as e:
	return f"LLM 호출 중 오류 발생: {str(e)}"


	def summarize_code(app_content: str):
	system_msg = "당신은 Python 코드를 분석하고 요약하는 AI 조수입니다. 주어진 코드를 3줄 이내로 간결하게 요약해주세요."
	user_msg = f"다음 Python 코드를 3줄 이내로 요약해주세요:\n\n{app_content}"
	try:
	return gemini_chat_completion(system_msg, user_msg, max_tokens=200, temperature=0.7)
	except Exception as e:
	return f"요약 생성 중 오류 발생: {str(e)}"


	def analyze_code(app_content: str):
	# 시스템 프롬프트에 '딥씽킹' 안내문 추가
	system_msg = (
	"You are a deep thinking AI. You may use extremely long chains of thought to deeply consider the problem "
	"and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. "
	"You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem. "
	"당신은 Python 코드를 분석하는 AI 조수입니다. 주어진 코드를 분석하여 서비스의 효용성과 활용 측면에서 다음 항목에 대해 설명해주세요:\n"
	"A. 배경 및 필요성\n"
	"B. 기능적 효용성 및 가치\n"
	"C. 특장점\n"
	"D. 적용 대상 및 타겟\n"
	"E. 기대효과\n"
	"기존 및 유사 프로젝트와 비교하여 분석해주세요. Markdown 형식으로 출력하세요."
	)
	user_msg = f"다음 Python 코드를 분석해주세요:\n\n{app_content}"
	try:
	return gemini_chat_completion(system_msg, user_msg, max_tokens=1000, temperature=0.7)
	except Exception as e:
	return f"분석 생성 중 오류 발생: {str(e)}"


	def explain_usage(app_content: str):
	# 시스템 프롬프트에 '딥씽킹' 안내문 추가
	system_msg = (
	"You are a deep thinking AI. You may use extremely long chains of thought to deeply consider the problem "
	"and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. "
	"You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem. "
	"당신은 Python 코드를 분석하여 사용법을 설명하는 AI 조수입니다. 주어진 코드를 바탕으로 마치 화면을 보는 것처럼 사용법을 상세히 설명해주세요. Markdown 형식으로 출력하세요."
	)
	user_msg = f"다음 Python 코드의 사용법을 설명해주세요:\n\n{app_content}"
	try:
	return gemini_chat_completion(system_msg, user_msg, max_tokens=800, temperature=0.7)
	except Exception as e:
	return f"사용법 설명 생성 중 오류 발생: {str(e)}"


	def stream_gemini_response(user_message: str, conversation_state: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
	"""
	conversation_state: ChatMessage 객체로만 이루어진 '대화 이력' (Gradio State).
	(수정) 빈 문자열이어도 처리하도록 변경. 에러를 띄우지 않음.
	"""
	# 기존에는 if not user_message.strip(): ... return 했으나,
	# "Please provide a non-empty text message..." 오류가 불편하다는 요청으로 제거/완화함.
	# 필요하다면 user_message가 정말 아무것도 없을 때 처리 로직을 추가하세요.

	print(f"\n=== New Request ===\nUser message: {user_message if user_message.strip() else '(Empty)'}")

	# 기존 대화를 Gemini 형식으로 변환
	chat_history = format_chat_history(conversation_state)
	chat = model.start_chat(history=chat_history)

	response = chat.send_message(user_message, stream=True)
	thought_buffer = ""
	response_buffer = ""
	thinking_complete = False

	# 'Thinking' 표시용
	conversation_state.append(
	ChatMessage(
	role="assistant",
	content="",
	metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
	)
	)

	try:
	for chunk in response:
	parts = chunk.candidates[0].content.parts
	current_chunk = parts[0].text

	if len(parts) == 2 and not thinking_complete:
	thought_buffer += current_chunk
	print(f"\n=== Complete Thought ===\n{thought_buffer}")
	conversation_state[-1] = ChatMessage(
	role="assistant",
	content=thought_buffer,
	metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
	)
	yield conversation_state

	response_buffer = parts[1].text
	print(f"\n=== Starting Response ===\n{response_buffer}")
	conversation_state.append(
	ChatMessage(role="assistant", content=response_buffer)
	)
	thinking_complete = True

	elif thinking_complete:
	response_buffer += current_chunk
	print(f"\n=== Response Chunk ===\n{current_chunk}")
	conversation_state[-1] = ChatMessage(
	role="assistant",
	content=response_buffer
	)
	else:
	thought_buffer += current_chunk
	print(f"\n=== Thinking Chunk ===\n{current_chunk}")
	conversation_state[-1] = ChatMessage(
	role="assistant",
	content=thought_buffer,
	metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
	)
	yield conversation_state

	print(f"\n=== Final Response ===\n{response_buffer}")

	except Exception as e:
	print(f"\n=== Error ===\n{str(e)}")
	conversation_state.append(
	ChatMessage(
	role="assistant",
	content=f"I apologize, but I encountered an error: {str(e)}"
	)
	)
	yield conversation_state


	def convert_to_display_tuples(messages: List[ChatMessage]) -> List[Tuple[str, str]]:
	"""
	화면에 표시하기 위해 (user, assistant) 튜플 목록으로 변환
	"""
	result = []
	i = 0
	while i < len(messages):
	if messages[i].role == "user":
	user_text = messages[i].content
	assistant_text = ""
	if i + 1 < len(messages) and messages[i+1].role == "assistant":
	assistant_text = messages[i+1].content
	i += 2
	else:
	i += 1
	result.append((user_text, assistant_text))
	else:
	# assistant 단독
	result.append(("", messages[i].content))
	i += 1
	return result


	def user_submit_message(msg: str, conversation_state: List[ChatMessage]):
	"""
	사용자가 메시지를 입력할 때 호출.
	ChatMessage 리스트(conversation_state)에 user 메시지를 추가한 뒤 반환.
	"""
	conversation_state.append(ChatMessage(role="user", content=msg))
	# 입력창은 비워줌
	return "", conversation_state


	def respond_wrapper(message: str, conversation_state: List[ChatMessage], max_tokens, temperature, top_p):
	"""
	유저 메시지를 받아 Gemini에게 요청(스트리밍)하고, 대화 이력을 업데이트 후
	화면에는 (user, assistant) 튜플을 반환한다.
	"""
	for updated_messages in stream_gemini_response(message, conversation_state):
	# 화면 표시용 (user, assistant) 튜플로 변환
	yield "", convert_to_display_tuples(updated_messages)


	def create_ui():
	"""
	Gradio UI를 구성하는 함수
	"""
	try:
	css = """
	footer {visibility: hidden;}
	"""

	with gr.Blocks(css=css) as demo:
	gr.Markdown("# MOUSE: Space Research Thinking")

	with gr.Tabs():
	with gr.TabItem("분석"):
	with gr.Row():
	with gr.Column():
	url_input = gr.Textbox(label="HuggingFace Space URL")
	analyze_button = gr.Button("분석")

	summary_output = gr.Markdown(label="요약")
	analysis_output = gr.Markdown(label="분석")
	usage_output = gr.Markdown(label="사용법")
	tree_view_output = gr.Textbox(label="파일 구조", lines=20)

	with gr.Column():
	code_tabs = gr.Tabs()
	with code_tabs:
	with gr.TabItem("app.py"):
	app_py_content = gr.Code(
	language="python",
	label="app.py",
	lines=50
	)
	with gr.TabItem("requirements.txt"):
	requirements_content = gr.Textbox(
	label="requirements.txt",
	lines=50
	)

	with gr.TabItem("AI 코드챗"):
	gr.Markdown("## 예제를 입력 또는 소스 코드를 붙여넣고 질문하세요")

	# Chatbot은 단지 출력만 담당(튜플을 받아 표시)
	chatbot = gr.Chatbot(
	label="대화",
	height=400
	)

	msg = gr.Textbox(
	label="메시지",
	placeholder="메시지를 입력하세요..."
	)

	# 숨겨진 파라미터
	max_tokens = gr.Slider(
	minimum=1, maximum=8000,
	value=4000, label="Max Tokens",
	visible=False
	)
	temperature = gr.Slider(
	minimum=0, maximum=1,
	value=0.7, label="Temperature",
	visible=False
	)
	top_p = gr.Slider(
	minimum=0, maximum=1,
	value=0.9, label="Top P",
	visible=False
	)

	examples = [
	["상세한 사용 방법을 4000 토큰 이상 상세히 설명"],
	["FAQ 20건을 4000 토큰 이상 작성"],
	["기술 차별점, 강점을 중심으로 4000 토큰 이상 설명"],
	["특허 출원에 활용 가능한 혁신 아이디어를 4000 토큰 이상 작성"],
	["논문 형식으로 4000 토큰 이상 작성"],
	["계속 이어서 답변하라"]
	]
	gr.Examples(examples, inputs=msg)

	# 대화 상태(채팅 기록)는 ChatMessage 객체로만 유지
	conversation_state = gr.State([])

	# 이벤트 체인
	# 1) 유저 메시지 -> user_submit_message -> (입력창 비움 + state추가)
	# 2) respond_wrapper -> Gemini 스트리밍 -> 대화 state 갱신 -> (user,assistant) 튜플 변환
	msg.submit(
	user_submit_message,
	inputs=[msg, conversation_state],
	outputs=[msg, conversation_state],
	queue=False
	).then(
	respond_wrapper,
	inputs=[msg, conversation_state, max_tokens, temperature, top_p],
	outputs=[msg, chatbot],
	)

	with gr.TabItem("Recommended Best"):
	gr.Markdown(
	"Discover recommended HuggingFace Spaces [here](https://huggingface.co/spaces/openfree/Korean-Leaderboard)."
	)

	# 분석 버튼 로직
	space_id_state = gr.State()
	tree_structure_state = gr.State()
	app_py_content_lines = gr.State()

	analyze_button.click(
	analyze_space,
	inputs=[url_input],
	outputs=[
	app_py_content,
	tree_view_output,
	tree_structure_state,
	space_id_state,
	summary_output,
	analysis_output,
	usage_output,
	app_py_content_lines
	]
	).then(
	lambda space_id: get_file_content(space_id, "requirements.txt"),
	inputs=[space_id_state],
	outputs=[requirements_content]
	).then(
	lambda lines: gr.update(lines=lines),
	inputs=[app_py_content_lines],
	outputs=[app_py_content]
	)

	return demo

	except Exception as e:
	print(f"Error in create_ui: {str(e)}")
	print(traceback.format_exc())
	raise


	if __name__ == "__main__":
	try:
	print("Starting HuggingFace Space Analyzer...")
	demo = create_ui()
	print("UI created successfully.")
	print("Configuring Gradio queue...")
	demo.queue()
	print("Gradio queue configured.")
	print("Launching Gradio app...")
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False,
	debug=True,
	show_api=False
	)
	print("Gradio app launched successfully.")
	except Exception as e:
	print(f"Error in main: {str(e)}")
	print("Detailed error information:")
	print(traceback.format_exc())
	raise