spaces-research-think

Running

File size: 20,095 Bytes

6035bfe
a8eb718
 
5dfb15d
9470e9c
a8eb718
6035bfe
a8eb718
6035bfe
 
db3cf6b
6035bfe
 
 
a8eb718
 
 
 
db3cf6b
4e8ea60
6035bfe
 
 
 
 
4e8ea60
6035bfe
 
 
 
 
 
 
 
 
 
 
4e8ea60
6035bfe
 
 
 
 
 
 
 
 
 
9470e9c
6035bfe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e8ea60
6035bfe
4e8ad25
 
 
 
eac18b7
4e8ad25
6035bfe
 
 
4e8ea60
f00b06e
4e8ea60
 
 
 
 
 
 
f00b06e
 
e0713c0
 
4e8ea60
f00b06e
 
4e8ad25
 
f00b06e
4e8ea60
f00b06e
 
4e8ea60
10a5dab
 
4e8ea60
10a5dab
 
4e8ea60
10a5dab
 
4e8ea60
9696775
4e8ea60
f00b06e
9696775
4e8ea60
f00b06e
 
 
4dd03cf
10a5dab
4e8ea60
9696775
4e8ea60
 
 
9696775
 
 
4e8ea60
eac18b7
4e8ea60
9696775
 
9470e9c
9696775
 
eac18b7
9696775
 
 
eac18b7
9696775
 
 
 
 
 
eac18b7
 
4e8ea60
 
eac18b7
4e8ea60
9696775
a8eb718
 
 
9696775
a8eb718
9696775
a8eb718
 
 
 
9696775
a8eb718
9696775
 
a8eb718
 
0ef3920
4e8ea60
9696775
 
 
a8eb718
9696775
a8eb718
 
 
4e8ea60
9696775
 
 
 
 
 
a8eb718
 
 
 
 
 
 
9696775
a8eb718
9696775
a8eb718
 
 
4e8ea60
9696775
 
 
 
 
 
 
 
a8eb718
9696775
a8eb718
 
 
4e8ea60
9696775
2a0f996
eac18b7
 
2a0f996
eac18b7
 
 
 
 
 
 
 
 
 
9696775
eac18b7
9696775
 
 
eac18b7
9696775
 
 
 
eac18b7
9696775
 
 
 
 
a8eb718
9696775
a8eb718
4e8ea60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9696775
a8eb718
4e8ea60
a8eb718
4e8ea60
 
 
 
a8eb718
eac18b7
a8eb718
4e8ea60
9696775
 
a8eb718
9696775
a8eb718
eac18b7
a8eb718
9696775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9470e9c
4e8ea60
9696775
 
eac18b7
9696775
 
4e8ea60
9696775
a8eb718
4e8ea60
9696775
 
eac18b7
9696775
 
 
e2baeda
4e8ea60
6035bfe
4e8ea60
 
 
6035bfe
65ee007
9696775
65ee007
d972c46
9696775
 
 
 
6035bfe
 
9696775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892de37
e2baeda
9696775
 
eac18b7
a8eb718
 
eac18b7
 
a8eb718
14c240d
9696775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6035bfe
 
9696775
 
 
 
 
 
6035bfe
 
 
eac18b7
9696775
 
eac18b7
 
9696775
 
 
4e8ea60
9696775
 
 
 
eac18b7
9696775
 
9bbcc80
 
9696775
9bbcc80
c292539
eac18b7
81ba805
 
4dd03cf
81ba805
 
 
 
9696775
 
 
 
 
 
 
 
 
 
81ba805
 
 
 
9696775
4e8ea60
9696775
 
81ba805
58bb0eb
b557bec
 
 
 
 
0370aef
c9c6fdf
4e8ea60
c9c6fdf
 
 
 
4e8ea60
 
c9c6fdf
4e8ea60
 
 
 
 
 
 
 
 
 
c9c6fdf
 
4e8ea60
c9c6fdf

import os
import gradio as gr
from gradio import ChatMessage
from typing import Iterator, List, Dict, Tuple, Any
import google.generativeai as genai
from huggingface_hub import HfApi
import requests
import re
import traceback

# Hugging Face API token, read from the environment (used for Space analysis).
# os.getenv returns None when HF_TOKEN is unset; get_headers() raises in that case.
HF_TOKEN = os.getenv("HF_TOKEN")
hf_api = HfApi(token=HF_TOKEN)

# Gemini API key and the shared model client (used for every LLM call in this file).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-01-21")


def get_headers():
    """Build the HTTP Authorization header for Hugging Face requests.

    Returns:
        A dict holding a single ``Authorization`` bearer-token entry.

    Raises:
        ValueError: If ``HF_TOKEN`` is unset or empty.
    """
    if HF_TOKEN:
        return {"Authorization": f"Bearer {HF_TOKEN}"}
    raise ValueError("Hugging Face token not found in environment variables")


def get_file_content(space_id: str, file_path: str, timeout: float = 30.0) -> str:
    """Fetch the raw text of one file from a Hugging Face Space (``main`` branch).

    Args:
        space_id: Space identifier in ``owner/name`` form.
        file_path: Path of the file inside the Space repository.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            calling UI callback on a stalled connection.

    Returns:
        The file body on HTTP 200; otherwise a human-readable message string
        (not an exception) describing the failure.

    Raises:
        ValueError: Propagated from ``get_headers`` when no token is configured.
    """
    file_url = f"https://huggingface.co/spaces/{space_id}/raw/main/{file_path}"
    try:
        response = requests.get(file_url, headers=get_headers(), timeout=timeout)
    except requests.RequestException:
        # Timeouts are RequestException subclasses, so they land here too.
        return f"Error fetching content for file: {file_path}"
    if response.status_code == 200:
        return response.text
    return f"File not found or inaccessible: {file_path}"


def get_space_structure(space_id: str) -> Dict:
    """Build a nested directory tree from the flat file list of a Space.

    Each node is a dict with ``type`` ("directory" or "file"), ``path`` and
    ``name``; directory nodes also carry a ``children`` list. The root node is
    a directory named after ``space_id`` with an empty path.

    Returns:
        The root node, or ``{"error": message}`` if anything raises while
        listing or building (the error is also printed).
    """
    try:
        repo_files = hf_api.list_repo_files(repo_id=space_id, repo_type="space")
        root = {"type": "directory", "path": "", "name": space_id, "children": []}
        for repo_file in repo_files:
            segments = repo_file.split('/')
            node = root
            # Descend through (creating where missing) every directory that
            # precedes the file name.
            for depth, dir_name in enumerate(segments[:-1], start=1):
                match = next(
                    (
                        child
                        for child in node["children"]
                        if child["type"] == "directory" and child["name"] == dir_name
                    ),
                    None,
                )
                if match is None:
                    match = {
                        "type": "directory",
                        "path": '/'.join(segments[:depth]),
                        "name": dir_name,
                        "children": [],
                    }
                    node["children"].append(match)
                node = match
            # The final segment is the file itself.
            node["children"].append({"type": "file", "path": repo_file, "name": segments[-1]})
        return root
    except Exception as e:
        reason = str(e)
        print(f"Error in get_space_structure: {reason}")
        return {"error": f"API request error: {reason}"}


def format_tree_structure(tree_data: Dict, indent: str = "") -> str:
    """Render a tree node (and its descendants) as indented text lines.

    Args:
        tree_data: A node dict with optional ``type``, ``name`` and
            ``children`` keys, or a dict containing an ``error`` key.
        indent: Prefix for this node's line; children get two more spaces.

    Returns:
        The value under ``error`` if that key is present; otherwise one line
        per node, directories listed before files and each group sorted by name.
    """
    if "error" in tree_data:
        return tree_data["error"]

    is_directory = tree_data.get("type") == "directory"
    icon = '📁' if is_directory else '📄'
    rendered = [f"{indent}{icon} {tree_data.get('name', 'Unknown')}\n"]

    if is_directory:
        def _directories_first(entry):
            # False sorts before True, so directories come ahead of files.
            return (entry.get("type", "") != "directory", entry.get("name", ""))

        ordered_children = sorted(tree_data.get("children", []), key=_directories_first)
        rendered.extend(
            format_tree_structure(child, indent + "  ") for child in ordered_children
        )
    return "".join(rendered)


def analyze_space(url: str, progress=gr.Progress()):
    """Analyze a Hugging Face Space: file tree, app.py, and LLM write-ups.

    Args:
        url: Space URL (or a bare ``owner/name`` id). Surrounding whitespace,
            leading/trailing slashes and any ``?query`` / ``#fragment`` suffix
            are ignored so URLs pasted from a browser are accepted.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        An 8-tuple ``(app_content, tree_view, tree_structure, space_id,
        summary, analysis, usage, lines_for_app_py)``. On any failure the
        first item carries the error message and the rest are placeholders
        (empty strings, ``None`` for the tree, ``10`` for the line count).
    """
    try:
        # Everything after the last "spaces/" is the candidate id; then strip
        # URL decoration that is not part of the id itself.
        space_id = url.strip().split('spaces/')[-1]
        space_id = re.split(r'[?#]', space_id, maxsplit=1)[0].strip('/')
        if not re.fullmatch(r'[\w.-]+/[\w.-]+', space_id):
            raise ValueError(f"Invalid Space ID format: {space_id}")

        progress(0.1, desc="파일 구조 분석 중...")
        tree_structure = get_space_structure(space_id)
        if "error" in tree_structure:
            raise ValueError(tree_structure["error"])
        tree_view = format_tree_structure(tree_structure)

        progress(0.3, desc="app.py 내용 가져오는 중...")
        app_content = get_file_content(space_id, "app.py")

        progress(0.5, desc="코드 요약 중...")
        summary = summarize_code(app_content)

        progress(0.7, desc="코드 분석 중...")
        analysis = analyze_code(app_content)

        progress(0.9, desc="사용법 설명 생성 중...")
        usage = explain_usage(app_content)

        lines_for_app_py = adjust_lines_for_code(app_content)

        progress(1.0, desc="완료")
        return app_content, tree_view, tree_structure, space_id, summary, analysis, usage, lines_for_app_py

    except Exception as e:
        # Report the failure in the first output slot instead of raising, so
        # the UI always receives a full set of values.
        print(f"Error in analyze_space: {str(e)}")
        print(traceback.format_exc())
        return f"오류가 발생했습니다: {str(e)}", "", None, "", "", "", "", 10


def adjust_lines_for_code(code_content: str, min_lines: int = 10, max_lines: int = 100) -> int:
    """Choose how many editor lines to display for the given code.

    The newline-delimited line count of ``code_content`` is first raised to at
    least ``min_lines`` and then capped at ``max_lines``.
    """
    line_count = code_content.count('\n') + 1
    if line_count < min_lines:
        line_count = min_lines
    if line_count > max_lines:
        line_count = max_lines
    return line_count

# --------------------------------------------------
# Gemini 2.0 Flash Thinking model (LLM) helper functions
# --------------------------------------------------
from gradio import ChatMessage

def format_chat_history(messages: List["ChatMessage"]) -> List[Dict]:
    """Convert chat messages into the history format expected by Gemini.

    Messages carrying non-empty ``metadata`` (the "Thinking" display entries)
    are skipped so they are not sent back to the model as history.

    Args:
        messages: Objects exposing ``role`` and ``content`` (and optionally
            ``metadata``), e.g. ``gradio.ChatMessage`` instances.

    Returns:
        A list of ``{"role": ..., "parts": [text]}`` dicts. Assistant turns
        are emitted with role ``"model"`` because the Gemini API only accepts
        ``"user"`` and ``"model"``; every other role (including ``"system"``)
        is sent as ``"user"``. A ``None``/empty content becomes ``""``.
    """
    formatted = []
    for m in messages:
        if getattr(m, "metadata", None):  # skip "Thinking" entries
            continue
        role = "model" if m.role == "assistant" else "user"
        formatted.append({"role": role, "parts": [m.content or ""]})
    return formatted


import google.generativeai as genai

def gemini_chat_completion(system_message: str, user_message: str, max_tokens: int = 200, temperature: float = 0.7) -> str:
    """Run a one-shot Gemini request and return the concatenated streamed text.

    The system prompt is supplied as prior chat history and the user prompt is
    sent exactly once as the new message.

    Args:
        system_message: Instructions placed ahead of the user prompt.
        user_message: The prompt to answer.
        max_tokens: Accepted for interface compatibility; not currently
            forwarded to the model.
        temperature: Accepted for interface compatibility; not currently
            forwarded to the model.

    Returns:
        The stripped response text, or an error-message string if sending or
        streaming raises.
    """
    # Only the system prompt is seeded into the history: send_message() below
    # contributes the user prompt itself, so adding it here as well would make
    # the model receive the (potentially large) prompt twice.
    chat_history = format_chat_history(
        [ChatMessage(role="system", content=system_message)]
    )
    chat = model.start_chat(history=chat_history)
    pieces = []
    try:
        for chunk in chat.send_message(user_message, stream=True):
            parts = chunk.candidates[0].content.parts
            # For a two-part chunk the second part is taken; otherwise the first.
            pieces.append(parts[1].text if len(parts) == 2 else parts[0].text)
        return "".join(pieces).strip()
    except Exception as e:
        return f"LLM 호출 중 오류 발생: {str(e)}"


def summarize_code(app_content: str):
    """Ask the LLM for a summary (three lines or fewer) of the given Python code.

    Returns the model's text, or an error-message string if the call raises.
    """
    instructions = "당신은 Python 코드를 분석하고 요약하는 AI 조수입니다. 주어진 코드를 3줄 이내로 간결하게 요약해주세요."
    prompt = f"다음 Python 코드를 3줄 이내로 요약해주세요:\n\n{app_content}"
    try:
        summary = gemini_chat_completion(instructions, prompt, max_tokens=200, temperature=0.7)
    except Exception as exc:
        return f"요약 생성 중 오류 발생: {str(exc)}"
    return summary


def analyze_code(app_content: str):
    """Ask the LLM for a Markdown analysis of the service the code implements.

    The prompt requests five sections (A-E) plus a comparison with existing
    and similar projects. Returns the model's text, or an error-message string
    if the call raises.
    """
    reasoning_preamble = (
        "You are a deep thinking AI. You may use extremely long chains of thought to deeply consider the problem "
        "and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. "
        "You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem. "
    )
    # Joined with newlines below; the last entry carries no trailing newline.
    task_lines = [
        "당신은 Python 코드를 분석하는 AI 조수입니다. 주어진 코드를 분석하여 서비스의 효용성과 활용 측면에서 다음 항목에 대해 설명해주세요:",
        "A. 배경 및 필요성",
        "B. 기능적 효용성 및 가치",
        "C. 특장점",
        "D. 적용 대상 및 타겟",
        "E. 기대효과",
        "기존 및 유사 프로젝트와 비교하여 분석해주세요. Markdown 형식으로 출력하세요.",
    ]
    instructions = reasoning_preamble + "\n".join(task_lines)
    prompt = f"다음 Python 코드를 분석해주세요:\n\n{app_content}"
    try:
        analysis = gemini_chat_completion(instructions, prompt, max_tokens=1000, temperature=0.7)
    except Exception as exc:
        return f"분석 생성 중 오류 발생: {str(exc)}"
    return analysis


def explain_usage(app_content: str):
    """Ask the LLM for a detailed Markdown usage guide for the given code.

    Returns the model's text, or an error-message string if the call raises.
    """
    reasoning_preamble = (
        "You are a deep thinking AI. You may use extremely long chains of thought to deeply consider the problem "
        "and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. "
        "You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem. "
    )
    task = "당신은 Python 코드를 분석하여 사용법을 설명하는 AI 조수입니다. 주어진 코드를 바탕으로 마치 화면을 보는 것처럼 사용법을 상세히 설명해주세요. Markdown 형식으로 출력하세요."
    instructions = reasoning_preamble + task
    prompt = f"다음 Python 코드의 사용법을 설명해주세요:\n\n{app_content}"
    try:
        usage = gemini_chat_completion(instructions, prompt, max_tokens=800, temperature=0.7)
    except Exception as exc:
        return f"사용법 설명 생성 중 오류 발생: {str(exc)}"
    return usage


def stream_gemini_response(user_message: str, conversation_state: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
    """Stream a Gemini reply, updating ``conversation_state`` in place.

    A blank ``user_message`` is answered with a fixed note and no LLM call.
    Otherwise a "Thinking" entry (marked by ``metadata``) is appended and
    filled from the streamed chunks until a two-part chunk arrives; from then
    on a separate assistant entry accumulates the response text.

    Args:
        user_message: Text to send to the model.
        conversation_state: Chat history; mutated by appending/replacing
            entries as chunks arrive.

    Yields:
        The same ``conversation_state`` list after each update. If the request
        or the streaming raises, an apology entry containing the error text is
        appended and yielded instead of propagating the exception.
    """
    # A completely blank message gets a short note instead of a model call.
    if not user_message.strip():
        conversation_state.append(
            ChatMessage(
                role="assistant",
                content="(Note: You sent an empty message. No LLM call was made.)"
            )
        )
        yield conversation_state
        return

    print(f"\n=== New Request ===\nUser message: {user_message}")

    # send_message() contributes the new user turn itself. If the caller has
    # already recorded that same turn at the end of the state, leave it out of
    # the history so the model does not receive it twice.
    prior_messages = conversation_state
    if (
        prior_messages
        and prior_messages[-1].role == "user"
        and prior_messages[-1].content == user_message
    ):
        prior_messages = prior_messages[:-1]
    chat_history = format_chat_history(prior_messages)

    thinking_title = "⚙️ Thinking: *The thoughts produced by the model are experimental"
    thought_buffer = ""
    response_buffer = ""
    thinking_complete = False

    try:
        # Creating the chat and issuing the request happen inside the try so
        # request-time failures are reported through the apology entry below.
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(user_message, stream=True)

        # Placeholder entry that displays the model's "Thinking" text.
        conversation_state.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": thinking_title}
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Two-part chunk: first part closes the thought, second part
                # opens the response.
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")
                conversation_state[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )
                yield conversation_state

                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")
                conversation_state.append(
                    ChatMessage(role="assistant", content=response_buffer)
                )
                thinking_complete = True

            elif thinking_complete:
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")
                conversation_state[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )
            else:
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
                conversation_state[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )
            yield conversation_state

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        print(f"\n=== Error ===\n{str(e)}")
        conversation_state.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but encountered an error: {str(e)}"
            )
        )
        yield conversation_state


def convert_to_display_tuples(messages: List[ChatMessage]) -> List[Tuple[str, str]]:
    """Convert a ChatMessage list into (user, assistant) display tuples.

    A user message is paired with the message right after it when that one is
    an assistant message (otherwise with ""). Any other message becomes
    ``("", content)`` on its own.
    """
    pairs = []
    consumed_as_reply = False
    for index, current in enumerate(messages):
        if consumed_as_reply:
            # This entry was already emitted as the reply of the previous pair.
            consumed_as_reply = False
            continue
        if current.role != "user":
            # Stand-alone non-user entry.
            pairs.append(("", current.content))
            continue
        user_text = current.content
        reply_text = ""
        has_next = index + 1 < len(messages)
        if has_next and messages[index + 1].role == "assistant":
            reply_text = messages[index + 1].content
            consumed_as_reply = True
        pairs.append((user_text, reply_text))
    return pairs


def user_submit_message(msg: str, conversation_state: List[ChatMessage]):
    """Record a newly submitted user message.

    Appends ``msg`` to ``conversation_state`` (in place) as a user turn and
    returns ``("", conversation_state)``; the empty string is the new value
    for the input box this callback is wired to.
    """
    user_turn = ChatMessage(role="user", content=msg)
    conversation_state.append(user_turn)
    cleared_input = ""
    return cleared_input, conversation_state


def respond_wrapper(message: str, conversation_state: List[ChatMessage], max_tokens, temperature, top_p):
    """Stream the Gemini reply and yield chatbot updates as (user, assistant) tuples.

    Args:
        message: Current textbox value. The UI runs ``user_submit_message``
            first, which clears the textbox, so this is normally empty by the
            time this callback runs; in that case the text is recovered from
            the user turn that step appended to ``conversation_state``.
        conversation_state: Chat history; extended in place while streaming.
        max_tokens: Received from the UI slider; not used here.
        temperature: Received from the UI slider; not used here.
        top_p: Received from the UI slider; not used here.

    Yields:
        ``("", display_pairs)`` -- an empty string for the textbox and the
        history converted by ``convert_to_display_tuples``.
    """
    if not message.strip() and conversation_state:
        latest = conversation_state[-1]
        if latest.role == "user" and latest.content:
            message = latest.content
    for updated_messages in stream_gemini_response(message, conversation_state):
        yield "", convert_to_display_tuples(updated_messages)


def create_ui():
    """Build the Gradio Blocks UI and wire up its callbacks.

    The UI has three tabs: Space analysis (URL input, summary / analysis /
    usage output, file tree and code viewers), an AI code chat backed by
    ``respond_wrapper``, and a static recommendations link.

    Returns:
        The constructed ``gr.Blocks`` app.

    Raises:
        Exception: Any construction error is printed with its traceback and
            re-raised.
    """
    try:
        css = """
        footer {visibility: hidden;}
        """

        with gr.Blocks(css=css) as demo:
            gr.Markdown("# MOUSE: Space Research Thinking")

            with gr.Tabs():
                with gr.TabItem("분석"):
                    with gr.Row():
                        with gr.Column():
                            url_input = gr.Textbox(label="HuggingFace Space URL")
                            analyze_button = gr.Button("분석")

                            summary_output = gr.Markdown(label="요약")
                            analysis_output = gr.Markdown(label="분석")
                            usage_output = gr.Markdown(label="사용법")
                            tree_view_output = gr.Textbox(label="파일 구조", lines=20)

                        with gr.Column():
                            code_tabs = gr.Tabs()
                            with code_tabs:
                                with gr.TabItem("app.py"):
                                    app_py_content = gr.Code(
                                        language="python",
                                        label="app.py",
                                        lines=50
                                    )
                                with gr.TabItem("requirements.txt"):
                                    requirements_content = gr.Textbox(
                                        label="requirements.txt",
                                        lines=50
                                    )

                with gr.TabItem("AI 코드챗"):
                    gr.Markdown("## 예제를 입력 또는 소스 코드를 붙여넣고 질문하세요")

                    # respond_wrapper yields (user, assistant) tuples, so the
                    # Chatbot must use the matching "tuples" data format; with
                    # type="messages" Gradio rejects tuple data.
                    chatbot = gr.Chatbot(
                        label="대화",
                        height=400,
                        type="tuples"
                    )

                    msg = gr.Textbox(
                        label="메시지",
                        placeholder="메시지를 입력하세요..."
                    )

                    # Hidden sliders: their values are passed to respond_wrapper.
                    max_tokens = gr.Slider(
                        minimum=1, maximum=8000,
                        value=4000, label="Max Tokens",
                        visible=False
                    )
                    temperature = gr.Slider(
                        minimum=0, maximum=1,
                        value=0.7, label="Temperature",
                        visible=False
                    )
                    top_p = gr.Slider(
                        minimum=0, maximum=1,
                        value=0.9, label="Top P",
                        visible=False
                    )

                    examples = [
                        ["상세한 사용 방법을 4000 토큰 이상 상세히 설명"],
                        ["FAQ 20건을 4000 토큰 이상 작성"],
                        ["기술 차별점, 강점을 중심으로 4000 토큰 이상 설명"],
                        ["특허 출원에 활용 가능한 혁신 아이디어를 4000 토큰 이상 작성"],
                        ["논문 형식으로 4000 토큰 이상 작성"],
                        ["계속 이어서 답변하라"]
                    ]
                    gr.Examples(examples, inputs=msg)

                    # Conversation state (chat history) is kept as ChatMessage
                    # objects only.
                    conversation_state = gr.State([])

                    # 1) On submit, user_submit_message records the user turn.
                    # 2) respond_wrapper then streams from Gemini, updates the
                    #    conversation, and sends (user, assistant) tuples to
                    #    the chatbot.
                    msg.submit(
                        user_submit_message,
                        inputs=[msg, conversation_state],
                        outputs=[msg, conversation_state],
                        queue=False
                    ).then(
                        respond_wrapper,
                        inputs=[msg, conversation_state, max_tokens, temperature, top_p],
                        outputs=[msg, chatbot],
                    )

                with gr.TabItem("Recommended Best"):
                    gr.Markdown(
                        "Discover recommended HuggingFace Spaces [here](https://huggingface.co/spaces/openfree/Korean-Leaderboard)."
                    )

            # Analysis tab wiring.
            space_id_state = gr.State()
            tree_structure_state = gr.State()
            app_py_content_lines = gr.State()

            # Analyze -> then load requirements.txt -> then resize the code
            # viewer to the line count computed by analyze_space.
            analyze_button.click(
                analyze_space,
                inputs=[url_input],
                outputs=[
                    app_py_content,
                    tree_view_output,
                    tree_structure_state,
                    space_id_state,
                    summary_output,
                    analysis_output,
                    usage_output,
                    app_py_content_lines
                ]
            ).then(
                lambda space_id: get_file_content(space_id, "requirements.txt"),
                inputs=[space_id_state],
                outputs=[requirements_content]
            ).then(
                lambda lines: gr.update(lines=lines),
                inputs=[app_py_content_lines],
                outputs=[app_py_content]
            )

        return demo

    except Exception as e:
        print(f"Error in create_ui: {str(e)}")
        print(traceback.format_exc())
        raise


if __name__ == "__main__":
    # Script entry point: build the UI, enable the queue, then serve it.
    # Any failure is printed with its traceback and re-raised.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
        "show_api": False,
    }
    try:
        print("Starting HuggingFace Space Analyzer...")
        demo = create_ui()
        print("UI created successfully.")

        print("Configuring Gradio queue...")
        demo.queue()
        print("Gradio queue configured.")

        print("Launching Gradio app...")
        demo.launch(**launch_options)
        print("Gradio app launched successfully.")
    except Exception as exc:
        print(f"Error in main: {str(exc)}")
        print("Detailed error information:")
        print(traceback.format_exc())
        raise