Spaces:

JeffMII
/

CEC-Learning

Running

App Files Files Community

Jeff Myers II committed on May 5

Commit

64fde65

1 Parent(s): 13a2860

Completed Prototype

Browse files

Files changed (3) hide show

Gemma_Model.py → Gemma.py +20 -57
News.py +29 -24
app.py +259 -52

Gemma_Model.py → Gemma.py RENAMED Viewed

@@ -1,9 +1,4 @@
-from transformers import (
-    # AutoTokenizer,
-    # BitsAndBytesConfig,
-    Gemma3ForCausalLM,
-    GemmaTokenizerFast
-    )
 import torch
 import json
 import os
@@ -14,21 +9,16 @@ class GemmaLLM:
     def __init__(self):
         model_id = "google/gemma-3-1b-it"
-        # quantization_config = BitsAndBytesConfig(load_in_8bit=True)
         self.model = Gemma3ForCausalLM.from_pretrained(
             model_id,
-            device_map="cpu",
-            # quantization_config=quantization_config,
-            # low_cpu_mem_usage=True,
             torch_dtype=torch.float16,
             token=os.environ.get("GEMMA_TOKEN"),
         ).eval()
-        self.tokenizer = GemmaTokenizerFast.from_pretrained(model_id, token=os.environ.get("GEMMA_TOKEN"))
     def generate(self, message) -> str:
-        print("Generating...")
         inputs = self.tokenizer.apply_chat_template(
             message,
             add_generation_prompt=True,
@@ -40,72 +30,45 @@ class GemmaLLM:
         input_length = inputs["input_ids"].shape[1]
         with torch.inference_mode():
-            outputs = self.model.generate(
-                **inputs, max_new_tokens=1024,
-            )[0][input_length:]
-        outputs = self.tokenizer.decode(outputs, skip_special_tokens=True)
-        print("Completed generating!")
         return outputs
-    def get_summary_message(self, article, num_paragraphs) -> dict:
         summarize = "You are a helpful assistant. Your main task is to summarize articles. You will be given an article that you will generate a summary for. The summary should include all the key points of the article. ONLY RESPOND WITH THE SUMMARY!!!"
         summary = f"Summarize the data in the following JSON into {num_paragraphs} paragraph(s) so that it is easy to read and understand:\n"
-        message = [
-            {
-                "role": "system",
-                "content": [
-                    {"type": "text", "text": summarize},
-                ],
-            },
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": summary + json.dumps(article, indent=4)},
-                ],
-            },
-        ]
         return message
-    def get_summary(self, message) -> str:
         summary = self.generate(message)
         return summary
-    def get_questions_message(self, summary, num_questions, difficulty) -> dict:
-        schema = json.dumps([
             dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__]),
             dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__]),
-            dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__])], indent=4)
-        question = "You are a helpful assistant. Your main task is to generate " + str(num_questions) + " multiple choice questions from an article. Respond in the following JSON structure and schema:\n\njson\n```\n" + schema + "\n```\n\nThere should only be " + str(num_questions) + " questions generated. Each question should only have 3 false answers and 1 correct answer. The correct answer should be the most relevant answer based on the context derived from the article. False answers should not contain the correct answer. False answers should contain false information but also be reasonably plausible for answering the question. ONLY RESPOND WITH RAW JSON!!!"
-        questions = f"Generate {difficulty} questions and answers from the following article:\n"
-        message = [
-            {
-                "role": "system",
-                "content": [
-                    {"type": "text", "text": question},
-                ],
-            },
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": questions + summary},
-                ],
-            },
-        ]
         return message
-    def get_questions(self, message) -> dict:
         questions = self.generate(message)
         return json.loads(questions.strip("```").replace("json\n", ""))

+from transformers import AutoTokenizer, Gemma3ForCausalLM
 import torch
 import json
 import os
     def __init__(self):
         """Load the Gemma 3 1B instruction-tuned tokenizer and model.

         The tokenizer and the model come from the same Hub repository, so both
         downloads are authenticated with the ``GEMMA_TOKEN`` environment
         variable (``None`` when it is unset). The model is placed on CUDA when
         available, otherwise on the CPU, and switched to eval mode.
         """
         model_id = "google/gemma-3-1b-it"
         hf_token = os.environ.get("GEMMA_TOKEN")
         # Pass the token here as well: the model load below already needs it,
         # and the tokenizer files live in the same repository.
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
         self.model = Gemma3ForCausalLM.from_pretrained(
             model_id,
             device_map="cuda" if torch.cuda.is_available() else "cpu",
             torch_dtype=torch.float16,
             token=hf_token,
         ).eval()
     def generate(self, message) -> str:
         inputs = self.tokenizer.apply_chat_template(
             message,
             add_generation_prompt=True,
         input_length = inputs["input_ids"].shape[1]
         with torch.inference_mode():
+            outputs = self.model.generate(**inputs, max_new_tokens=1024)[0][input_length:]
+            outputs = self.tokenizer.decode(outputs, skip_special_tokens=True)
         return outputs
+    def _get_summary_message(self, article, num_paragraphs) -> dict:
         summarize = "You are a helpful assistant. Your main task is to summarize articles. You will be given an article that you will generate a summary for. The summary should include all the key points of the article. ONLY RESPOND WITH THE SUMMARY!!!"
         summary = f"Summarize the data in the following JSON into {num_paragraphs} paragraph(s) so that it is easy to read and understand:\n"
+        message = [{"role": "system", "content": [{"type": "text", "text": summarize}]},
+                   {"role": "user",   "content": [{"type": "text", "text": summary + json.dumps(article, indent=4)}]}]
         return message
+    def get_summary(self, article, num_paragraphs) -> str:
+        message = self._get_summary_message(article, num_paragraphs)
         summary = self.generate(message)
         return summary
+    def _get_questions_message(self, summary, num_questions, difficulty) -> dict:
+        question = f"""
+            You are a helpful assistant. Your main task is to generate {num_questions} multiple choice questions from an article. Respond in the following JSON structure and schema:\n\njson\n```{json.dumps(list((
             dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__]),
             dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__]),
+            dict(question=str.__name__, correct_answer=str.__name__, false_answers=[str.__name__, str.__name__, str.__name__]))), indent=4)}```\n\nThere should only be {num_questions} questions generated. Each question should only have 3 false answers and 1 correct answer. The correct answer should be the most relevant answer based on the context derived from the article. False answers should not contain the correct answer. False answers should contain false information but also be reasonably plausible for answering the question. ONLY RESPOND WITH RAW JSON!!!
+        """
+        questions = f"Generate {difficulty.lower()} questions and answers from the following article:\n"
+        message = [{"role": "system", "content": [{"type": "text", "text": question}]},
+                   {"role": "user", "content": [{"type": "text", "text": questions + summary}]}]
         return message
+    def get_questions(self, summary, num_questions, difficulty) -> dict:
+        message = self._get_questions_message(summary, num_questions, difficulty)
         questions = self.generate(message)
         return json.loads(questions.strip("```").replace("json\n", ""))

News.py CHANGED Viewed

@@ -5,58 +5,63 @@ import os
 __export__ = ["News"]
 class News:
-    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}
-    __CATEGORIES__ = {
-        "business",
-        "entertainment",
-        "general",
-        "health",
-        "science",
-        "sports",
-        "technology"
-    }
     def __init__(self):
         newsapi_key = os.environ.get("NEWS_API_KEY")
         self.newsapi = NewsApiClient(api_key=newsapi_key)
     def get_sources(self, category=None):
-        sources = self.newsapi.get_sources(language="en", country="us", category=category)["sources"]
         sources = {source["name"] for source in sources if source["name"] not in self.__EX_SOURCES__}
-        print(sources)
         return sources
-    def get_top_headlines(self, num_headlines=None, category=None):
-        sources = self.get_sources(category=category)
         headlines = self.newsapi.get_top_headlines(
             sources=", ".join(sources),
-            page_size=num_headlines
         )["articles"]
         return headlines
-    def get_headlines(self, num_headlines=None, query=None):
         sources = self.get_sources()
         headlines = self.newsapi.get_everything(
             q=query,
             sources=", ".join(sources),
-            page_size=num_headlines
         )["articles"]
         return headlines
-    def get_articles_from_headlines(self, headlines):
         for headline in headlines:
             article = Article(headline["url"])
             article.download()
             article.parse()
             headline["content"] = article.text
-            # headline["authors"] = article.authors
-            headline["source"] = headline["source"]["name"]
-            del headline["author"]
-            # headline.pop("author", None)
         return headlines

 __export__ = ["News"]
 class News:
     """Fetch headlines from NewsAPI and attach the scraped article text."""

     # Source names that are filtered out of every source listing.
     __EX_SOURCES__ = ["ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News", "Newsweek"]
     # Category labels offered to callers; they are lowercased before being sent to the API.
     __CATEGORIES__ = [
         "General",
         "Business",
         "Entertainment",
         "Health",
         "Science",
         "Technology"
     ]

     def __init__(self):
         """Create the NewsAPI client from the ``NEWS_API_KEY`` environment variable."""
         newsapi_key = os.environ.get("NEWS_API_KEY")
         self.newsapi = NewsApiClient(api_key=newsapi_key)

     def get_sources(self, category=None):
         """Return the set of English-language US source names, minus the excluded ones.

         Args:
             category: Optional category label; lowercased before the request.
         """
         sources = self.newsapi.get_sources(language="en", country="us", category=category.lower() if category else category)["sources"]
         return {source["name"] for source in sources if source["name"] not in self.__EX_SOURCES__}

     def get_top_headlines(self, num_headlines=5, category=None):
         """Return up to ``num_headlines`` top headlines with article text attached.

         Args:
             num_headlines: Page size requested from the API.
             category: Optional category label used to select the sources.
         """
         # get_sources lowercases the category itself.
         sources = self.get_sources(category)
         # NOTE(review): the value sent as `sources` is built from source *names*;
         # confirm the API accepts names here rather than source identifiers.
         headlines = self.newsapi.get_top_headlines(
             sources=", ".join(sources),
             page_size=num_headlines,
             language="en",
         )["articles"]
         return self._get_articles_from_headlines(headlines)

     def get_headlines(self, num_headlines=5, query=None):
         """Return up to ``num_headlines`` articles matching ``query`` with text attached.

         Args:
             num_headlines: Page size requested from the API.
             query: Search phrase passed as ``q``.
         """
         sources = self.get_sources()
         headlines = self.newsapi.get_everything(
             q=query,
             sources=", ".join(sources),
             page_size=num_headlines,
             # Was misspelled "lanuguage", which made every call raise TypeError.
             language="en",
         )["articles"]
         return self._get_articles_from_headlines(headlines)

     def _get_articles_from_headlines(self, headlines):
         """Normalize each headline in place and add its scraped text as ``content``.

         Removes the ``author`` field, flattens ``source`` to its name, then
         downloads and parses the page at ``url``. Returns the same list.
         """
         for headline in headlines:
             # Tolerate entries without an author field instead of raising KeyError.
             headline.pop("author", None)
             source = headline.get("source")
             if isinstance(source, dict):
                 headline["source"] = source.get("name")
             article = Article(headline["url"])
             article.download()
             article.parse()
             headline["content"] = article.text
         return headlines

app.py CHANGED Viewed

@@ -1,64 +1,271 @@
-from flask import Flask, request, jsonify
-from flask_cors import CORS
 from News import News
-from Gemma_Model import GemmaLLM
-print("Starting server...")
-app = Flask(__name__)
-CORS(app)
-news = News()
-gemma = GemmaLLM()
-# business entertainment general health science sports technology
-@app.get("/get_top_articles/")
-@app.get("/get_top_articles/<string:category>/")
-@app.get("/get_top_articles/<int:num_articles>/")
-@app.get("/get_top_articles/<int:num_articles>/<string:category>/")
-def get_top_articles(num_articles=5, category=None):
-    if category is not None: category = category.lower()
-    articles = news.get_top_headlines(num_headlines=num_articles, category=category)
-    articles = news.get_articles_from_headlines(articles)
-    return jsonify(articles)
-@app.get("/get_articles/")
-@app.get("/get_articles/<string:query>/")
-@app.get("/get_articles/<int:num_articles>/")
-@app.get("/get_articles/<int:num_articles>/<string:query>/")
-def get_articles(num_articles=5, query=None):
-    if query is not None: query = query.lower()
-    articles = news.get_headlines(num_headlines=num_articles, query=query)
-    articles = news.get_articles_from_headlines(articles)
-    return jsonify(articles)
-@app.post("/get_summary/")
-@app.post("/get_summary/<int:num_paragraphs>/")
-def get_summary(num_paragraphs=1):
-    article = request.json
-    message = gemma.get_summary_message(article, num_paragraphs)
-    summary = gemma.get_summary(message)
-    article["summary"] = summary
-    return jsonify(article)
-@app.post("/get_questions/")
-@app.post("/get_questions/<string:difficulty>/")
-@app.post("/get_questions/<int:num_questions>/")
-@app.post("/get_questions/<int:num_questions>/<string:difficulty>/")
-def get_questions(num_questions=3, difficulty="average"):
-    if "summary" in request.json:
-        summary = request.json["summary"]
-        questions = gemma.get_questions(gemma.get_questions_message(summary, num_questions, difficulty))
-    elif "summaries" in request.json:
-        summaries = request.json["summaries"]
-        messages = [gemma.get_questions_message(summary) for summary in summaries]
-        questions = gemma.get_questions(messages)
-    else: return jsonify({})
-    return jsonify(questions)
-if __name__ == "__main__": app.run()

+# %%
+import random
+import gradio as gr
 from News import News
+from Gemma import GemmaLLM
+# %%
+class Cooldown:
+    ...
+cooldown = Cooldown() ################################ News fetching cooldown in seconds
+news = News() ######################################## Initialize the News object
+model = GemmaLLM() ################################### Initialize the Gemma model
+# %%
+with gr.Blocks() as demo:
+    gr.Markdown("# News Quiz Application")
+    ######
+    ###### State Variables and Components Initialization
+    ######
+    init = ( ######################################### Initialize the Gradio interface with components and state variables
+        gr.Markdown("## News Articles"),
+        ## State variables for news articles and quiz,
+        gr.State({}), gr.State([]), gr.State({}),
+        gr.Slider(label="Number of Articles", minimum=1, maximum=10, value=3, step=1),
+        gr.Radio(label="Category (optional)", choices=news.__CATEGORIES__),
+        gr.Button("Get Articles"),
+        gr.Radio(visible=False),
+        gr.Textbox(visible=False),
+        gr.Button(visible=False),
+        gr.Checkbox(visible=False),
+        gr.Textbox(visible=False),
+        gr.Button(visible=False),
+        ## State variables for quiz
+        gr.State([]), gr.State([]), gr.State(),
+        [gr.Radio(visible=False) for _ in range(10)],
+        gr.Button(visible=False),
+        gr.Textbox(visible=False),
+        gr.Button(visible=False),
+    )
+    ( ################################################ State variables and components for news articles and quiz
+        heading,
+        ## Components for news articles
+        article, articles, descriptions,
+        num_headlines, category, get,
+        headline, description, show,
+        summarize, content, ready,
+        ## Components for quiz
+        answers, response, results,
+        quiz, submit,
+        evaluation, read,
+    ) = init
+    def hide_news(): ################################# Hide news-related components
+        num_headlines = gr.Slider(visible=False)
+        category = gr.Radio(visible=False)
+        get = gr.Button(visible=False)
+        headline = gr.Radio(visible=False)
+        description = gr.Textbox(visible=False)
+        show = gr.Button(visible=False)
+        summarize = gr.Checkbox(visible=False)
+        content = gr.Textbox(visible=False)
+        ready = gr.Button(visible=False)
+        return num_headlines, category, get, headline, description, show, summarize, content, ready
+    def show_news(): ################################# Show news-related components
+        num_headlines = gr.Slider(label="Number of Articles", minimum=1, maximum=10, value=3, step=1, visible=True)
+        category = gr.Radio(label="Category (optional)", choices=news.__CATEGORIES__, visible=True)
+        get = gr.Button("Get Articles", visible=True)
+        return num_headlines, category, get
+    def show_headline(headlines, descriptions): ###### Show news headlines and descriptions
+        headline = gr.Radio(label="News Headlines", choices=headlines, value=headlines[0], interactive=True, visible=True)
+        description = gr.Textbox(label="Headline Description", value=descriptions[0], visible=True)
+        show = gr.Button("Show Content", visible=True)
+        return headline, description, show
+    def show_content(summary): ####################### Show article content and summary
+        summarize = gr.Checkbox(label="Show Summary?", value=True, interactive=True, visible=True)
+        content = gr.Textbox(label="Summary", value=summary, visible=True)
+        ready = gr.Button("Begin Quiz", visible=True)
+        return summarize, content, ready
+    def format_mcq(mcq): ############################# Format multiple choice question for quiz
+        if not mcq or not isinstance(mcq, dict):
+            print(f"Multiple choice question object is a {type(mcq)} but should be {type(dict())}.")
+            return "Invalid multiple choice question.", None
+        question = mcq.get('question', 'No question provided.')
+        answer = mcq.get('correct_answer', 'No correct answer provided.')
+        false_answers = mcq.get('false_answers', [])
+        if not isinstance(false_answers, list):
+            print(f"False answers is a {type(false_answers)} but should be {type(list())}.")
+            return "Invalid false answers format.", None
+        options = random.shuffle([answer] + false_answers)
+        print("Question:", question)
+        print(f"Formatted options: {options}")
+        return question, options, answer
+    def hide_quiz(): ################################# Hide quiz-related components
+        quiz = [gr.Radio(visible=False) for _ in range(10)]
+        submit = gr.Button(visible=False)
+        evaluation = gr.Textbox(visible=False)
+        read = gr.Button(visible=False)
+        return read, evaluation, submit, *quiz
+    def show_quiz(mcqs): ############################# Show quiz-related components
+        quiz = [(mcq["question"], mcq["false_answers"], mcq["correct_answer"]) for mcq in mcqs]
+        quiz = [(question, random.sample(distractors + [answer], 4), answer) for question, distractors, answer in quiz]
+        questions, options, answers = zip(*quiz) if quiz else ([], [], [])
+        print("options", len(options))
+        quiz = [gr.Radio(label=f"{i + 1}: {questions[i]}", choices=options[i], visible=True) for i in range(len(mcqs))]\
+             + [gr.Radio(visible=False) for _ in range(10 - len(mcqs))]
+        print("quiz", len(quiz))
+        submit = gr.Button("Submit Answers", interactive=bool(answers), visible=True)
+        return submit, list(answers), *quiz
+    def show_eval(eva): ############################## Show evaluation of user's response to the quiz
+        evaluation = gr.Textbox(label="Evaluation", value=eva, visible=True)
+        read = gr.Button("Read Articles", visible=True)
+        return evaluation, read
+    ######
+    ###### Get and Display News Articles
+    ######
+    def get_headline(category, num_headlines): ####### Get news headlines based on selected category and number
+        articles = news.get_top_headlines(category=category, num_headlines=num_headlines)
+        headlines, descriptions = zip(*[(article['title'], article.get('description', 'No description available.')) for article in articles])
+        show = show_headline(headlines, descriptions)
+        descriptions = {h: d for h, d in zip(headlines, descriptions)}
+        return articles, descriptions, *show
+    get.click(get_headline, inputs=[category, num_headlines], outputs=[articles, descriptions, headline, description, show])
+    def get_description(descriptions, headline): ##### Get description for the selected headline
+        description = "No description available."
+        if   not descriptions:                   print("Descriptions are empty.")
+        elif not headline:                       print("Headline is empty.")
+        elif not isinstance(descriptions, dict): print(f"Descriptions is a {type(descriptions)} but should be {type(dict())}.")
+        else:                                    description = descriptions.get(headline, description)
+        return description
+    headline.change(get_description, inputs=[descriptions, headline], outputs=[description])
+    def get_article(articles, headline): ############# Get article for the selected headline
+        headlines = [a['title'] for a in articles]
+        if headline not in headlines: return {}
+        return articles[headlines.index(headline)]
+    show.click(get_article, inputs=[articles, headline], outputs=[article])
+    def get_content(articles, article): ############## Get content for the selected article
+        if "summary" not in article:
+            idx = articles.index(article)
+            articles[idx]["summary"] = model.get_summary(article, 1)
+            article = articles[idx]
+        return articles, article, *show_content(article.get("summary", "No summary available."))
+    article.change(get_content, inputs=[articles, article], outputs=[articles, article, summarize, content, ready])
+    def toggle_summary(article, summarize): ########## Toggle between showing summary and full content
+        content = "No article available."
+        if not article:                     print("Selected article is empty.")
+        elif not isinstance(article, dict): print(f"Selected article is a {type(article)} but should be {type(dict())}.")
+        elif summarize:                     content = article.get("summary", "Summary not available.")
+        else:                               content = article.get("content", "Content not available.")
+        return content
+    summarize.change(toggle_summary, inputs=[article, summarize], outputs=[content])
+    ######
+    ###### Quiz Generation and Evaluation
+    ######
+    def get_quiz(content): ########################### Generate quiz questions from the article content
+        multichoicequests = []
+        if not content:                     mcqs = multichoicequests
+        else:                               mcqs = model.get_questions(content, 3, "Moderate")
+        if not isinstance(mcqs, list):      print(f"Multiple choice questions object is a {type(mcqs)} but should be {type(list())}.")
+        elif len(mcqs) == 0:                print("Content is empty or no multiple choice questions generated.")
+        for mcq in mcqs:
+            missing = set()
+            if not isinstance(mcq, dict):   print(f"Multiple choice question object is {type(mcq)} but should be {type(dict())}.")
+            else:                           missing = set(['question', 'correct_answer', 'false_answers']) - set(mcq.keys())
+            if missing:                     print(f"Multiple choice question object is missing keys: {missing}.")
+            else:                           multichoicequests.append(mcq)
+        return gr.Markdown("## News Quiz"), *hide_news(), *show_quiz(multichoicequests)
+    ready.click(get_quiz, inputs=[content], outputs=[
+        heading, num_headlines, category, get, headline, description, show, summarize, content, ready, submit, answers, *quiz])
+    def get_evaluation(answers, *quiz): ############## Evaluate the user's responses to the quiz
+        results = -1
+        if not answers:                     print("Answers are empty.")
+        elif not quiz:                      print("Quiz is empty.")
+        elif not isinstance(answers, list): print(f"Answers is a {type(answers)} but should be {type(list())}.")
+        else:
+                                            results = sum(1 for ans, resp in zip(answers, list(quiz)) if ans == resp) / len(answers)
+                                            results = round(results, 4)
+        if 0.9 <= results <= 1.0:           evaluation = f"Excellent! You scored {results * 100}%."
+        elif 0.8 <= results < 0.9:          evaluation = f"Great job! You scored {results * 100}%."
+        elif 0.7 <= results < 0.8:          evaluation = f"Good effort! You scored {results * 100}%."
+        elif 0.6 <= results < 0.7:          evaluation = f"You scored {results * 100}%. Keep practicing!"
+        elif 0.5 <= results < 0.6:          evaluation = f"You scored {results * 100}%. You can do better!"
+        elif results < 0:                   evaluation = f"Unable to evaluate. Please try again."
+        else:                               evaluation = f"You scored {results * 100}%. Keep trying!"
+        return show_eval(evaluation)
+    submit.click(get_evaluation, inputs=[answers, *quiz], outputs=[evaluation, read])
+    def read_articles(): ############################# Reset the interface to read articles again
+        return gr.Markdown("## News Articles"), *show_news(), *hide_quiz()
+    read.click(read_articles, outputs=[heading, num_headlines, category, get, read, evaluation, submit, *quiz])
+demo.launch()