# cognisafe-backend/app/ml_models/gemini_moderator.py
from google import genai

from app.core.config import settings
from app.services.gemini_error_handling import handle_model_error
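
# Assumption: `settings.gemini_apikey` holds the Gemini API key, typically loaded
# from the environment by app/core/config.py (e.g. via a pydantic-settings object);
# that module is not reproduced here.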

class GeminiModerator:
    """Thin wrapper around the google-genai client for moderation prompts."""

    def __init__(self):
        self.client = None

    def initialize_for_cleansing(self) -> None:
        # Create a Gemini client for the hate-speech cleansing instance.
        self.client = genai.Client(api_key=settings.gemini_apikey)

    def initialize_for_misinfo_detection(self) -> None:
        # Create a Gemini client for the misinformation-detection instance.
        self.client = genai.Client(api_key=settings.gemini_apikey)

    def check_misinfo(self, content: str) -> str:
        return self.prompt_model(content, 1)  # 1 selects the misinformation prompt

    def cleanse(self, content: str) -> str:
        return self.prompt_model(content, 0)  # 0 selects the hate-speech cleansing prompt

    def prompt_model(self, content: str, purpose: int) -> str:
        prompt_message = [
            # purpose == 0: cleanse flagged content.
            f'You are a content moderation assistant. The following text has been flagged for harmful content. Your task is to clean it by removing or replacing inappropriate words while keeping the meaning intact. Please return the cleaned version of the text. Content: "{content}". I repeat, return only the clean version, without any explanation.',
            # purpose == 1: check factual accuracy.
            f"You are a content moderation assistant. Check if the following content contains any factual inaccuracies. Your task is to identify any statements that contradict established facts or lack evidence. If there is misinformation present, the response must start with 'False' followed by the corrected information or an explanation of the inaccuracy. If the content is factually accurate, the response must start with 'True'. Do not flag content as 'False' simply because it is harmful, hateful, or threatening if the claims made are factually correct. Focus solely on the truthfulness of the statements. Content: \"{content}\".",
        ]
        try:
            response = self.client.models.generate_content(
                model="gemini-2.0-flash", contents=prompt_message[purpose]
            )
            print(response)  # debug output of the raw response
            # If the prompt itself was blocked, the response carries a block reason
            # instead of text; return a safe placeholder in that case.
            block_reason = getattr(
                getattr(response, "prompt_feedback", None), "block_reason", None
            )
            if block_reason == "PROHIBITED_CONTENT":
                return "I have anger issues. I will take a deep breath"
            return response.text
        except Exception as e:
            print(f"Something went wrong while prompting: {e}")
            # Not every exception carries a `code` attribute, so fall back to None.
            handle_model_error(getattr(e, "code", None), content)

# Two module-level instances: one configured for hate-speech cleansing,
# one for misinformation detection.
gemini_for_cleansing = GeminiModerator()
gemini_for_cleansing.initialize_for_cleansing()

gemini_for_misinfo_detection = GeminiModerator()
gemini_for_misinfo_detection.initialize_for_misinfo_detection()
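

if __name__ == "__main__":
    # Illustrative local check only (not part of the original module): a sketch of
    # how the two singletons above are meant to be called, e.g. from a service layer.
    # The sample strings are hypothetical and require a valid Gemini API key to run.
    cleaned = gemini_for_cleansing.cleanse("You are a terrible, awful person!")
    print("Cleaned:", cleaned)

    verdict = gemini_for_misinfo_detection.check_misinfo(
        "The Great Wall of China is visible from the Moon."
    )
    print("Misinfo check:", verdict)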