# cognisafe-backend/app/ml_models/gemini_moderator.py
from google import genai

from app.core.config import settings
from app.services.gemini_error_handling import handle_model_error
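
# Assumption: `settings.gemini_apikey` holds the Gemini API key, typically loaded
# from the environment by app/core/config.py (e.g. via a pydantic-settings object);
# that module is not reproduced here.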

class GeminiModerator:
    """Thin wrapper around the google-genai client for moderation prompts."""

    def __init__(self):
        self.client = None

    def initialize_for_cleansing(self) -> None:
        # Create a Gemini client for the hate-speech cleansing instance.
        self.client = genai.Client(api_key=settings.gemini_apikey)

    def initialize_for_misinfo_detection(self) -> None:
        # Create a Gemini client for the misinformation-detection instance.
        self.client = genai.Client(api_key=settings.gemini_apikey)

    def check_misinfo(self, content: str) -> str:
        return self.prompt_model(content, 1)  # 1 selects the misinformation prompt

    def cleanse(self, content: str) -> str:
        return self.prompt_model(content, 0)  # 0 selects the hate-speech cleansing prompt

    def prompt_model(self, content: str, purpose: int) -> str:
        prompt_message = [
            # purpose == 0: cleanse flagged content.
            f'You are a content moderation assistant. The following text has been flagged for harmful content. Your task is to clean it by removing or replacing inappropriate words while keeping the meaning intact. Please return the cleaned version of the text. Content: "{content}". I repeat, return only the clean version, without any explanation.',
            # purpose == 1: check factual accuracy.
            f"You are a content moderation assistant. Check if the following content contains any factual inaccuracies. Your task is to identify any statements that contradict established facts or lack evidence. If there is misinformation present, the response must start with 'False' followed by the corrected information or an explanation of the inaccuracy. If the content is factually accurate, the response must start with 'True'. Do not flag content as 'False' simply because it is harmful, hateful, or threatening if the claims made are factually correct. Focus solely on the truthfulness of the statements. Content: \"{content}\".",
        ]
        try:
            response = self.client.models.generate_content(
                model="gemini-2.0-flash", contents=prompt_message[purpose]
            )
            print(response)  # debug output of the raw response
            # If the prompt itself was blocked, the response carries a block reason
            # instead of text; return a safe placeholder in that case.
            block_reason = getattr(
                getattr(response, "prompt_feedback", None), "block_reason", None
            )
            if block_reason == "PROHIBITED_CONTENT":
                return "I have anger issues. I will take a deep breath"
            return response.text
        except Exception as e:
            print(f"Something went wrong while prompting: {e}")
            # Not every exception carries a `code` attribute, so fall back to None.
            handle_model_error(getattr(e, "code", None), content)

# Two module-level instances: one configured for hate-speech cleansing,
# one for misinformation detection.
gemini_for_cleansing = GeminiModerator()
gemini_for_cleansing.initialize_for_cleansing()

gemini_for_misinfo_detection = GeminiModerator()
gemini_for_misinfo_detection.initialize_for_misinfo_detection()
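

if __name__ == "__main__":
    # Illustrative local check only (not part of the original module): a sketch of
    # how the two singletons above are meant to be called, e.g. from a service layer.
    # The sample strings are hypothetical and require a valid Gemini API key to run.
    cleaned = gemini_for_cleansing.cleanse("You are a terrible, awful person!")
    print("Cleaned:", cleaned)

    verdict = gemini_for_misinfo_detection.check_misinfo(
        "The Great Wall of China is visible from the Moon."
    )
    print("Misinfo check:", verdict)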