add app
- app/__pycache__/main.cpython-312.pyc +0 -0
- app/api/__init__.py +0 -0
- app/api/__pycache__/__init__.cpython-312.pyc +0 -0
- app/api/v1/__pycache__/api.cpython-312.pyc +0 -0
- app/api/v1/api.py +6 -0
- app/api/v1/endpoints/__pycache__/moderation.cpython-312.pyc +0 -0
- app/api/v1/endpoints/moderation.py +13 -0
- app/core/__pycache__/config.cpython-312.pyc +0 -0
- app/core/config.py +16 -0
- app/main.py +33 -0
- app/ml_models/__init__.py +0 -0
- app/ml_models/__pycache__/__init__.cpython-312.pyc +0 -0
- app/ml_models/__pycache__/classifier.cpython-312.pyc +0 -0
- app/ml_models/__pycache__/classifier_loader.cpython-312.pyc +0 -0
- app/ml_models/__pycache__/classifier_path_loader.cpython-312.pyc +0 -0
- app/ml_models/__pycache__/gemini_moderator.cpython-312.pyc +0 -0
- app/ml_models/classifier.py +41 -0
- app/ml_models/classifier_loader.py +42 -0
- app/ml_models/classifier_path_loader.py +20 -0
- app/ml_models/gemini_moderator.py +51 -0
- app/ml_models/toxic-bert/config.json +42 -0
- app/ml_models/toxic-bert/model.safetensors +3 -0
- app/ml_models/toxic-bert/special_tokens_map.json +7 -0
- app/ml_models/toxic-bert/tokenizer.json +0 -0
- app/ml_models/toxic-bert/tokenizer_config.json +58 -0
- app/ml_models/toxic-bert/vocab.txt +0 -0
- app/models/__pycache__/check_type.cpython-312.pyc +0 -0
- app/models/__pycache__/moderation_data.cpython-312.pyc +0 -0
- app/models/__pycache__/moderation_request.cpython-312.pyc +0 -0
- app/models/__pycache__/moderation_response.cpython-312.pyc +0 -0
- app/models/__pycache__/schemas.cpython-312.pyc +0 -0
- app/models/__pycache__/standard_response.cpython-312.pyc +0 -0
- app/models/check_type.py +11 -0
- app/models/moderation_data.py +12 -0
- app/models/moderation_request.py +32 -0
- app/models/moderation_response.py +6 -0
- app/models/schemas.py +5 -0
- app/models/standard_response.py +8 -0
- app/services/__pycache__/gemini_error_handling.cpython-312.pyc +0 -0
- app/services/__pycache__/moderation.cpython-312.pyc +0 -0
- app/services/__pycache__/object_to_json.cpython-312.pyc +0 -0
- app/services/__pycache__/text_cleaner.cpython-312.pyc +0 -0
- app/services/frequency_dictionary_en_82_765.txt +0 -0
- app/services/gemini_error_handling.py +11 -0
- app/services/moderation.py +56 -0
- app/services/text_cleaner.py +22 -0
app/__pycache__/main.cpython-312.pyc
ADDED
Binary file (1.51 kB)
app/api/__init__.py
ADDED
File without changes
app/api/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (182 Bytes)
app/api/v1/__pycache__/api.cpython-312.pyc
ADDED
Binary file (469 Bytes)
app/api/v1/api.py
ADDED
@@ -0,0 +1,6 @@
from fastapi import APIRouter
from app.api.v1.endpoints import moderation

api_router = APIRouter()

api_router.include_router(moderation.router, prefix="/moderation", tags=["Moderation"])
app/api/v1/endpoints/__pycache__/moderation.cpython-312.pyc
ADDED
Binary file (700 Bytes)
app/api/v1/endpoints/moderation.py
ADDED
@@ -0,0 +1,13 @@
from fastapi import APIRouter

from app.models.schemas import ModerationRequest, ModerationResponse
from app.services.moderation import moderate_content

router = APIRouter()


@router.post("/", response_model=ModerationResponse)
def testing(request: ModerationRequest):
    response = moderate_content(request)

    return response
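For illustration only (not part of this commit), a client could call the endpoint above once the app is running; the base URL assumes a local server on port 8000, the requests package is an assumed extra, and the field names follow the ModerationRequest model added later in this commit:

import requests  # assumed to be available; not declared anywhere in this commit

# Hypothetical call to the moderation endpoint, which main.py mounts under /api/v1.
payload = {
    "content": "some user-generated text",
    "checkFor": ["toxic", "insult", "misinfo"],
    "threshold": 0.6,
}
resp = requests.post("http://localhost:8000/api/v1/moderation/", json=payload)
print(resp.json())  # a ModerationResponse body: error, title, status, payload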
app/core/__pycache__/config.cpython-312.pyc
ADDED
Binary file (1.09 kB)
app/core/config.py
ADDED
@@ -0,0 +1,16 @@
import os
from dotenv import load_dotenv

load_dotenv()
class Settings:
    def __init__(self) -> None:
        self.gemini_apikey = None
        self.perspective_apikey = None

    def config(self) -> None:
        self.gemini_apikey = os.environ.get("GEMINI_API_KEY_1")
        self.perspective_apikey = os.environ.get("PERSPECTIVE_API_KEY_1")


settings = Settings()
settings.config()
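For reference, a minimal .env sketch with the two variable names config.py reads; the values are placeholders, not real keys, and no .env file is part of this commit:

# .env (placeholder values, illustrative only)
GEMINI_API_KEY_1=your-gemini-api-key
PERSPECTIVE_API_KEY_1=your-perspective-api-key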
app/main.py
ADDED
@@ -0,0 +1,33 @@
from fastapi import FastAPI, Response
from app.api.v1.api import api_router
from fastapi.middleware.cors import CORSMiddleware

from app.models.schemas import StandardResponse

app = FastAPI(title="Cognisafe API")

origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,  # or ["*"] for all origins (not recommended in prod)
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/health")
async def health_check():
    return {"status": "healthy"}

@app.get("/helloworld", response_model=StandardResponse)
def helloworld(response: Response) -> StandardResponse:
    """
    Returns helloworld as the standard response
    """
    response.status_code = 200
    response = StandardResponse(error=False, title="Hello World", status=200)
    return response


app.include_router(api_router, prefix="/api/v1")
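A minimal sketch of launching this app locally; uvicorn is an assumption (no dependency file is part of this commit), and the host and port values are illustrative:

import uvicorn

# Hypothetical local launch; serves /health, /helloworld and the /api/v1 routes.
uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)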
app/ml_models/__init__.py
ADDED
File without changes
app/ml_models/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (188 Bytes)
app/ml_models/__pycache__/classifier.cpython-312.pyc
ADDED
Binary file (1.98 kB)
app/ml_models/__pycache__/classifier_loader.cpython-312.pyc
ADDED
Binary file (2.44 kB)
app/ml_models/__pycache__/classifier_path_loader.cpython-312.pyc
ADDED
Binary file (1.48 kB)
app/ml_models/__pycache__/gemini_moderator.cpython-312.pyc
ADDED
Binary file (3.95 kB)
app/ml_models/classifier.py
ADDED
@@ -0,0 +1,41 @@
from transformers import pipeline
import logging


from app.ml_models.classifier_loader import ClassifierLoader


logger = logging.getLogger(__name__)


class Classifier:
    def __init__(self, model_name: str = "toxic-bert") -> None:
        self.model = None
        self.tokenizer = None
        self.model_name = model_name
        self.classifier = None

    def initialize_classifier(self) -> None:
        loader = ClassifierLoader(self.model_name)
        self.model = loader.load_model()
        self.tokenizer = loader.load_tokenizer()
        self.classifier = pipeline(
            "text-classification",
            model=self.model,
            tokenizer=self.tokenizer,
            device=-1,
            top_k=None,
        )

    def predict_nsfw(self, content: str) -> dict:
        if self.classifier is None:
            raise RuntimeError(
                "Model not initialized. Please call `initialize_classifier()` first."
            )
        results = self.classifier(content)

        prediction = {}
        for result in results[0]:
            prediction[result["label"]] = result["score"]

        return prediction
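Illustrative usage of the Classifier class above (not part of the commit); the label names come from the bundled toxic-bert config, while the numeric scores depend on the model:

from app.ml_models.classifier import Classifier

clf = Classifier()            # defaults to the locally bundled "toxic-bert" weights
clf.initialize_classifier()   # builds a CPU text-classification pipeline with top_k=None
scores = clf.predict_nsfw("example text to score")
# scores maps each label ("toxic", "severe_toxic", "obscene", "threat", "insult",
# "identity_hate") to a score between 0 and 1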
app/ml_models/classifier_loader.py
ADDED
@@ -0,0 +1,42 @@
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from app.ml_models.classifier_path_loader import ClassifierPathLoader
import logging
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, pipeline


logger = logging.getLogger(__name__)


class ClassifierLoader:
    def __init__(self, model_name: str):
        self.model_name = model_name
        self.model = None
        self.tokenizer = None

        path_loader = ClassifierPathLoader()
        path_loader.set_model(self.model_name)
        self.model_path = path_loader.get_model_path()

        # If model doesn't exist, download it
        if not self.model_path.exists():
            model_name = "unitary/toxic-bert"
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name)

            tokenizer.save_pretrained(self.model_path)
            model.save_pretrained(self.model_path)

    def load_model(self):
        if self.model is None:
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_path
            )
        logger.info("[✅] Model loaded successfully.")
        return self.model

    def load_tokenizer(self):
        if self.tokenizer is None:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        logger.info("[✅] Tokenizer loaded successfully.")
        return self.tokenizer
app/ml_models/classifier_path_loader.py
ADDED
@@ -0,0 +1,20 @@
from pathlib import Path

path_to_model = {"toxic-bert": Path.cwd() / "app" / "ml_models" / "toxic-bert"}


class ClassifierPathLoader:
    def __init__(self) -> None:
        self.model_name = None
        self.model_path = None

    def set_model(self, model_name: str) -> None:
        if model_name not in path_to_model:
            raise KeyError(f"Model '{model_name}' not found in path registry.")
        self.model_name = model_name
        self.model_path = Path(path_to_model[model_name])

    def get_model_path(self) -> Path:
        if self.model_path is None:
            raise RuntimeError("Model not set. Call `set_model()` first.")
        return self.model_path
app/ml_models/gemini_moderator.py
ADDED
@@ -0,0 +1,51 @@
from google import genai
from pprint import pprint
from app.core.config import settings
from app.services.gemini_error_handling import handle_model_error

class GeminiModerator:
    def __init__(self):
        self.client = None

    def intitialize_for_cleansing(self) -> None:
        client = genai.Client(api_key=settings.gemini_apikey)
        self.client = client

    def initialize_for_misinfo_detection(self) -> None:
        client = genai.Client(api_key=settings.gemini_apikey)
        self.client = client

    def check_misinfo(self, content: str) -> str:
        return self.prompt_model(content, 1)  # passing 1 for misinformation detection

    def cleanse(self, content: str) -> str:
        return self.prompt_model(content, 0)  # passing 0 for cleansing hatespeech

    def prompt_model(self, content: str, purpose: int) -> str:
        prompt_message = [
            f'You are a content moderation assistant. The following text has been flagged for harmful content. Your task is to clean it by removing or replacing inappropriate words while keeping the meaning intact. Please return the cleaned version of the text. Content: "{content}". I repeat return only the clean version, without any explanation.',
            f"You are a content moderation assistant. Check if the following content contains any factual inaccuracies. Your task is to identify any statements that contradict established facts or lack evidence. If there is misinformation present, the response must start with 'False' followed by the corrected information or explanation of the inaccuracy. If the content is factually accurate, the response must start with 'True'. Do not flag content as 'False' simply because it is harmful, hateful, or threatening if the claims made are factually correct. Focus solely on the truthfulness of the statements. Content:\"{content}\".",
        ]

        try:
            response = self.client.models.generate_content(
                model="gemini-2.0-flash", contents=prompt_message[purpose]
            )

            print(response)
            block_reason = getattr(
                getattr(response, "prompt_feedback", None), "block_reason", None
            )
            if block_reason == "PROHIBITED_CONTENT":
                return "I have anger issues. I will take a deep breath"

            return response.text
        except Exception as e:
            print(f"Something went wrong while prompting: {e}")
            handle_model_error(e.code, content)


gemini_for_cleansing = GeminiModerator()
gemini_for_cleansing.intitialize_for_cleansing()
gemini_for_misinfo_detection = GeminiModerator()
gemini_for_misinfo_detection.initialize_for_misinfo_detection()
app/ml_models/toxic-bert/config.json
ADDED
@@ -0,0 +1,42 @@
{
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "toxic",
    "1": "severe_toxic",
    "2": "obscene",
    "3": "threat",
    "4": "insult",
    "5": "identity_hate"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "identity_hate": 5,
    "insult": 4,
    "obscene": 2,
    "severe_toxic": 1,
    "threat": 3,
    "toxic": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "multi_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
app/ml_models/toxic-bert/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c155addfd79483b0a75955c49b6d42508e26ecd72b9600fb092205d9990df577
size 437970952
app/ml_models/toxic-bert/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
app/ml_models/toxic-bert/tokenizer.json
ADDED
The diff for this file is too large to render.
app/ml_models/toxic-bert/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
app/ml_models/toxic-bert/vocab.txt
ADDED
The diff for this file is too large to render.
app/models/__pycache__/check_type.cpython-312.pyc
ADDED
Binary file (575 Bytes)
app/models/__pycache__/moderation_data.cpython-312.pyc
ADDED
Binary file (805 Bytes)
app/models/__pycache__/moderation_request.cpython-312.pyc
ADDED
Binary file (2.03 kB)
app/models/__pycache__/moderation_response.cpython-312.pyc
ADDED
Binary file (568 Bytes)
app/models/__pycache__/schemas.cpython-312.pyc
ADDED
Binary file (524 Bytes)
app/models/__pycache__/standard_response.cpython-312.pyc
ADDED
Binary file (547 Bytes)
app/models/check_type.py
ADDED
@@ -0,0 +1,11 @@
from enum import Enum


class CheckType(str, Enum):
    toxic = "toxic"
    severe_toxic = "severe_toxic"
    insult = "insult"
    obscene = "obscene"
    threat = "threat"
    identity_hate = "identity_hate"
    misinfo = "misinfo"
app/models/moderation_data.py
ADDED
@@ -0,0 +1,12 @@
from pydantic import BaseModel
from typing import List
from typing import Any, Dict
from app.models.check_type import CheckType


class ModerationData(BaseModel):
    flagged_for: Dict[CheckType, float]
    scores: Dict[CheckType, float]
    original_content: str
    cleaned_content: str = None
    corrected_content: Any
app/models/moderation_request.py
ADDED
@@ -0,0 +1,32 @@
from pydantic import BaseModel
from typing import List

from app.models.check_type import CheckType
from app.services.text_cleaner import clean_text

from app.ml_models.classifier import Classifier
from app.ml_models.gemini_moderator import gemini_for_cleansing
from app.ml_models.gemini_moderator import gemini_for_misinfo_detection


classifier = Classifier()
classifier.initialize_classifier()


class ModerationRequest(BaseModel):
    content: str
    checkFor: List[CheckType]
    threshold: float = 0.6

    def correct_typos(self) -> None:
        self.content = clean_text(self.content)

    def classify_moderation(self) -> None:
        result = classifier.predict_nsfw(self.content)
        return result

    def cleanse_content(self) -> str:
        return gemini_for_cleansing.cleanse(self.content)

    def identify_misinfo(self) -> str:
        return gemini_for_misinfo_detection.check_misinfo(self.content)
app/models/moderation_response.py
ADDED
@@ -0,0 +1,6 @@
from app.models.standard_response import StandardResponse
from app.models.moderation_data import ModerationData


class ModerationResponse(StandardResponse):
    payload: ModerationData  # override with specific structure
app/models/schemas.py
ADDED
@@ -0,0 +1,5 @@
from app.models.check_type import CheckType
from app.models.moderation_data import ModerationData
from app.models.moderation_request import ModerationRequest
from app.models.moderation_response import ModerationResponse
from app.models.standard_response import StandardResponse
app/models/standard_response.py
ADDED
@@ -0,0 +1,8 @@
from pydantic import BaseModel


class StandardResponse(BaseModel):
    error: bool
    title: str
    status: int
    payload: None  # generic
app/services/__pycache__/gemini_error_handling.cpython-312.pyc
ADDED
Binary file (706 Bytes)
app/services/__pycache__/moderation.cpython-312.pyc
ADDED
Binary file (2.29 kB)
app/services/__pycache__/object_to_json.cpython-312.pyc
ADDED
Binary file (836 Bytes)
app/services/__pycache__/text_cleaner.cpython-312.pyc
ADDED
Binary file (1.38 kB)
app/services/frequency_dictionary_en_82_765.txt
ADDED
The diff for this file is too large to render.
app/services/gemini_error_handling.py
ADDED
@@ -0,0 +1,11 @@
def handle_model_error(code: int, content: str) -> str:
    response = switch_case(code)
    return response


def switch_case(code: int) -> str:
    match code:
        case 503:
            return "Model is overloaded. Please try again later"
        case _:
            return f"Something went wrong while prompting: {code}"
app/services/moderation.py
ADDED
@@ -0,0 +1,56 @@
from fastapi.responses import JSONResponse
from google import genai

# from app.models.moderation_data import ModerationData
from app.models.moderation_response import ModerationResponse
from app.models.schemas import ModerationRequest
from app.models.standard_response import StandardResponse
from app.models.schemas import ModerationData


def to_json_response(data: StandardResponse) -> JSONResponse:
    return JSONResponse(content=data.model_dump(), status_code=data.status)


def moderate_content(request: ModerationRequest) -> ModerationResponse:
    flagged_for = {}
    scores = {}
    corrected_content = None
    original_content = request.content

    if "misinfo" in request.checkFor:
        correct_info = request.identify_misinfo()
        if correct_info.startswith("False"):
            flagged_for["misinfo"] = 1
            corrected_content = str(correct_info[6:])

    request.correct_typos()
    result = request.classify_moderation()
    result["misinfo"] = flagged_for.get("misinfo", 0)

    for category in request.checkFor:
        scores[category] = result[category]
        if result[category] > request.threshold:
            flagged_for[category] = result[category]

    cleaned_content = request.content
    if len(flagged_for) > 0:
        if list(flagged_for.keys()) == ["misinfo"]:
            # Do nothing
            pass
        else:
            gemini_response = request.cleanse_content()
            cleaned_content = gemini_response

    payload = ModerationData(
        flagged_for=flagged_for,
        cleaned_content=cleaned_content,
        corrected_content=corrected_content,
        original_content=original_content,
        scores=scores,
    )
    response = ModerationResponse(
        error=False, title="Cleaned", status=200, payload=payload
    )

    return response
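For orientation (illustrative only, not part of the commit), the body returned by POST /api/v1/moderation/ follows ModerationResponse; a plausible shape, with made-up scores, is:

# Illustrative response body; the numbers and text are invented.
example_response = {
    "error": False,
    "title": "Cleaned",
    "status": 200,
    "payload": {
        "flagged_for": {"toxic": 0.93, "insult": 0.71},
        "scores": {"toxic": 0.93, "insult": 0.71, "misinfo": 0.0},
        "original_content": "the text the client submitted",
        "cleaned_content": "the Gemini-cleaned version of that text",
        "corrected_content": None,
    },
}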
app/services/text_cleaner.py
ADDED
@@ -0,0 +1,22 @@
# app/services/text_cleaner.py
from symspellpy.symspellpy import SymSpell
from pathlib import Path

# Initialize only once when this module is imported
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Load dictionary
dict_path = Path.cwd() / "app" / "services" / "frequency_dictionary_en_82_765.txt"
sym_spell.load_dictionary(dict_path, term_index=0, count_index=1)

# Leetspeak normalizer
def leetspeak_normalizer(text: str) -> str:
    leet_map = str.maketrans("014!3$@5#+", "oialesasht")
    return text.translate(leet_map)

# Combined cleaning function
def clean_text(text: str) -> str:
    normalized = leetspeak_normalizer(text)
    suggestions = sym_spell.lookup_compound(normalized, max_edit_distance=2)
    corrected = suggestions[0].term if suggestions else normalized
    return corrected
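A small usage sketch for the helpers above (not part of the commit); the leetspeak mapping is deterministic, while the SymSpell correction depends on the bundled frequency dictionary:

from app.services.text_cleaner import leetspeak_normalizer, clean_text

print(leetspeak_normalizer("h4t3"))  # -> "hate" (4 -> a, 3 -> e per the translation table)
print(clean_text("thiss is a tst"))  # SymSpell returns its best compound correction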