zyriean committed on
Commit d68e65a · verified · 1 Parent(s): 1cfccc2
Files changed (46)
  1. app/__pycache__/main.cpython-312.pyc +0 -0
  2. app/api/__init__.py +0 -0
  3. app/api/__pycache__/__init__.cpython-312.pyc +0 -0
  4. app/api/v1/__pycache__/api.cpython-312.pyc +0 -0
  5. app/api/v1/api.py +6 -0
  6. app/api/v1/endpoints/__pycache__/moderation.cpython-312.pyc +0 -0
  7. app/api/v1/endpoints/moderation.py +13 -0
  8. app/core/__pycache__/config.cpython-312.pyc +0 -0
  9. app/core/config.py +16 -0
  10. app/main.py +33 -0
  11. app/ml_models/__init__.py +0 -0
  12. app/ml_models/__pycache__/__init__.cpython-312.pyc +0 -0
  13. app/ml_models/__pycache__/classifier.cpython-312.pyc +0 -0
  14. app/ml_models/__pycache__/classifier_loader.cpython-312.pyc +0 -0
  15. app/ml_models/__pycache__/classifier_path_loader.cpython-312.pyc +0 -0
  16. app/ml_models/__pycache__/gemini_moderator.cpython-312.pyc +0 -0
  17. app/ml_models/classifier.py +41 -0
  18. app/ml_models/classifier_loader.py +42 -0
  19. app/ml_models/classifier_path_loader.py +20 -0
  20. app/ml_models/gemini_moderator.py +51 -0
  21. app/ml_models/toxic-bert/config.json +42 -0
  22. app/ml_models/toxic-bert/model.safetensors +3 -0
  23. app/ml_models/toxic-bert/special_tokens_map.json +7 -0
  24. app/ml_models/toxic-bert/tokenizer.json +0 -0
  25. app/ml_models/toxic-bert/tokenizer_config.json +58 -0
  26. app/ml_models/toxic-bert/vocab.txt +0 -0
  27. app/models/__pycache__/check_type.cpython-312.pyc +0 -0
  28. app/models/__pycache__/moderation_data.cpython-312.pyc +0 -0
  29. app/models/__pycache__/moderation_request.cpython-312.pyc +0 -0
  30. app/models/__pycache__/moderation_response.cpython-312.pyc +0 -0
  31. app/models/__pycache__/schemas.cpython-312.pyc +0 -0
  32. app/models/__pycache__/standard_response.cpython-312.pyc +0 -0
  33. app/models/check_type.py +11 -0
  34. app/models/moderation_data.py +12 -0
  35. app/models/moderation_request.py +32 -0
  36. app/models/moderation_response.py +6 -0
  37. app/models/schemas.py +5 -0
  38. app/models/standard_response.py +8 -0
  39. app/services/__pycache__/gemini_error_handling.cpython-312.pyc +0 -0
  40. app/services/__pycache__/moderation.cpython-312.pyc +0 -0
  41. app/services/__pycache__/object_to_json.cpython-312.pyc +0 -0
  42. app/services/__pycache__/text_cleaner.cpython-312.pyc +0 -0
  43. app/services/frequency_dictionary_en_82_765.txt +0 -0
  44. app/services/gemini_error_handling.py +11 -0
  45. app/services/moderation.py +56 -0
  46. app/services/text_cleaner.py +22 -0
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (1.51 kB).
 
app/api/__init__.py ADDED
File without changes
app/api/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (182 Bytes).
 
app/api/v1/__pycache__/api.cpython-312.pyc ADDED
Binary file (469 Bytes).
 
app/api/v1/api.py ADDED
@@ -0,0 +1,6 @@
+ from fastapi import APIRouter
+ from app.api.v1.endpoints import moderation
+
+ api_router = APIRouter()
+
+ api_router.include_router(moderation.router, prefix="/moderation", tags=["Moderation"])
app/api/v1/endpoints/__pycache__/moderation.cpython-312.pyc ADDED
Binary file (700 Bytes).
 
app/api/v1/endpoints/moderation.py ADDED
@@ -0,0 +1,13 @@
+ from fastapi import APIRouter
+
+ from app.models.schemas import ModerationRequest, ModerationResponse
+ from app.services.moderation import moderate_content
+
+ router = APIRouter()
+
+
+ @router.post("/", response_model=ModerationResponse)
+ def moderate(request: ModerationRequest):
+     response = moderate_content(request)
+
+     return response
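Not part of the commit, but a sketch of how the new endpoint could be exercised once the app is running. It assumes a local server on port 8000 (e.g. started with uvicorn) and the /api/v1 prefix wired up in app/main.py; the payload fields mirror ModerationRequest.

# Hypothetical client call; assumes the `requests` package and a running server.
import requests

resp = requests.post(
    "http://localhost:8000/api/v1/moderation/",
    json={
        "content": "some text to screen",
        "checkFor": ["toxic", "insult", "misinfo"],  # values from CheckType
        "threshold": 0.6,
    },
)
print(resp.json())  # StandardResponse envelope carrying a ModerationData payload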
app/core/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.09 kB).
 
app/core/config.py ADDED
@@ -0,0 +1,16 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+
+ class Settings:
+     def __init__(self) -> None:
+         self.gemini_apikey = None
+         self.perspective_apikey = None
+
+     def config(self) -> None:
+         self.gemini_apikey = os.environ.get("GEMINI_API_KEY_1")
+         self.perspective_apikey = os.environ.get("PERSPECTIVE_API_KEY_1")
+
+ settings = Settings()
+ settings.config()
app/main.py ADDED
@@ -0,0 +1,33 @@
+ from fastapi import FastAPI, Response
+ from app.api.v1.api import api_router
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from app.models.schemas import StandardResponse
+
+ app = FastAPI(title="Cognisafe API")
+
+ origins = ["*"]
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,  # wide open for development; restrict in production
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy"}
+
+ @app.get("/helloworld", response_model=StandardResponse)
+ def helloworld(response: Response) -> StandardResponse:
+     """
+     Returns "Hello World" as the standard response.
+     """
+     response.status_code = 200
+     result = StandardResponse(error=False, title="Hello World", status=200)
+     return result
+
+
+ app.include_router(api_router, prefix="/api/v1")
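A minimal smoke test for the two top-level routes, offered as a sketch rather than part of the commit. It uses FastAPI's TestClient; note that importing app.main pulls in the router (and therefore the classifier and Gemini singletons), so the toxic-bert weights and GEMINI_API_KEY_1 must be resolvable for the import to succeed.

# Hypothetical smoke test; heavyweight initialization happens at app import time.
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)

assert client.get("/health").json() == {"status": "healthy"}
print(client.get("/helloworld").json())
# expected shape: {"error": false, "title": "Hello World", "status": 200, "payload": null}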
app/ml_models/__init__.py ADDED
File without changes
app/ml_models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (188 Bytes).
 
app/ml_models/__pycache__/classifier.cpython-312.pyc ADDED
Binary file (1.98 kB).
 
app/ml_models/__pycache__/classifier_loader.cpython-312.pyc ADDED
Binary file (2.44 kB).
 
app/ml_models/__pycache__/classifier_path_loader.cpython-312.pyc ADDED
Binary file (1.48 kB).
 
app/ml_models/__pycache__/gemini_moderator.cpython-312.pyc ADDED
Binary file (3.95 kB).
 
app/ml_models/classifier.py ADDED
@@ -0,0 +1,41 @@
+ from transformers import pipeline
+ import logging
+
+
+ from app.ml_models.classifier_loader import ClassifierLoader
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class Classifier:
+     def __init__(self, model_name: str = "toxic-bert") -> None:
+         self.model = None
+         self.tokenizer = None
+         self.model_name = model_name
+         self.classifier = None
+
+     def initialize_classifier(self) -> None:
+         loader = ClassifierLoader(self.model_name)
+         self.model = loader.load_model()
+         self.tokenizer = loader.load_tokenizer()
+         self.classifier = pipeline(
+             "text-classification",
+             model=self.model,
+             tokenizer=self.tokenizer,
+             device=-1,
+             top_k=None,
+         )
+
+     def predict_nsfw(self, content: str) -> dict:
+         if self.classifier is None:
+             raise RuntimeError(
+                 "Model not initialized. Please call `initialize_classifier()` first."
+             )
+         results = self.classifier(content)
+
+         prediction = {}
+         for result in results[0]:
+             prediction[result["label"]] = result["score"]
+
+         return prediction
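A usage sketch for Classifier outside the request path (not part of the commit). It assumes the toxic-bert weights resolve through ClassifierPathLoader; ClassifierLoader downloads them from the Hub on first use.

# Hypothetical direct use of the toxicity classifier.
from app.ml_models.classifier import Classifier

clf = Classifier()           # defaults to the local "toxic-bert" registry entry
clf.initialize_classifier()  # builds a CPU text-classification pipeline
scores = clf.predict_nsfw("you are a horrible person")
# maps every label from config.json to a score, e.g. {"toxic": 0.98, "insult": 0.92, ...}
print(scores)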
app/ml_models/classifier_loader.py ADDED
@@ -0,0 +1,42 @@
+ import logging
+
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ from app.ml_models.classifier_path_loader import ClassifierPathLoader
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class ClassifierLoader:
+     def __init__(self, model_name: str):
+         self.model_name = model_name
+         self.model = None
+         self.tokenizer = None
+
+         path_loader = ClassifierPathLoader()
+         path_loader.set_model(self.model_name)
+         self.model_path = path_loader.get_model_path()
+
+         # If the model doesn't exist locally, download and cache it
+         if not self.model_path.exists():
+             model_name = "unitary/toxic-bert"
+             tokenizer = AutoTokenizer.from_pretrained(model_name)
+             model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+             tokenizer.save_pretrained(self.model_path)
+             model.save_pretrained(self.model_path)
+
+     def load_model(self):
+         if self.model is None:
+             self.model = AutoModelForSequenceClassification.from_pretrained(
+                 self.model_path
+             )
+             logger.info("[✅] Model loaded successfully.")
+         return self.model
+
+     def load_tokenizer(self):
+         if self.tokenizer is None:
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
+             logger.info("[✅] Tokenizer loaded successfully.")
+         return self.tokenizer
app/ml_models/classifier_path_loader.py ADDED
@@ -0,0 +1,20 @@
+ from pathlib import Path
+
+ path_to_model = {"toxic-bert": Path.cwd() / "app" / "ml_models" / "toxic-bert"}
+
+
+ class ClassifierPathLoader:
+     def __init__(self) -> None:
+         self.model_name = None
+         self.model_path = None
+
+     def set_model(self, model_name: str) -> None:
+         if model_name not in path_to_model:
+             raise KeyError(f"Model '{model_name}' not found in path registry.")
+         self.model_name = model_name
+         self.model_path = Path(path_to_model[model_name])
+
+     def get_model_path(self) -> Path:
+         if self.model_path is None:
+             raise RuntimeError("Model not set. Call `set_model()` first.")
+         return self.model_path
app/ml_models/gemini_moderator.py ADDED
@@ -0,0 +1,51 @@
+ from google import genai
+
+ from app.core.config import settings
+ from app.services.gemini_error_handling import handle_model_error
+
+ class GeminiModerator:
+     def __init__(self):
+         self.client = None
+
+     def initialize_for_cleansing(self) -> None:
+         client = genai.Client(api_key=settings.gemini_apikey)
+         self.client = client
+
+     def initialize_for_misinfo_detection(self) -> None:
+         client = genai.Client(api_key=settings.gemini_apikey)
+         self.client = client
+
+     def check_misinfo(self, content: str) -> str:
+         return self.prompt_model(content, 1)  # 1 selects the misinformation prompt
+
+     def cleanse(self, content: str) -> str:
+         return self.prompt_model(content, 0)  # 0 selects the cleansing prompt
+
+     def prompt_model(self, content: str, purpose: int) -> str:
+         prompt_message = [
+             f'You are a content moderation assistant. The following text has been flagged for harmful content. Your task is to clean it by removing or replacing inappropriate words while keeping the meaning intact. Please return the cleaned version of the text. Content: "{content}". I repeat, return only the clean version, without any explanation.',
+             f"You are a content moderation assistant. Check if the following content contains any factual inaccuracies. Your task is to identify any statements that contradict established facts or lack evidence. If there is misinformation present, the response must start with 'False' followed by the corrected information or explanation of the inaccuracy. If the content is factually accurate, the response must start with 'True'. Do not flag content as 'False' simply because it is harmful, hateful, or threatening if the claims made are factually correct. Focus solely on the truthfulness of the statements. Content:\"{content}\".",
+         ]
+
+         try:
+             response = self.client.models.generate_content(
+                 model="gemini-2.0-flash", contents=prompt_message[purpose]
+             )
+
+             print(response)
+             block_reason = getattr(
+                 getattr(response, "prompt_feedback", None), "block_reason", None
+             )
+             if block_reason == "PROHIBITED_CONTENT":
+                 return "I have anger issues. I will take a deep breath"
+
+             return response.text
+         except Exception as e:
+             print(f"Something went wrong while prompting: {e}")
+             return handle_model_error(getattr(e, "code", None), content)
+
+
+ gemini_for_cleansing = GeminiModerator()
+ gemini_for_cleansing.initialize_for_cleansing()
+ gemini_for_misinfo_detection = GeminiModerator()
+ gemini_for_misinfo_detection.initialize_for_misinfo_detection()
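The two module-level singletons are configured identically today (both initializers just build a genai.Client); the split leaves room for purpose-specific setup later. A hedged sketch of direct use, assuming GEMINI_API_KEY_1 is set before the module is imported:

# Hypothetical direct use; the clients are instantiated at import time.
from app.ml_models.gemini_moderator import (
    gemini_for_cleansing,
    gemini_for_misinfo_detection,
)

cleaned = gemini_for_cleansing.cleanse("some flagged text to rewrite")
verdict = gemini_for_misinfo_detection.check_misinfo("the moon is made of cheese")
# `verdict` starts with "False" plus a correction when misinformation is found,
# and with "True" when the claims hold up.
print(cleaned)
print(verdict)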
app/ml_models/toxic-bert/config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "toxic",
+     "1": "severe_toxic",
+     "2": "obscene",
+     "3": "threat",
+     "4": "insult",
+     "5": "identity_hate"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "identity_hate": 5,
+     "insult": 4,
+     "obscene": 2,
+     "severe_toxic": 1,
+     "threat": 3,
+     "toxic": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "multi_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
app/ml_models/toxic-bert/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c155addfd79483b0a75955c49b6d42508e26ecd72b9600fb092205d9990df577
+ size 437970952
app/ml_models/toxic-bert/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
app/ml_models/toxic-bert/tokenizer.json ADDED
The diff for this file is too large to render.
 
app/ml_models/toxic-bert/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
app/ml_models/toxic-bert/vocab.txt ADDED
The diff for this file is too large to render.
 
app/models/__pycache__/check_type.cpython-312.pyc ADDED
Binary file (575 Bytes).
 
app/models/__pycache__/moderation_data.cpython-312.pyc ADDED
Binary file (805 Bytes).
 
app/models/__pycache__/moderation_request.cpython-312.pyc ADDED
Binary file (2.03 kB).
 
app/models/__pycache__/moderation_response.cpython-312.pyc ADDED
Binary file (568 Bytes).
 
app/models/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (524 Bytes).
 
app/models/__pycache__/standard_response.cpython-312.pyc ADDED
Binary file (547 Bytes).
 
app/models/check_type.py ADDED
@@ -0,0 +1,11 @@
+ from enum import Enum
+
+
+ class CheckType(str, Enum):
+     toxic = "toxic"
+     severe_toxic = "severe_toxic"
+     insult = "insult"
+     obscene = "obscene"
+     threat = "threat"
+     identity_hate = "identity_hate"
+     misinfo = "misinfo"
app/models/moderation_data.py ADDED
@@ -0,0 +1,12 @@
+ from pydantic import BaseModel
+ from typing import Any, Dict, Optional
+
+ from app.models.check_type import CheckType
+
+
+ class ModerationData(BaseModel):
+     flagged_for: Dict[CheckType, float]
+     scores: Dict[CheckType, float]
+     original_content: str
+     cleaned_content: Optional[str] = None
+     corrected_content: Any = None
app/models/moderation_request.py ADDED
@@ -0,0 +1,32 @@
+ from pydantic import BaseModel
+ from typing import List
+
+ from app.models.check_type import CheckType
+ from app.services.text_cleaner import clean_text
+
+ from app.ml_models.classifier import Classifier
+ from app.ml_models.gemini_moderator import gemini_for_cleansing
+ from app.ml_models.gemini_moderator import gemini_for_misinfo_detection
+
+
+ classifier = Classifier()
+ classifier.initialize_classifier()
+
+
+ class ModerationRequest(BaseModel):
+     content: str
+     checkFor: List[CheckType]
+     threshold: float = 0.6
+
+     def correct_typos(self) -> None:
+         self.content = clean_text(self.content)
+
+     def classify_moderation(self) -> dict:
+         result = classifier.predict_nsfw(self.content)
+         return result
+
+     def cleanse_content(self) -> str:
+         return gemini_for_cleansing.cleanse(self.content)
+
+     def identify_misinfo(self) -> str:
+         return gemini_for_misinfo_detection.check_misinfo(self.content)
app/models/moderation_response.py ADDED
@@ -0,0 +1,6 @@
+ from app.models.standard_response import StandardResponse
+ from app.models.moderation_data import ModerationData
+
+
+ class ModerationResponse(StandardResponse):
+     payload: ModerationData  # override with a specific structure
app/models/schemas.py ADDED
@@ -0,0 +1,5 @@
+ from app.models.check_type import CheckType
+ from app.models.moderation_data import ModerationData
+ from app.models.moderation_request import ModerationRequest
+ from app.models.moderation_response import ModerationResponse
+ from app.models.standard_response import StandardResponse
app/models/standard_response.py ADDED
@@ -0,0 +1,8 @@
+ from typing import Any
+ from pydantic import BaseModel
+
+
+ class StandardResponse(BaseModel):
+     error: bool
+     title: str
+     status: int
+     payload: Any = None  # generic; subclasses narrow this
app/services/__pycache__/gemini_error_handling.cpython-312.pyc ADDED
Binary file (706 Bytes).
 
app/services/__pycache__/moderation.cpython-312.pyc ADDED
Binary file (2.29 kB).
 
app/services/__pycache__/object_to_json.cpython-312.pyc ADDED
Binary file (836 Bytes).
 
app/services/__pycache__/text_cleaner.cpython-312.pyc ADDED
Binary file (1.38 kB).
 
app/services/frequency_dictionary_en_82_765.txt ADDED
The diff for this file is too large to render.
 
app/services/gemini_error_handling.py ADDED
@@ -0,0 +1,11 @@
+ def handle_model_error(code: int, content: str) -> str:
+     response = switch_case(code)
+     return response
+
+
+ def switch_case(code: int) -> str:
+     match code:
+         case 503:
+             return "Model is overloaded. Please try again later"
+         case _:
+             return f"Something went wrong while prompting: {code}"
app/services/moderation.py ADDED
@@ -0,0 +1,56 @@
+ from fastapi.responses import JSONResponse
+
+ from app.models.moderation_response import ModerationResponse
+ from app.models.schemas import ModerationData, ModerationRequest
+ from app.models.standard_response import StandardResponse
+
+
+ def to_json_response(data: StandardResponse) -> JSONResponse:
+     return JSONResponse(content=data.model_dump(), status_code=data.status)
+
+
+ def moderate_content(request: ModerationRequest) -> ModerationResponse:
+     """Run the requested checks and cleanse flagged content."""
+     flagged_for = {}
+     scores = {}
+     corrected_content = None
+     original_content = request.content
+
+     # The misinformation check runs first, on the unmodified content
+     if "misinfo" in request.checkFor:
+         correct_info = request.identify_misinfo()
+         if correct_info.startswith("False"):
+             flagged_for["misinfo"] = 1
+             corrected_content = str(correct_info[6:])  # drop the "False " prefix
+
+     request.correct_typos()
+     result = request.classify_moderation()
+     result["misinfo"] = flagged_for.get("misinfo", 0)
+
+     # Score every requested category; flag the ones above the threshold
+     for category in request.checkFor:
+         scores[category] = result[category]
+         if result[category] > request.threshold:
+             flagged_for[category] = result[category]
+
+     cleaned_content = request.content
+     if len(flagged_for) > 0:
+         if list(flagged_for.keys()) == ["misinfo"]:
+             # Misinformation alone needs no cleansing pass
+             pass
+         else:
+             gemini_response = request.cleanse_content()
+             cleaned_content = gemini_response
+
+     payload = ModerationData(
+         flagged_for=flagged_for,
+         cleaned_content=cleaned_content,
+         corrected_content=corrected_content,
+         original_content=original_content,
+         scores=scores,
+     )
+     response = ModerationResponse(
+         error=False, title="Cleaned", status=200, payload=payload
+     )
+
+     return response
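An end-to-end sketch of moderate_content (not part of the commit). It needs the toxic-bert weights on disk and the Gemini key in the environment, since importing the schemas initializes both.

# Hypothetical end-to-end call through the service layer.
from app.models.schemas import ModerationRequest
from app.services.moderation import moderate_content

request = ModerationRequest(
    content="u r st00pid",
    checkFor=["toxic", "insult"],
    threshold=0.6,
)
response = moderate_content(request)
# payload.scores holds every requested category; payload.flagged_for only those
# above the threshold (plus "misinfo" when Gemini answered "False").
print(response.payload.flagged_for)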
app/services/text_cleaner.py ADDED
@@ -0,0 +1,22 @@
+ # app/services/text_cleaner.py
+ from symspellpy.symspellpy import SymSpell
+ from pathlib import Path
+
+ # Initialize only once, when this module is imported
+ sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
+
+ # Load the frequency dictionary
+ dict_path = Path.cwd() / "app" / "services" / "frequency_dictionary_en_82_765.txt"
+ sym_spell.load_dictionary(dict_path, term_index=0, count_index=1)
+
+ # Leetspeak normalizer
+ def leetspeak_normalizer(text: str) -> str:
+     leet_map = str.maketrans("014!3$@5#+", "oialesasht")
+     return text.translate(leet_map)
+
+ # Combined cleaning function
+ def clean_text(text: str) -> str:
+     normalized = leetspeak_normalizer(text)
+     suggestions = sym_spell.lookup_compound(normalized, max_edit_distance=2)
+     corrected = suggestions[0].term if suggestions else normalized
+     return corrected
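A small example of the two-stage cleanup (not part of the commit), assuming the bundled frequency dictionary is on disk so the module import succeeds:

# Hypothetical use of the typo/leetspeak cleaner.
from app.services.text_cleaner import clean_text

print(clean_text("y0u 4re a l0ser"))
# The "014!3$@5#+" -> "oialesasht" mapping turns this into "you are a loser",
# then SymSpell compound lookup fixes any remaining misspellings.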