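"""Script-to-caption utilities.

Pipeline: optionally translate Filipino input to English (googletrans),
resolve coreferences (fastcoref + spaCy), strip quoted dialogue, and split
the result into sentences. get_script_captions() sends a script to the
Hugging Face router (DeepSeek model) and returns one caption per line.
"""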
import os
import re
from typing import List

import requests
import spacy
from dotenv import load_dotenv
from fastapi import HTTPException
from fastcoref import FCoref
from googletrans import Translator

load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")

nlp = spacy.load("en_core_web_lg")
coref_model = FCoref()

CAPITALIZED_PRONOUNS = {
    "He",
    "She",
    "His",
    "Her",
    "They",
    "Their",
    "It",
    "Its",
    "You",
    "Your",
    "I",
    "We",
    "Our",
}
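# NOTE: this set is not referenced below; is_capitalized_pronoun() checks
# token casing directly rather than membership in this set.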

def clean_caption(line: str) -> str:
    # Remove leading numbers, dashes, bullets, and bold titles.
    line = re.sub(r"^\s*\d+[\.\-)]?\s*", "", line)  # "1. " or "2-" or "3)"
    line = re.sub(r"^\*\*(.*?)\*\*\s*[-–—]?\s*", "", line)  # "**Title** –"
    return line.strip()
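# Illustrative example: clean_caption("1. **Opening** - A boy walks home")
# returns "A boy walks home" (the leading "1. " and bold title are stripped).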

def get_script_captions(script_text: str):
    API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }
    messages = [
        {
            "role": "system",
            "content": (
                "You convert story or movie scripts into detailed, visually rich image generation captions. "
                "Each caption should describe a visually distinct scene as if it were to be illustrated or rendered, "
                "but do not number them or include titles – just full, descriptive sentences. "
                "Add the appropriate camera shot or angle to each sentence. "
                "These are the 8 shots you can use: close-up shot, extreme close-up shot, long shot, "
                "low-angle shot, high-angle shot, Dutch angle, over-the-shoulder shot, medium shot."
            ),
        },
        {
            "role": "user",
            "content": f"SCRIPT:\n{script_text}\n\nReturn only image generation captions, one per line, no numbering, no headings.",
        },
    ]
    payload = {
        "model": "deepseek/deepseek-v3-0324",
        "messages": messages,
        "temperature": 0.7,
    }
    # A timeout keeps a stalled API call from hanging the request indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    if response.status_code == 200:
        json_data = response.json()
        raw_output = json_data["choices"][0]["message"]["content"]
        lines = raw_output.strip().split("\n")
        return [clean_caption(line) for line in lines if line.strip()]
    else:
        raise HTTPException(
            status_code=500,
            detail=f"DeepSeek API error: {response.status_code} - {response.text}",
        )

def is_capitalized_pronoun(span: spacy.tokens.Span, text: str) -> bool:
    """Check if the span is a single capitalized pronoun in the original text."""
    if len(span) != 1 or span[0].pos_ != "PRON":
        return False
    # Use original casing from the source text, not spaCy's normalized form.
    start = span.start_char
    end = span.end_char
    original_token_text = text[start:end]
    return original_token_text[0].isupper()

def get_fastcoref_clusters(doc, text):
    """Run fastcoref and convert its character-offset clusters to token offsets."""
    preds = coref_model.predict(texts=[text])
    fast_clusters = preds[0].get_clusters(as_strings=False)
    converted_clusters = []
    for cluster in fast_clusters:
        new_cluster = []
        for start_char, end_char in cluster:
            span = doc.char_span(start_char, end_char)
            # Skip mentions that do not align with spaCy token boundaries.
            if span is not None:
                new_cluster.append((span.start, span.end))
        if new_cluster:
            converted_clusters.append(new_cluster)
    return converted_clusters
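# Shape note (illustrative): for "John went home. He slept." fastcoref yields
# character spans like [[(0, 4), (16, 18)]], converted here to token spans
# [[(0, 1), (4, 5)]].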

def get_span_noun_indices(doc: spacy.tokens.Doc, cluster: List[List[int]]) -> List[int]:
    spans = [doc[span[0] : span[1]] for span in cluster]
    spans_pos = [[token.pos_ for token in span] for span in spans]
    return [
        i
        for i, span_pos in enumerate(spans_pos)
        if any(pos in ["NOUN", "PROPN"] for pos in span_pos)
    ]
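# e.g. spans ["John", "He"] have POS lists [["PROPN"], ["PRON"]], so only
# index 0 is returned: "John" is the sole mention containing a noun.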

def get_cluster_head(
    doc: spacy.tokens.Doc, cluster: List[List[int]], noun_indices: List[int]
):
    head_idx = noun_indices[0] if noun_indices else 0
    head_start, head_end = cluster[head_idx]
    head_span = doc[head_start:head_end]
    return head_span, (head_start, head_end)

def is_containing_other_spans(span: List[int], all_spans: List[List[int]]):
    """Return True if `span` fully contains another mention span."""
    return any(s != span and s[0] >= span[0] and s[1] <= span[1] for s in all_spans)

def replace_coref_span(doc, coref_span, resolved_text, mention_span):
    """Overwrite a coreferent span with its head mention, preserving spacing."""
    start, end = coref_span
    # Keep a separating space if the previous token has no trailing whitespace.
    prefix = " " if start > 0 and not doc[start - 1].whitespace_ else ""
    # Carry over the trailing whitespace of the span's last token.
    suffix = doc[end - 1].whitespace_ if end < len(doc) else ""
    resolved_text[start] = prefix + mention_span.text + suffix
    # Blank out the remaining tokens of the replaced span.
    for i in range(start + 1, end):
        resolved_text[i] = ""

def improved_replace_corefs(
    doc: spacy.tokens.Doc, clusters: List[List[List[int]]], text: str
):
    resolved = [token.text_with_ws for token in doc]
    all_spans = [span for cluster in clusters for span in cluster]
    for cluster in clusters:
        noun_indices = get_span_noun_indices(doc, cluster)
        # Only resolve clusters that contain at least one noun mention to use as head.
        if not noun_indices:
            continue
        mention_span, mention = get_cluster_head(doc, cluster, noun_indices)
        for coref in cluster:
            coref_span = doc[coref[0] : coref[1]]
            if (
                coref != mention
                and not is_containing_other_spans(coref, all_spans)
                and is_capitalized_pronoun(coref_span, text)
            ):
                replace_coref_span(doc, coref, resolved, mention_span)
    return "".join(resolved)

def detect_and_translate_to_english(text: str) -> str:
    try:
        translator = Translator()
        detected = translator.detect(text)
        if detected.lang == "tl":
            print("[Info] Detected language: Filipino (tl). Translating to English...")
            translated = translator.translate(text, src="tl", dest="en")
            return translated.text
        return text
    except Exception as e:
        print(f"[Warning] Language detection or translation failed: {e}")
        return text

def resolve_coreferences(text: str) -> str:
    doc = nlp(text)
    clusters = get_fastcoref_clusters(doc, text)
    return improved_replace_corefs(doc, clusters, text)

def remove_dialogues(text: str) -> str:
    # Strip quoted dialogue (straight or curly quotes), then collapse whitespace.
    text = re.sub(r'(["“”\']).*?\1', "", text)
    text = re.sub(r"\s{2,}", " ", text)
    return text.strip()
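# Example: 'She nodded. "Yes," she said.' -> 'She nodded. she said.'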

def get_resolved_sentences(text: str) -> List[str]:
    text = detect_and_translate_to_english(text)
    resolved_text = resolve_coreferences(text)
    no_dialogue_text = remove_dialogues(resolved_text)
    resolved_doc = nlp(no_dialogue_text)
    return [sent.text.strip() for sent in resolved_doc.sents]
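
# Minimal manual smoke test; assumes en_core_web_lg and the fastcoref model
# are available locally. The sample text is hypothetical.
if __name__ == "__main__":
    sample = 'Maria opened the door. She smiled and said, "Hello!"'
    for sentence in get_resolved_sentences(sample):
        print(sentence)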