Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

DawnC committed on Jun 7

Commit

bb01345

verified ·

1 Parent(s): 77026b9

Upload 6 files

Browse files

fixed format issues and improved number counting

Files changed (6) hide show

enhanced_scene_describer.py +1 -105
llm_enhancer.py +12 -1
object_description_generator.py +501 -44
response_processor.py +38 -0
template_manager.py +35 -2
text_formatter.py +114 -0

enhanced_scene_describer.py CHANGED Viewed

@@ -241,7 +241,7 @@ class EnhancedSceneDescriber:
                 secondary_desc = self.scene_types[current_scene_type]["secondary_description"]
                 if secondary_desc:
                     description = self.text_formatter.smart_append(description, secondary_desc)
             # 處理人物相關的描述
             people_objs = [obj for obj in current_detected_objects if obj.get("class_id") == 0]
             if people_objs:
@@ -333,110 +333,6 @@ class EnhancedSceneDescriber:
             except:
                 return "A scene with various elements is visible."
-    def deduplicate_sentences_in_description(self, description: str, similarity_threshold: float = 0.80) -> str:
-        """
-        從一段描述文本中移除重複或高度相似的句子。
-        此方法會嘗試保留更長、資訊更豐富的句子版本。
-        Args:
-            description (str): 原始描述文本。
-            similarity_threshold (float): 判斷句子是否相似的 Jaccard 相似度閾值 (0 到 1)。
-                                         預設為 0.8，表示詞彙重疊度達到80%即視為相似。
-        Returns:
-            str: 移除了重複或高度相似句子後的文本。
-        """
-        try:
-            if not description or not description.strip():
-                self.logger.debug("deduplicate_sentences_in_description: Received empty or blank description.")
-                return ""
-            # 使用正則表達式分割句子，保留句尾標點符號
-            sentences = re.split(r'(?<=[.!?])\s+', description.strip())
-            if not sentences:
-                self.logger.debug("deduplicate_sentences_in_description: No sentences found after splitting.")
-                return ""
-            unique_sentences_data = []  # 存儲 (原始句子文本, 該句子的詞彙集合)
-            for current_sentence_text in sentences:
-                current_sentence_text = current_sentence_text.strip()
-                if not current_sentence_text:
-                    continue
-                # 預處理當前句子以進行比較：轉小寫、移除標點、分割成詞彙集合
-                simplified_current_text = re.sub(r'[^\w\s\d]', '', current_sentence_text.lower()) # 保留數字
-                current_sentence_words = set(simplified_current_text.split())
-                if not current_sentence_words: # 如果處理後是空集合 (例如句子只包含標點)
-                    # 如果原始句子有內容（例如只有一個標點），就保留它
-                    if current_sentence_text and not unique_sentences_data: # 避免在開頭加入孤立標點
-                         unique_sentences_data.append((current_sentence_text, current_sentence_words))
-                    continue
-                is_subsumed_or_highly_similar = False
-                index_to_replace = -1
-                for i, (kept_sentence_text, kept_sentence_words) in enumerate(unique_sentences_data):
-                    if not kept_sentence_words: # 跳過已保留的空詞彙集合
-                        continue
-                    # 計算 Jaccard 相似度
-                    intersection_len = len(current_sentence_words.intersection(kept_sentence_words))
-                    union_len = len(current_sentence_words.union(kept_sentence_words))
-                    jaccard_similarity = 0.0
-                    if union_len > 0:
-                        jaccard_similarity = intersection_len / union_len
-                    elif not current_sentence_words and not kept_sentence_words: # 兩個都是空的
-                        jaccard_similarity = 1.0
-                    if jaccard_similarity >= similarity_threshold:
-                        # 如果當前句子比已保留的句子長，則標記替換舊的
-                        if len(current_sentence_words) > len(kept_sentence_words):
-                            self.logger.debug(f"Deduplication: Replacing shorter \"{kept_sentence_text[:50]}...\" "
-                                              f"with longer similar \"{current_sentence_text[:50]}...\" (Jaccard: {jaccard_similarity:.2f})")
-                            index_to_replace = i
-                            break # 找到一個可以被替換的，就跳出內層循環
-                        # 如果當前句子比已保留的句子短，或者長度相近但內容高度相似，則標記當前句子為重複
-                        else: # current_sentence_words is shorter or of similar length
-                            is_subsumed_or_highly_similar = True
-                            self.logger.debug(f"Deduplication: Current sentence \"{current_sentence_text[:50]}...\" "
-                                              f"is subsumed by or highly similar to \"{kept_sentence_text[:50]}...\" (Jaccard: {jaccard_similarity:.2f}). Skipping.")
-                            break
-                if index_to_replace != -1:
-                    unique_sentences_data[index_to_replace] = (current_sentence_text, current_sentence_words)
-                elif not is_subsumed_or_highly_similar:
-                    unique_sentences_data.append((current_sentence_text, current_sentence_words))
-            # 從 unique_sentences_data 中提取最終的句子文本
-            final_sentences = [s_data[0] for s_data in unique_sentences_data]
-            # 重組句子，確保每個句子以標點符號結尾，並且句子間有空格
-            reconstructed_response = ""
-            for i, s_text in enumerate(final_sentences):
-                s_text = s_text.strip()
-                if not s_text:
-                    continue
-                # 確保句子以標點結尾
-                if not re.search(r'[.!?]$', s_text):
-                    s_text += "."
-                reconstructed_response += s_text
-                if i < len(final_sentences) - 1: # 如果不是最後一句，添加空格
-                    reconstructed_response += " "
-            self.logger.debug(f"Deduplicated description (len {len(reconstructed_response.strip())}): '{reconstructed_response.strip()[:150]}...'")
-            return reconstructed_response.strip()
-        except Exception as e:
-            self.logger.error(f"Error in deduplicate_sentences_in_description: {str(e)}")
-            self.logger.error(traceback.format_exc())
-            return description # 發生錯誤時返回原始描述
     def _extract_placeholders(self, template: str) -> List[str]:
         """提取模板中的佔位符"""
         import re

                 secondary_desc = self.scene_types[current_scene_type]["secondary_description"]
                 if secondary_desc:
                     description = self.text_formatter.smart_append(description, secondary_desc)
             # 處理人物相關的描述
             people_objs = [obj for obj in current_detected_objects if obj.get("class_id") == 0]
             if people_objs:
             except:
                 return "A scene with various elements is visible."
     def _extract_placeholders(self, template: str) -> List[str]:
         """提取模板中的佔位符"""
         import re

llm_enhancer.py CHANGED Viewed

@@ -146,12 +146,23 @@ class LLMEnhancer:
             if perspective and perspective.lower() not in cleaned_response.lower():
                 cleaned_response = f"{perspective}, {cleaned_response[0].lower()}{cleaned_response[1:]}"
             # 14. 最終驗證：如果結果過短，嘗試fallback
             final_result = cleaned_response.strip()
             if not final_result or len(final_result) < 20:
                 self.logger.warning("Enhanced description too short; attempting fallback")
-                # Fallback prompt
                 fallback_scene_data = enhanced_scene_data.copy()
                 fallback_scene_data["is_fallback"] = True
                 fallback_prompt = self.prompt_manager.format_enhancement_prompt_with_landmark(

             if perspective and perspective.lower() not in cleaned_response.lower():
                 cleaned_response = f"{perspective}, {cleaned_response[0].lower()}{cleaned_response[1:]}"
+            # 13.5. 最終的 identical 詞彙清理（確保LLM輸出不包含重複性描述）
+            identical_final_cleanup = [
+                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
+                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
+            ]
+            for pattern, replacement in identical_final_cleanup:
+                cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
             # 14. 最終驗證：如果結果過短，嘗試fallback
             final_result = cleaned_response.strip()
             if not final_result or len(final_result) < 20:
                 self.logger.warning("Enhanced description too short; attempting fallback")
+                # Fallback prompt
                 fallback_scene_data = enhanced_scene_data.copy()
                 fallback_scene_data["is_fallback"] = True
                 fallback_prompt = self.prompt_manager.format_enhancement_prompt_with_landmark(

object_description_generator.py CHANGED Viewed

@@ -513,11 +513,10 @@ class ObjectDescriptionGenerator:
                 # 使用置信度過濾
                 confident_objects = [obj for obj in detected_objects
                                    if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
-                # print(f"DEBUG: After confidence filtering (threshold={self.confidence_threshold_for_description}):")
-                # for class_name in ["car", "traffic light", "person", "handbag"]:
-                #     class_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
-                #     print(f"DEBUG: {class_name}: {len(class_objects)} confident objects")
                 if not confident_objects:
                     no_confident_obj_msg = "While some elements might be present, no objects were identified with sufficient confidence for a detailed description."
@@ -557,10 +556,11 @@ class ObjectDescriptionGenerator:
                             if name not in objects_by_class:
                                 objects_by_class[name] = []
                             objects_by_class[name].append(obj)
-                            # print(f"DEBUG: Before spatial deduplication:")
-                            # for class_name in ["car", "traffic light", "person", "handbag"]:
-                            #     if class_name in objects_by_class:
-                            #         print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
                     if not objects_by_class:
                         description_segments.append("No common objects were confidently identified for detailed description.")
@@ -616,22 +616,19 @@ class ObjectDescriptionGenerator:
                                 deduplicated_objects_by_class[class_name] = unique_objects
                         objects_by_class = deduplicated_objects_by_class
-                        # print(f"DEBUG: After spatial deduplication:")
-                        # for class_name in ["car", "traffic light", "person", "handbag"]:
-                        #     if class_name in objects_by_class:
-                        #         print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
                         sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
                         object_clauses = []
                         for class_name, group_of_objects in sorted_object_groups:
                             count = len(group_of_objects)
-                            # if class_name in ["car", "traffic light", "person", "handbag"]:
-                            #     print(f"DEBUG: Final count for {class_name}: {count}")
                             if count == 0:
                                 continue
@@ -642,11 +639,15 @@ class ObjectDescriptionGenerator:
                             if object_statistics and class_name in object_statistics:
                                 actual_count = object_statistics[class_name]["count"]
                                 formatted_name_with_exact_count = self._format_object_count_description(
-                                    normalized_class_name, actual_count
                                 )
                             else:
                                 formatted_name_with_exact_count = self._format_object_count_description(
-                                    normalized_class_name, count
                                 )
                             if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
@@ -726,6 +727,9 @@ class ObjectDescriptionGenerator:
             if raw_description and not raw_description.endswith(('.', '!', '?')):
                 raw_description += "."
             if not raw_description or len(raw_description.strip()) < 20:
                 if 'confident_objects' in locals() and confident_objects:
                     return "The scene contains several detected objects, but a detailed textual description could not be fully constructed."
@@ -739,45 +743,498 @@ class ObjectDescriptionGenerator:
             self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
             raise ObjectDescriptionError(error_msg) from e
-    def _format_object_count_description(self, class_name: str, count: int) -> str:
         """
-        格式化物件數量描述，提供多樣化的表達方式
         Args:
             class_name: 標準化後的類別名稱
             count: 物件數量
         Returns:
-            str: 格式化的數量描述
         """
         try:
             if count <= 0:
                 return ""
-            # 單數情況
-            if count == 1:
-                article = "an" if class_name[0].lower() in 'aeiou' else "a"
-                return f"{article} {class_name}"
-            # 複數情況
             plural_form = self._get_plural_form(class_name)
-            # 根據數量選擇不同的表達方式
-            if count == 2:
-                return f"two {plural_form}"
-            elif count == 3:
-                return f"three {plural_form}"
-            elif count <= 5:
-                return f"{count} {plural_form}"
-            elif count <= 10:
-                return f"several {plural_form}"
-            else:
-                return f"numerous {plural_form}"
         except Exception as e:
             self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
             return f"{count} {class_name}s" if count > 1 else class_name
     def _get_plural_form(self, word: str) -> str:
         """
         獲取詞彙的複數形式
@@ -1282,4 +1739,4 @@ class ObjectDescriptionGenerator:
         except Exception as e:
             self.logger.error(f"Error updating configuration: {str(e)}")
-            raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e

                 # 使用置信度過濾
                 confident_objects = [obj for obj in detected_objects
                                    if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
+                print(f"DEBUG: After confidence filtering (threshold={self.confidence_threshold_for_description}):")
+                for class_name in ["car", "traffic light", "person", "handbag"]:
+                    class_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
+                    print(f"DEBUG: {class_name}: {len(class_objects)} confident objects")
                 if not confident_objects:
                     no_confident_obj_msg = "While some elements might be present, no objects were identified with sufficient confidence for a detailed description."
                             if name not in objects_by_class:
                                 objects_by_class[name] = []
                             objects_by_class[name].append(obj)
+                            print(f"DEBUG: Before spatial deduplication:")
+                            for class_name in ["car", "traffic light", "person", "handbag"]:
+                                if class_name in objects_by_class:
+                                    print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
                     if not objects_by_class:
                         description_segments.append("No common objects were confidently identified for detailed description.")
                                 deduplicated_objects_by_class[class_name] = unique_objects
                         objects_by_class = deduplicated_objects_by_class
+                        print(f"DEBUG: After spatial deduplication:")
+                        for class_name in ["car", "traffic light", "person", "handbag"]:
+                            if class_name in objects_by_class:
+                                print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
                         sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
                         object_clauses = []
                         for class_name, group_of_objects in sorted_object_groups:
                             count = len(group_of_objects)
+                            if class_name in ["car", "traffic light", "person", "handbag"]:
+                                print(f"DEBUG: Final count for {class_name}: {count}")
                             if count == 0:
                                 continue
                             if object_statistics and class_name in object_statistics:
                                 actual_count = object_statistics[class_name]["count"]
                                 formatted_name_with_exact_count = self._format_object_count_description(
+                                    normalized_class_name,
+                                    actual_count,
+                                    scene_type=scene_type
                                 )
                             else:
                                 formatted_name_with_exact_count = self._format_object_count_description(
+                                    normalized_class_name,
+                                    count,
+                                    scene_type=scene_type
                                 )
                             if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
             if raw_description and not raw_description.endswith(('.', '!', '?')):
                 raw_description += "."
+            # 移除重複性和不適當的描述詞彙
+            raw_description = self._remove_repetitive_descriptors(raw_description)
             if not raw_description or len(raw_description.strip()) < 20:
                 if 'confident_objects' in locals() and confident_objects:
                     return "The scene contains several detected objects, but a detailed textual description could not be fully constructed."
             self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
             raise ObjectDescriptionError(error_msg) from e
+    def _remove_repetitive_descriptors(self, description: str) -> str:
+        """
+        Strip filler wording such as "identical" and "comprehensive" from a description.
+        Each rewrite rule is applied case-insensitively, in the order listed, and
+        any run of whitespace is then collapsed to a single space.
+        Args:
+            description: The raw description text.
+        Returns:
+            str: The cleaned text, or the untouched input if an error occurs.
         """
+        try:
+            import re
+            # (pattern, replacement) pairs, applied from top to bottom.
+            rewrite_rules = (
+                # Drop "identical" after a digit count, a spelled-out count, or on its own.
+                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
+                # Tone down the overly technical "comprehensive ..." phrasings.
+                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
+                (r'\bcomprehensive view featuring\b', 'scene featuring'),
+                (r'\bcomprehensive display of\b', 'display of'),
+                # Catch "identical" left directly after a count in positional phrases.
+                (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
+                (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
+            )
+            cleaned = description
+            for regex, substitute in rewrite_rules:
+                cleaned = re.sub(regex, substitute, cleaned, flags=re.IGNORECASE)
+            # Squeeze leftover whitespace runs and trim both ends.
+            cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+            self.logger.debug(f"Cleaned description: removed repetitive descriptors")
+            return cleaned
+        except Exception as err:
+            self.logger.warning(f"Error removing repetitive descriptors: {str(err)}")
+            return description
+    def _format_object_count_description(self, class_name: str, count: int,
+                                    scene_type: Optional[str] = None,
+                                    detected_objects: Optional[List[Dict]] = None,
+                                    avg_confidence: float = 0.0) -> str:
+        """
+        Core entry point for formatting an object-count phrase.
+        Dispatches to the single-object or multiple-object formatter, which together cover:
+        1. Number-to-word conversion (avoiding Arabic numerals)
+        2. Scene-based material inference
+        3. Description of the spatial arrangement pattern
+        4. Context-aware object description
         Args:
             class_name: Normalized class name.
             count: Number of objects.
+            scene_type: Scene type, used to contextualize the description.
+            detected_objects: All detected objects of this class, used for spatial analysis.
+            avg_confidence: Average detection confidence; forwarded to the material inference.
         Returns:
+            str: The formatted count description; "" when count <= 0. If anything
+                raises, a warning is logged and a plain "<count> <class_name>s"
+                (or the bare class name for a count of 1 or less) is returned instead.
         """
         try:
             if count <= 0:
                 return ""
+            # Resolve the plural form up front; an error here also sends the
+            # singular case to the fallback in the except branch below.
             plural_form = self._get_plural_form(class_name)
+            # Singular case.
+            if count == 1:
+                return self._format_single_object_description(class_name, scene_type,
+                                                            detected_objects, avg_confidence)
+            # Plural case.
+            # NOTE(review): with the defaults (detected_objects=None, avg_confidence=0.0)
+            # the helpers presumably add no material or spatial wording - confirm that
+            # callers pass these arguments when richer phrases are wanted.
+            return self._format_multiple_objects_description(class_name, count, plural_form,
+                                                        scene_type, detected_objects, avg_confidence)
         except Exception as e:
             self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
             return f"{count} {class_name}s" if count > 1 else class_name
+    def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
+                                        detected_objects: Optional[List[Dict]],
+                                        avg_confidence: float) -> str:
+        """
+        Build the phrase for a single detected object.
+        The bare class name is enriched with an optional material descriptor and
+        an optional feature descriptor, e.g. "a wooden dining chair" instead of
+        "a chair".
+        Args:
+            class_name: Object class name.
+            scene_type: Scene type.
+            detected_objects: Detected objects of this class.
+            avg_confidence: Average detection confidence.
+        Returns:
+            str: "<article> [material] [feature] <class_name>".
+        Raises:
+            IndexError: If the assembled phrase is empty (empty class_name and
+                no descriptors).
+        """
+        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
+        feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
+        # Keep only the descriptors that were actually produced, then the noun.
+        words = [word for word in (material_descriptor, feature_descriptor) if word]
+        words.append(class_name)
+        phrase = " ".join(words)
+        # The article has to agree with the word that directly follows it, which
+        # is the first descriptor when one exists ("an office chair", not
+        # "a office chair"), so derive it from the assembled phrase.
+        article = "an" if phrase[0].lower() in 'aeiou' else "a"
+        return f"{article} {phrase}"
+    def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
+                                        scene_type: Optional[str], detected_objects: Optional[List[Dict]],
+                                        avg_confidence: float) -> str:
+        """
+        Build the phrase for two or more objects of the same class.
+        The phrase is assembled from, in order:
+        1. The count as a word ("two" to "twelve", "several" up to 20, otherwise "numerous")
+        2. An optional material or function descriptor
+        3. The plural noun
+        4. An optional spatial-arrangement clause
+        Args:
+            class_name: Object class name.
+            count: Number of objects.
+            plural_form: Plural form of the class name.
+            scene_type: Scene type.
+            detected_objects: Detected objects of this class.
+            avg_confidence: Average detection confidence.
+        Returns:
+            str: The assembled phrase, with exactly one space between parts.
+        """
+        # Spelled-out forms for the counts reported exactly.
+        number_words = {
+            2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
+            7: "seven", 8: "eight", 9: "nine", 10: "ten",
+            11: "eleven", 12: "twelve"
+        }
+        if count in number_words:
+            count_expression = number_words[count]
+        elif count <= 20:
+            count_expression = "several"
+        else:
+            count_expression = "numerous"
+        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
+        spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
+                                                                    detected_objects, count)
+        # Join only the parts that exist. Interpolating an empty descriptor into
+        # an f-string left a double space ("two  chairs") when no material
+        # descriptor was available.
+        parts = [count_expression, material_descriptor, plural_form, spatial_descriptor]
+        return " ".join(part for part in parts if part)
+    def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
+                            avg_confidence: float) -> Optional[str]:
+        """
+        Pick a scene-appropriate adjective for an object class.
+        No image analysis is involved: the adjective comes from fixed lookup
+        tables keyed first by scene group and then by class name.
+        Args:
+            class_name: Object class name.
+            scene_type: Scene type; a falsy or unlisted value uses the generic table.
+            avg_confidence: Detection confidence; below 0.5 nothing is inferred.
+        Returns:
+            Optional[str]: The adjective, or None when no table entry applies.
+                A listed scene type never falls back to the generic table.
+        """
+        # Too uncertain to attach any descriptor.
+        if avg_confidence < 0.5:
+            return None
+        scene_tables = (
+            # Dining-related scenes.
+            (("dining_area", "restaurant", "upscale_dining", "cafe"), {
+                "chair": "wooden" if avg_confidence > 0.7 else None,
+                "dining table": "wooden",
+                "couch": "upholstered",
+                "vase": "decorative",
+            }),
+            # Office scenes; a "dining table" there is described as a conference table.
+            (("office_workspace", "meeting_room", "conference_room"), {
+                "chair": "office",
+                "dining table": "conference",
+                "laptop": "modern",
+                "book": "reference",
+            }),
+            # Living room.
+            (("living_room",), {
+                "couch": "comfortable",
+                "chair": "accent",
+                "tv": "large",
+                "vase": "decorative",
+            }),
+            # Outdoor scenes.
+            (("city_street", "park_area", "parking_lot"), {
+                "car": "parked",
+                "person": "walking",
+                "bicycle": "stationed",
+            }),
+        )
+        if scene_type:
+            for scene_names, descriptor_by_class in scene_tables:
+                if scene_type in scene_names:
+                    return descriptor_by_class.get(class_name)
+        # Generic descriptors when no scene group matched.
+        return {
+            "chair": "comfortable",
+            "dining table": "sturdy",
+            "car": "parked",
+            "person": "present",
+        }.get(class_name)
+    def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
+                                        detected_objects: Optional[List[Dict]],
+                                        count: int) -> Optional[str]:
+        """
+        Describe how the objects of one class are laid out in the image.
+        Reads each object's "normalized_center", classifies the resulting point
+        set into an arrangement pattern, and turns that pattern into a phrase.
+        Args:
+            class_name: Object class name.
+            scene_type: Scene type.
+            detected_objects: All detected objects of this class.
+            count: Number of objects.
+        Returns:
+            Optional[str]: The arrangement phrase, or None when fewer than two
+                usable positions exist or the analysis raises.
+        """
+        # A layout needs at least two objects.
+        if not detected_objects or len(detected_objects) < 2:
+            return None
+        try:
+            # Objects lacking a centre are treated as sitting at the image centre.
+            centers = [obj.get("normalized_center", [0.5, 0.5]) for obj in detected_objects]
+            positions = [
+                center for center in centers
+                if isinstance(center, (list, tuple)) and len(center) >= 2
+            ]
+            if len(positions) < 2:
+                return None
+            return self._generate_arrangement_description(
+                class_name, scene_type, self._analyze_arrangement_pattern(positions), count)
+        except Exception as err:
+            self.logger.warning(f"Error analyzing spatial arrangement: {str(err)}")
+            return None
+    def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
+        """
+        Classify a set of points into a coarse arrangement pattern.
+        Uses simple geometry: per-axis variance plus the mean and variance of
+        all pairwise distances.
+        Args:
+            positions: Position coordinates, one [x, y] pair per object.
+        Returns:
+            str: One of "single", "circular", "clustered", "linear",
+                "scattered", "regular" or "distributed".
+        """
+        import numpy as np
+        if len(positions) < 2:
+            return "single"
+        pos_array = np.array(positions)
+        # Spread along each axis.
+        x_variance = np.var(pos_array[:, 0])
+        y_variance = np.var(pos_array[:, 1])
+        # Distance between every unordered pair; never empty once len >= 2.
+        distances = [
+            np.sqrt((first[0] - second[0]) ** 2 + (first[1] - second[1]) ** 2)
+            for index, first in enumerate(positions)
+            for second in positions[index + 1:]
+        ]
+        avg_distance = np.mean(distances)
+        distance_variance = np.var(distances)
+        if len(positions) >= 4 and self._is_circular_pattern(positions):
+            return "circular"
+        # The tight-cluster test must run before the axis-variance test. The mean
+        # squared pairwise distance equals 2n/(n-1) * (x_variance + y_variance),
+        # so once both variances are >= 0.05 it is at least 0.2, while a
+        # cluster needs it below 0.3**2 + 0.02 = 0.11. With "linear" checked
+        # first this branch could never be reached.
+        if avg_distance < 0.3 and distance_variance < 0.02:
+            return "clustered"
+        # Hardly any spread along one axis: the points form a row or a column.
+        if x_variance < 0.05 or y_variance < 0.05:
+            return "linear"
+        # Far apart on average.
+        if avg_distance > 0.6:
+            return "scattered"
+        # Similar spacing throughout suggests a regular layout.
+        if distance_variance < 0.03:
+            return "regular"
+        return "distributed"
+    def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
+        """
+        Check whether the positions form a circle or ring.
+        The points count as circular when their distances to the centroid are
+        nearly equal (variance below 0.05) and the mean radius exceeds 0.2.
+        Args:
+            positions: Position coordinates, one [x, y] pair per object.
+        Returns:
+            bool: True for a circular arrangement; False for fewer than four
+                points or for input that cannot be read as [x, y] pairs.
+        """
+        import numpy as np
+        if len(positions) < 4:
+            return False
+        try:
+            pos_array = np.array(positions)
+            x_coords = pos_array[:, 0]
+            y_coords = pos_array[:, 1]
+            # Distance of every point from the centroid, computed in one pass.
+            radii = np.sqrt((x_coords - np.mean(x_coords)) ** 2 +
+                            (y_coords - np.mean(y_coords)) ** 2)
+            # Convert numpy's bool_ so the declared return type holds.
+            return bool(np.var(radii) < 0.05 and np.mean(radii) > 0.2)
+        except (TypeError, ValueError, IndexError):
+            # Ragged, non-numeric or one-dimensional input is simply not a
+            # circle; other errors are no longer hidden by a bare except.
+            return False
+    def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
+                                        arrangement_pattern: str, count: int) -> Optional[str]:
+        """
+        Turn an arrangement pattern into a short natural-language phrase.
+        Classes with a dedicated phrase table (chair, dining table, car, person) use it;
+        every other class uses the generic table. A pattern that is missing from the
+        selected table yields "positioned in the scene".
+        Args:
+            class_name: Object class name.
+            scene_type: Scene type (accepted but not consulted by this method).
+            arrangement_pattern: Arrangement pattern key, e.g. "linear" or "clustered".
+            count: Number of objects (accepted but not consulted by this method).
+        Returns:
+            Optional[str]: The phrase describing the spatial arrangement.
+        """
+        fallback_phrase = "positioned in the scene"
+        # Phrases used for any class without a dedicated table.
+        generic_phrases = {
+            "linear": "arranged in a line",
+            "clustered": "grouped together",
+            "circular": "arranged in a circular pattern",
+            "scattered": "distributed across the space",
+            "regular": "evenly positioned",
+            "distributed": "thoughtfully placed",
+        }
+        # Class-specific phrases; not every class defines every pattern.
+        class_phrases = {
+            "chair": {
+                "linear": "arranged in a row",
+                "clustered": "grouped together for conversation",
+                "circular": "arranged around the table",
+                "scattered": "positioned throughout the space",
+                "regular": "evenly spaced",
+                "distributed": "thoughtfully positioned",
+            },
+            "dining table": {
+                "linear": "aligned to create a unified dining space",
+                "clustered": "grouped to form intimate dining areas",
+                "scattered": "distributed to optimize space flow",
+                "regular": "systematically positioned",
+                "distributed": "strategically placed",
+            },
+            "car": {
+                "linear": "parked in sequence",
+                "clustered": "grouped in the parking area",
+                "scattered": "distributed throughout the lot",
+                "regular": "neatly parked",
+                "distributed": "positioned across the area",
+            },
+            "person": {
+                "linear": "moving in a line",
+                "clustered": "gathered together",
+                "circular": "forming a circle",
+                "scattered": "spread across the area",
+                "distributed": "positioned throughout the scene",
+            },
+        }
+        # A known class never falls through to the generic table, even for a missing pattern.
+        phrase_table = class_phrases.get(class_name, generic_phrases)
+        return phrase_table.get(arrangement_pattern, fallback_phrase)
+    def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
+                                detected_objects: Optional[List[Dict]]) -> Optional[str]:
+        """
+        Produce a feature descriptor for a lone object.
+        A position-based descriptor is tried first (from the object's "region"),
+        then a function-based descriptor derived from the scene type.
+        Args:
+            class_name: Object class name.
+            scene_type: Scene type.
+            detected_objects: Detected objects; a descriptor is produced only when
+                the list holds exactly one entry.
+        Returns:
+            Optional[str]: The descriptor, or None when no rule applies.
+        """
+        if not detected_objects or len(detected_objects) != 1:
+            return None
+        obj = detected_objects[0]
+        # "region" may be absent, None, or a non-string value; treat all of these as
+        # "no region" instead of failing on .lower().
+        raw_region = obj.get("region")
+        region = raw_region.lower() if isinstance(raw_region, str) else ""
+        # Position-based descriptors.
+        if "center" in region:
+            if class_name == "dining table":
+                return "central"
+            if class_name == "chair":
+                return "centrally placed"
+            # Other centered classes fall through to the scene-based rules below.
+        elif "corner" in region or "left" in region or "right" in region:
+            return "positioned"
+        # Function-based descriptors for dining scenes.
+        if scene_type in ("dining_area", "restaurant"):
+            if class_name == "chair":
+                return "dining"
+            if class_name == "vase":
+                return "decorative"
+        return None
     def _get_plural_form(self, word: str) -> str:
         """
         獲取詞彙的複數形式
         except Exception as e:
             self.logger.error(f"Error updating configuration: {str(e)}")
+            raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e

response_processor.py CHANGED Viewed

@@ -652,6 +652,44 @@ class ResponseProcessor:
                 pattern = re.compile(r'\b' + re.escape(word_to_replace) + r'\b', re.IGNORECASE)
                 processed_response = pattern.sub(replacer_instance, processed_response)
             return processed_response
         except Exception as e:

                 pattern = re.compile(r'\b' + re.escape(word_to_replace) + r'\b', re.IGNORECASE)
                 processed_response = pattern.sub(replacer_instance, processed_response)
+            # 移除 identical 等重複性描述詞彙
+            identical_cleanup_patterns = [
+                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
+                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
+                (r'\bcomprehensive view featuring\b', 'scene featuring'),
+                (r'\bcomprehensive display of\b', 'display of'),
+            ]
+            for pattern, replacement in identical_cleanup_patterns:
+                processed_response = re.sub(pattern, replacement, processed_response, flags=re.IGNORECASE)
+            # 數字到文字
+            number_conversions = {
+                '2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
+                '7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
+                '11': 'eleven', '12': 'twelve'
+            }
+            # 處理各種語法結構中的數字
+            for digit, word in number_conversions.items():
+                # 模式1: 數字 + 單一複數詞 (如 "7 chairs")
+                pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
+                processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
+                # 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
+                pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
+                processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
+                # 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
+                pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
+                processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
+                # 模式4: 介詞片語中的數字 (如 "around 2 tables")
+                pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
+                processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
             return processed_response
         except Exception as e:

template_manager.py CHANGED Viewed

@@ -35,7 +35,7 @@ class TemplateManager:
             custom_templates_db: 可選的自定義模板數據庫，如果提供則會與默認模板合併
         """
         self.logger = logging.getLogger(self.__class__.__name__)
-        self.template_registry = {}
         try:
             # 載入模板數據庫
@@ -1047,10 +1047,43 @@ class TemplateManager:
                 count = object_statistics["chair"]["count"]
                 if count == 1:
                     replacements["seating"] = "a chair"
                 elif count <= 4:
-                    replacements["seating"] = f"{count} chairs"
                 else:
                     replacements["seating"] = f"numerous chairs ({count} total)"
             # 處理人員
             if "person" in object_statistics:

             custom_templates_db: 可選的自定義模板數據庫，如果提供則會與默認模板合併
         """
         self.logger = logging.getLogger(self.__class__.__name__)
+        self.template_registry = {}
         try:
             # 載入模板數據庫
                 count = object_statistics["chair"]["count"]
                 if count == 1:
                     replacements["seating"] = "a chair"
+                    replacements["furniture"] = "a chair"  # 新增：同時處理furniture佔位符
                 elif count <= 4:
+                    number_word = ["", "one", "two", "three", "four"][count]  # 轉換為文字
+                    replacements["seating"] = f"{number_word} chairs"
+                    replacements["furniture"] = f"{number_word} chairs"  # 同時處理furniture佔位符
+                elif count <= 6:
+                    number_words = ["", "one", "two", "three", "four", "five", "six"]
+                    replacements["seating"] = f"{number_words[count]} chairs"
+                    replacements["furniture"] = f"{number_words[count]} chairs"  # 同時處理furniture佔位符
                 else:
                     replacements["seating"] = f"numerous chairs ({count} total)"
+                    replacements["furniture"] = f"numerous chairs"  # 通用情況下的家具描述
+            # 處理混合家具情況（當存在多種家具類型時）
+            furniture_items = []
+            furniture_counts = []
+            # 收集所有家具類型的統計
+            for furniture_type in ["chair", "dining table", "couch", "bed"]:
+                if furniture_type in object_statistics:
+                    count = object_statistics[furniture_type]["count"]
+                    if count > 0:
+                        furniture_items.append(furniture_type)
+                        furniture_counts.append(count)
+            # 如果只有椅子,那就用上面的方式
+            # 如果有多種家具類型，生成組合描述
+            if len(furniture_items) > 1 and "furniture" not in replacements:
+                main_furniture = furniture_items[0]  # 數量最多的家具類型
+                main_count = furniture_counts[0]
+                if main_furniture == "chair":
+                    number_words = ["", "one", "two", "three", "four", "five", "six"]
+                    if main_count <= 6:
+                        replacements["furniture"] = f"{number_words[main_count]} chairs and other furniture"
+                    else:
+                        replacements["furniture"] = "multiple chairs and other furniture"
             # 處理人員
             if "person" in object_statistics:

text_formatter.py CHANGED Viewed

@@ -239,6 +239,16 @@ class TextFormatter:
             # 11. 移除最終標點符號前的空格（如果規則7意外添加）
             text = re.sub(r'\s+([.!?])$', r'\1', text)
             return text.strip()  # 最終修剪
         except Exception as e:
@@ -543,3 +553,107 @@ class TextFormatter:
         except Exception as e:
             self.logger.warning(f"Error getting text statistics: {str(e)}")
             return {"characters": 0, "words": 0, "sentences": 0}

             # 11. 移除最終標點符號前的空格（如果規則7意外添加）
             text = re.sub(r'\s+([.!?])$', r'\1', text)
+            # 12. 移除重複性描述詞彙的最終檢查
+            identical_cleanup_patterns = [
+                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
+                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
+            ]
+            for pattern, replacement in identical_cleanup_patterns:
+                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
             return text.strip()  # 最終修剪
         except Exception as e:
         except Exception as e:
             self.logger.warning(f"Error getting text statistics: {str(e)}")
             return {"characters": 0, "words": 0, "sentences": 0}
+    def deduplicate_sentences_in_description(self, description: str, similarity_threshold: float = 0.80) -> str:
+        """
+        Remove repeated or near-identical sentences from a description.
+        Sentences are compared as lower-cased, punctuation-free word sets using Jaccard
+        similarity. When two sentences are similar, the one with more distinct words is kept.
+        Args:
+            description (str): Original description text.
+            similarity_threshold (float): Jaccard similarity (0 to 1) at or above which two
+                sentences count as duplicates. Defaults to 0.8.
+        Returns:
+            str: The text with duplicate or highly similar sentences removed. If an
+                error occurs, the original description is returned unchanged.
+        """
+        try:
+            stripped_description = description.strip() if description else ""
+            if not stripped_description:
+                self.logger.debug("deduplicate_sentences_in_description: Received empty or blank description.")
+                return ""
+            # Split after sentence-ending punctuation so each sentence keeps its terminator.
+            raw_sentences = re.split(r'(?<=[.!?])\s+', stripped_description)
+            if not raw_sentences:
+                self.logger.debug("deduplicate_sentences_in_description: No sentences found after splitting.")
+                return ""
+            kept = []  # entries are (sentence text, set of that sentence's words)
+            for raw_sentence in raw_sentences:
+                candidate = raw_sentence.strip()
+                if not candidate:
+                    continue
+                # Comparison form: lower-case, punctuation removed, split into a word set.
+                candidate_words = set(re.sub(r'[^\w\s\d]', '', candidate.lower()).split())
+                if not candidate_words:
+                    # Punctuation-only fragment: stored only while nothing has been kept yet.
+                    if not kept:
+                        kept.append((candidate, candidate_words))
+                    continue
+                replace_at = None
+                drop_candidate = False
+                for idx, (kept_text, kept_words) in enumerate(kept):
+                    if not kept_words:  # skip stored punctuation-only fragments
+                        continue
+                    # Jaccard similarity; both sets are non-empty here, so the union is too.
+                    shared = len(candidate_words & kept_words)
+                    combined = len(candidate_words | kept_words)
+                    jaccard_similarity = shared / combined if combined > 0 else 0.0
+                    if jaccard_similarity >= similarity_threshold:
+                        if len(candidate_words) > len(kept_words):
+                            # The richer candidate takes the slot of the shorter kept sentence.
+                            self.logger.debug(f"Deduplication: Replacing shorter \"{kept_text[:50]}...\" "
+                                              f"with longer similar \"{candidate[:50]}...\" (Jaccard: {jaccard_similarity:.2f})")
+                            replace_at = idx
+                        else:
+                            # Candidate is shorter or equal in size: treat it as the duplicate.
+                            drop_candidate = True
+                            self.logger.debug(f"Deduplication: Current sentence \"{candidate[:50]}...\" "
+                                              f"is subsumed by or highly similar to \"{kept_text[:50]}...\" (Jaccard: {jaccard_similarity:.2f}). Skipping.")
+                        # Only the first similar kept sentence is considered.
+                        break
+                if replace_at is not None:
+                    kept[replace_at] = (candidate, candidate_words)
+                elif not drop_candidate:
+                    kept.append((candidate, candidate_words))
+            # Rebuild the text: every sentence ends with punctuation, single spaces between.
+            rebuilt_parts = []
+            for sentence_text, _ in kept:
+                sentence_text = sentence_text.strip()
+                if not sentence_text:
+                    continue
+                if not re.search(r'[.!?]$', sentence_text):
+                    sentence_text += "."
+                rebuilt_parts.append(sentence_text)
+            reconstructed_response = " ".join(rebuilt_parts).strip()
+            self.logger.debug(f"Deduplicated description (len {len(reconstructed_response)}): '{reconstructed_response[:150]}...'")
+            return reconstructed_response
+        except Exception as e:
+            self.logger.error(f"Error in deduplicate_sentences_in_description: {str(e)}")
+            self.logger.error(traceback.format_exc())
+            return description  # fall back to the original text on error