import asyncio
import json
import logging
import re
from typing import Any, Dict, List, Optional

from backend.models.schemas import NLPAnalysisResult, QueryType, DatabaseResult
from backend.config import settings
from backend.services.openrouter_service import OpenRouterService

logger = logging.getLogger(__name__)


class NLPService:
    """Rule-based natural-language analysis with optional OpenRouter assistance.

    The service offers two families of entry points:

    * ``analyze_message`` / ``format_response`` -- generic intents
      (``search_user``, ``create_order``, ...) with an optional "advanced"
      path that delegates to ``OpenRouterService``.
    * ``analyze_business_query`` / ``format_response_message`` -- business
      intents (``product_search``, ``inventory_check``, ...), purely
      rule-based.

    All user-facing strings are emitted in Traditional Chinese.
    """

    # Number of rows shown in most list-style responses.
    _LIST_PREVIEW_LIMIT = 5
    # Number of rows shown in the low-stock warning.
    _LOW_STOCK_PREVIEW_LIMIT = 10

    # Patterns used by _extract_business_entities.  Within each list the
    # most specific pattern comes first, because the first match wins.
    _PRODUCT_PATTERNS = (
        r"商品[::]?\s*([^\s,,。!!??]+)",
        r"產品[::]?\s*([^\s,,。!!??]+)",
        r"貨品[::]?\s*([^\s,,。!!??]+)",
    )
    _CUSTOMER_PATTERNS = (
        r"客戶編號[::]?\s*([A-Za-z0-9]+)",
        r"客戶名稱[::]?\s*([^\s,,。!!??]+)",
        r"客戶[::]?\s*([^\s,,。!!??]+)",
    )
    _ORDER_PATTERNS = (
        r"訂單(?:編號|ID)[::]?\s*([A-Za-z0-9\-]+)",
        r"訂單[::]?\s*([A-Za-z0-9\-]+)",
    )
    _QUANTITY_PATTERNS = (
        r"(\d+)\s*個",
        r"(\d+)\s*件",
        r"(\d+)\s*箱",
        r"數量[::]?\s*(\d+)",
    )
    # Query words stripped from a message to obtain free-text search terms.
    _SEARCH_STOPWORDS = (
        '查詢', '搜尋', '找', '商品', '產品', '庫存', '有沒有', '請問', '的', '嗎',
    )
    # Canonical order status -> keywords (lower-case) that indicate it.
    _STATUS_KEYWORDS = {
        "待處理": ["待處理", "pending"],
        "已確認": ["已確認", "confirmed"],
        "已出貨": ["已出貨", "shipped"],
        "已完成": ["已完成", "completed"],
        "已取消": ["已取消", "cancelled"],
    }

    def __init__(self):
        """Create the OpenRouter client and the regex tables for intents/entities."""
        self.openrouter_service = OpenRouterService()

        # Business intents; checked in insertion order, first match wins.
        self.business_intent_patterns = {
            "product_search": [
                r"查詢.*商品|找.*商品|搜尋.*商品|商品.*資料",
                r"商品.*查詢|商品.*搜尋|產品.*查詢|產品.*搜尋",
                r"有什麼.*商品|商品.*價格|產品.*價格",
            ],
            "inventory_check": [
                r"庫存.*查詢|查詢.*庫存|庫存.*多少|剩餘.*數量",
                r"存貨.*查詢|查詢.*存貨|還有.*多少|庫存.*狀況",
                r".*庫存|.*存貨|.*剩餘",
            ],
            "order_search": [
                r"查詢.*訂單|找.*訂單|搜尋.*訂單|訂單.*資料",
                r"訂單.*查詢|訂單.*搜尋|訂單.*狀態|我的.*訂單",
                r"訂單編號|購買.*記錄",
            ],
            "customer_search": [
                r"查詢.*客戶|找.*客戶|搜尋.*客戶|客戶.*資料",
                r"客戶.*查詢|客戶.*搜尋|客戶.*聯絡",
            ],
            "low_stock_alert": [
                r"低庫存|缺貨|庫存.*不足|存貨.*不足",
                r"快.*沒有|即將.*缺貨|庫存.*警告",
            ],
            "business_summary": [
                r"統計|分析|報表|數據|摘要",
                r"總計|總數|多少.*筆|幾.*筆|業務.*狀況",
            ],
        }

        # Generic intents used by analyze_message and as a fallback for
        # business queries.
        self.intent_patterns = {
            "search_user": [
                r"查詢.*用戶|找.*用戶|搜尋.*用戶|用戶.*資料",
                r"用戶.*查詢|用戶.*搜尋|用戶.*找",
                r"誰是|哪個用戶|用戶名.*是",
            ],
            "search_order": [
                r"查詢.*訂單|找.*訂單|搜尋.*訂單|訂單.*資料",
                r"訂單.*查詢|訂單.*搜尋|訂單.*狀態",
                r"我的訂單|訂單編號",
            ],
            "search_product": [
                r"查詢.*商品|找.*商品|搜尋.*商品|商品.*資料",
                r"商品.*查詢|商品.*搜尋|產品.*查詢",
                r"有什麼.*商品|商品.*價格",
            ],
            "create_order": [
                r"建立.*訂單|新增.*訂單|下訂|購買",
                r"我要.*買|我想.*買|訂購",
            ],
            "update_profile": [
                r"更新.*資料|修改.*資料|變更.*資料",
                r"更新.*個人|修改.*個人|個人.*資料",
            ],
            "analytics": [
                r"統計|分析|報表|數據",
                r"總計|總數|多少.*筆|幾.*筆",
            ],
        }

        # Entity extraction patterns (applied case-insensitively).
        # "order_id" uses a non-capturing alternation: a character class
        # such as [編號ID] would match only ONE of those characters.
        self.entity_patterns = {
            "user_id": r"用戶ID[::]?\s*([A-Za-z0-9]+)",
            "user_name": r"用戶名[::]?\s*([^\s]+)|名字[::]?\s*([^\s]+)",
            "order_id": r"訂單(?:編號|ID)[::]?\s*([A-Za-z0-9\-]+)",
            "product_name": r"商品[::]?\s*([^\s]+)|產品[::]?\s*([^\s]+)",
            "price_range": r"價格.*?(\d+).*?到.*?(\d+)|(\d+).*?元.*?到.*?(\d+).*?元",
            "date_range": r"(\d{4}[-/]\d{1,2}[-/]\d{1,2})",
            "number": r"(\d+)",
        }

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _run_coroutine_sync(coro):
        """Run *coro* to completion on a private event loop and return its result.

        The loop is always closed and uninstalled, even when the coroutine
        raises.  Raises ``RuntimeError`` when called from a thread that is
        already running an event loop, without touching that loop.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop in this thread: safe to create our own.
        else:
            close = getattr(coro, "close", None)
            if close is not None:
                close()  # Avoid a "coroutine was never awaited" warning.
            raise RuntimeError(
                "cannot run a coroutine synchronously inside a running event loop"
            )

        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            # Do not leave a closed loop installed as the current loop.
            asyncio.set_event_loop(None)
            loop.close()

    @staticmethod
    def _unknown_result() -> NLPAnalysisResult:
        """Return the neutral result used when analysis fails."""
        return NLPAnalysisResult(
            query_type=QueryType.UNKNOWN,
            intent="unknown",
            entities={},
            confidence=0.0,
            parameters={},
        )

    @staticmethod
    def _first_group(match) -> Optional[str]:
        """Return the first non-empty capture of a ``re.findall`` item, or None."""
        if isinstance(match, tuple):
            return next((group for group in match if group), None)
        return match or None

    @staticmethod
    def _first_capture(patterns, message: str) -> Optional[str]:
        """Return group 1 of the first pattern in *patterns* that matches."""
        for pattern in patterns:
            found = re.search(pattern, message)
            if found:
                return found.group(1)
        return None

    @staticmethod
    def _match_intent(pattern_map: Dict[str, List[str]], text: str) -> Optional[str]:
        """Return the first intent in *pattern_map* with a pattern matching *text*."""
        for intent, patterns in pattern_map.items():
            if any(re.search(pattern, text) for pattern in patterns):
                return intent
        return None

    @staticmethod
    def _date_only(value: Any) -> str:
        """Return the first 10 characters of *value* as text, or 'N/A' if empty."""
        if not value:
            return 'N/A'
        return str(value)[:10]

    @staticmethod
    def _numbered_listing(header: str, rows: List[str], overflow: str = "") -> str:
        """Join *header* and *rows* one per line, then append *overflow*."""
        return "".join(f"{line}\n" for line in [header, *rows]) + overflow

    # ------------------------------------------------------------------
    # Generic message analysis
    # ------------------------------------------------------------------

    def analyze_message(self, message: str, use_advanced: bool = True) -> NLPAnalysisResult:
        """Analyze a user message.

        When *use_advanced* is true and an OpenRouter API key is configured,
        the advanced analysis is tried first; on failure, or when it reports
        ``fallback``, the rule engine is used.  Never raises: any error
        yields an ``unknown`` result with confidence 0.
        """
        try:
            if use_advanced and self.openrouter_service.api_key:
                try:
                    advanced_result = self._run_coroutine_sync(
                        self.openrouter_service.analyze_intent_advanced(message)
                    )
                    if not advanced_result.get("fallback", False):
                        return NLPAnalysisResult(
                            query_type=QueryType(advanced_result.get("query_type", "unknown")),
                            intent=advanced_result.get("intent", "unknown"),
                            entities=advanced_result.get("entities", {}),
                            confidence=advanced_result.get("confidence", 0.5),
                            parameters=advanced_result.get("parameters", {}),
                        )
                except Exception as e:
                    # Best effort: fall through to the rule engine.
                    logger.warning("進階 NLP 分析失敗,使用基礎分析: %s", e)

            return self._basic_analyze_message(message)

        except Exception as e:
            logger.error("NLP 分析錯誤: %s", e)
            return self._unknown_result()

    def _basic_analyze_message(self, message: str) -> NLPAnalysisResult:
        """Analyze *message* with the rule engine only."""
        cleaned_message = self._clean_message(message)
        intent, confidence = self._identify_intent(cleaned_message)
        entities = self._extract_entities(cleaned_message)

        return NLPAnalysisResult(
            query_type=self._determine_query_type(intent),
            intent=intent,
            entities=entities,
            confidence=confidence,
            parameters=self._generate_parameters(intent, entities),
        )

    def _clean_message(self, message: str) -> str:
        """Trim *message* and collapse whitespace runs to a single space."""
        return re.sub(r'\s+', ' ', message.strip())

    def _identify_intent(self, message: str) -> tuple[str, float]:
        """Return ``(intent, confidence)`` for *message*.

        Each pattern scores ``matches / whitespace-separated word count``;
        the best-scoring intent wins.  A matched intent has confidence in
        [0.3, 1.0]; no match yields ``("unknown", 0.1)``.
        """
        # Guard against an empty message so the division is always defined.
        word_count = max(len(message.split()), 1)
        best_intent = "unknown"
        best_score = 0.0

        for intent, patterns in self.intent_patterns.items():
            for pattern in patterns:
                hits = len(re.findall(pattern, message, re.IGNORECASE))
                if not hits:
                    continue
                score = hits / word_count
                if score > best_score:
                    best_score = score
                    best_intent = intent

        if best_intent == "unknown":
            return best_intent, 0.1
        return best_intent, min(max(best_score, 0.3), 1.0)

    def _extract_entities(self, message: str) -> Dict[str, Any]:
        """Extract entities from *message* using ``self.entity_patterns``.

        ``price_range`` is expanded into ``min_price`` / ``max_price``
        (normalised so that min <= max).  For ``user_name`` the last match
        wins; for every other entity the first match is used.  For patterns
        with several alternatives, the first non-empty capture is taken.
        """
        entities: Dict[str, Any] = {}

        for entity_type, pattern in self.entity_patterns.items():
            matches = re.findall(pattern, message, re.IGNORECASE)
            if not matches:
                continue

            if entity_type == "price_range":
                for match in matches:
                    if not isinstance(match, tuple):
                        continue
                    prices = [int(p) for p in match if p]
                    if len(prices) >= 2:
                        low, high = sorted(prices[:2])
                        entities["min_price"] = low
                        entities["max_price"] = high
            elif entity_type == "user_name":
                for match in matches:
                    name = self._first_group(match)
                    if name:
                        entities[entity_type] = name
            else:
                value = self._first_group(matches[0])
                if value:
                    entities[entity_type] = value

        return entities

    def _determine_query_type(self, intent: str) -> QueryType:
        """Map an intent name to a ``QueryType`` based on its prefix."""
        prefix_types = (
            ("search_", QueryType.SEARCH),
            ("create_", QueryType.CREATE),
            ("update_", QueryType.UPDATE),
            ("delete_", QueryType.DELETE),
        )
        for prefix, query_type in prefix_types:
            if intent.startswith(prefix):
                return query_type
        if intent == "analytics":
            return QueryType.ANALYTICS
        return QueryType.UNKNOWN

    def _generate_parameters(self, intent: str, entities: Dict[str, Any]) -> Dict[str, Any]:
        """Build query parameters (``table``, ``conditions``, ``limit``).

        ``limit`` is the extracted ``number`` capped at 50, or 10 by default.
        """
        parameters: Dict[str, Any] = {}

        # Table is chosen from the intent name.
        for keyword, table in (("user", "users"), ("order", "orders"), ("product", "products")):
            if keyword in intent:
                parameters["table"] = table
                break

        conditions: Dict[str, Any] = {}
        if "user_id" in entities:
            conditions["user_id"] = entities["user_id"]
        if "user_name" in entities:
            conditions["name"] = entities["user_name"]
        if "order_id" in entities:
            conditions["order_id"] = entities["order_id"]
        if "product_name" in entities:
            # Shares the "name" key with user_name; product name wins.
            conditions["name"] = entities["product_name"]
        if "min_price" in entities and "max_price" in entities:
            conditions["price"] = {
                "gte": entities["min_price"],
                "lte": entities["max_price"],
            }
        if conditions:
            parameters["conditions"] = conditions

        if "number" in entities:
            parameters["limit"] = min(int(entities["number"]), 50)  # at most 50 rows
        else:
            parameters["limit"] = 10  # default 10 rows

        return parameters

    # ------------------------------------------------------------------
    # Generic response formatting
    # ------------------------------------------------------------------

    def format_response(self, db_result: DatabaseResult, analysis_result: NLPAnalysisResult,
                        user_message: str = "", use_advanced: bool = True) -> str:
        """Format a database result as a user-facing message.

        Tries the OpenRouter response generator when *use_advanced* is true,
        *user_message* is non-empty and an API key is configured; otherwise,
        or on failure / empty output, falls back to the built-in templates.
        Never raises.
        """
        try:
            if use_advanced and user_message and self.openrouter_service.api_key:
                try:
                    query_result = {
                        "success": db_result.success,
                        "data": db_result.data,
                        "count": db_result.count,
                        "error": db_result.error,
                    }
                    advanced_response = self._run_coroutine_sync(
                        self.openrouter_service.generate_response(query_result, user_message)
                    )
                    if advanced_response and advanced_response.strip():
                        return advanced_response
                except Exception as e:
                    # Best effort: fall through to the basic templates.
                    logger.warning("進階回應生成失敗,使用基礎格式: %s", e)

            return self._basic_format_response(db_result, analysis_result)

        except Exception as e:
            logger.error("格式化回應錯誤: %s", e)
            return "資料處理時發生錯誤,請稍後再試。"

    def _basic_format_response(self, db_result: DatabaseResult, analysis_result: NLPAnalysisResult) -> str:
        """Format *db_result* with the built-in templates, keyed by intent."""
        if not db_result.success:
            return f"抱歉,查詢時發生錯誤:{db_result.error}"
        if not db_result.data:
            return "沒有找到相關資料。"

        intent = analysis_result.intent
        data = db_result.data

        if intent.startswith("search_user"):
            return self._format_user_response(data)
        if intent.startswith("search_order"):
            return self._format_order_response(data)
        if intent.startswith("search_product"):
            return self._format_product_response(data)
        if intent == "analytics":
            return self._format_analytics_response(data)
        return f"找到 {len(data)} 筆資料。"

    def _format_user_response(self, data: List[Dict[str, Any]]) -> str:
        """Format user rows: full detail for one row, else a 5-row preview."""
        if len(data) == 1:
            user = data[0]
            return f"用戶資料:\n名稱:{user.get('name', 'N/A')}\nID:{user.get('user_id', 'N/A')}\n電子郵件:{user.get('email', 'N/A')}"

        limit = self._LIST_PREVIEW_LIMIT
        rows = [
            f"{i}. {user.get('name', 'N/A')} (ID: {user.get('user_id', 'N/A')})"
            for i, user in enumerate(data[:limit], 1)
        ]
        overflow = f"... 還有 {len(data) - limit} 筆資料" if len(data) > limit else ""
        return self._numbered_listing(f"找到 {len(data)} 位用戶:", rows, overflow)

    def _format_analytics_response(self, data: List[Dict[str, Any]]) -> str:
        """Format analytics rows; uses ``data[0]['count']`` when present."""
        if not data:
            return "無統計資料"
        if 'count' in data[0]:
            return f"統計結果:共 {data[0]['count']} 筆資料"
        return f"分析結果:找到 {len(data)} 筆相關資料"

    # ------------------------------------------------------------------
    # Business query analysis
    # ------------------------------------------------------------------

    def analyze_business_query(self, message: str, user_id: Optional[str] = None) -> NLPAnalysisResult:
        """Analyze a business-related natural-language query.

        Never raises: any error yields an ``unknown`` result with
        confidence 0.
        """
        try:
            intent = self._detect_business_intent(message)
            entities = self._extract_business_entities(message, intent)
            query_type, parameters = self._build_query_parameters(intent, entities, user_id)
            confidence = self._calculate_confidence(message, intent, entities)

            return NLPAnalysisResult(
                query_type=query_type,
                intent=intent,
                entities=entities,
                confidence=confidence,
                parameters=parameters,
            )

        except Exception as e:
            logger.error("業務查詢分析錯誤: %s", e)
            return self._unknown_result()

    def _detect_business_intent(self, message: str) -> str:
        """Return the business intent of *message*.

        Business patterns are tried first, then the generic intent
        patterns; ``"general_search"`` is returned when nothing matches.
        """
        message_lower = message.lower()

        intent = self._match_intent(self.business_intent_patterns, message_lower)
        if intent == "inventory_check":
            # inventory_check contains catch-all patterns (".*庫存") that
            # also match low-stock phrases such as "低庫存"; prefer the
            # more specific intent when both apply.
            low_stock_patterns = self.business_intent_patterns.get("low_stock_alert", ())
            if any(re.search(pattern, message_lower) for pattern in low_stock_patterns):
                return "low_stock_alert"
        if intent is not None:
            return intent

        intent = self._match_intent(self.intent_patterns, message_lower)
        return intent if intent is not None else "general_search"

    def _extract_business_entities(self, message: str, intent: str) -> Dict[str, Any]:
        """Extract business entities from *message*.

        Possible keys: ``product_name`` (or ``search_text`` when no explicit
        product is named), ``customer_info``, ``order_id``, ``quantity``
        (int) and ``status``.  *intent* is accepted for interface
        compatibility and is not used.
        """
        entities: Dict[str, Any] = {}

        product_name = self._first_capture(self._PRODUCT_PATTERNS, message)
        if product_name is not None:
            entities["product_name"] = product_name
        else:
            # Without an explicit product name, whatever remains after
            # removing query words may be the search term.
            remainder = message
            for keyword in self._SEARCH_STOPWORDS:
                remainder = remainder.replace(keyword, '')
            remainder = remainder.strip()
            if remainder:
                entities["search_text"] = remainder

        customer_info = self._first_capture(self._CUSTOMER_PATTERNS, message)
        if customer_info is not None:
            entities["customer_info"] = customer_info

        order_id = self._first_capture(self._ORDER_PATTERNS, message)
        if order_id is not None:
            entities["order_id"] = order_id

        quantity = self._first_capture(self._QUANTITY_PATTERNS, message)
        if quantity is not None:
            entities["quantity"] = int(quantity)

        message_lower = message.lower()
        for status, keywords in self._STATUS_KEYWORDS.items():
            if any(keyword in message_lower for keyword in keywords):
                entities["status"] = status
                break

        return entities

    def _build_query_parameters(self, intent: str, entities: Dict[str, Any],
                                user_id: Optional[str] = None) -> tuple:
        """Return ``(QueryType, parameters)`` for a business intent.

        Intents without a dedicated branch (including ``customer_search``
        and the generic fallback intents) default to a product search.
        """
        if intent == "product_search":
            return QueryType.SEARCH, {
                "method": "search_products",
                "query_text": entities.get("product_name") or entities.get("search_text"),
                "category": entities.get("category"),
                "limit": 10,
            }

        if intent == "inventory_check":
            return QueryType.SEARCH, {
                "method": "check_inventory",
                "product_name": entities.get("product_name") or entities.get("search_text"),
                "category": entities.get("category"),
            }

        if intent == "order_search":
            return QueryType.SEARCH, {
                "method": "search_orders",
                "user_id": user_id,
                "order_id": entities.get("order_id"),
                "status": entities.get("status"),
                "limit": 10,
            }

        if intent == "low_stock_alert":
            return QueryType.SEARCH, {
                "method": "get_low_stock_products",
                "threshold": 10,
            }

        if intent == "business_summary":
            return QueryType.ANALYTICS, {
                "method": "get_business_summary",
            }

        # Default: product search.
        return QueryType.SEARCH, {
            "method": "search_products",
            "query_text": entities.get("search_text") or entities.get("product_name"),
            "limit": 10,
        }

    def _calculate_confidence(self, message: str, intent: str, entities: Dict[str, Any]) -> float:
        """Return a heuristic confidence in [0.5, 1.0].

        Base 0.5, +0.3 for a business intent, +0.2 per extracted entity,
        +0.1 when the message is 2-50 characters long; capped at 1.0.
        """
        confidence = 0.5
        if intent in self.business_intent_patterns:
            confidence += 0.3
        if entities:
            confidence += 0.2 * len(entities)
        if 2 <= len(message) <= 50:
            confidence += 0.1
        return min(confidence, 1.0)

    # ------------------------------------------------------------------
    # Business response formatting
    # ------------------------------------------------------------------

    def format_response_message(self, result: DatabaseResult, intent: str) -> str:
        """Format *result* for a business *intent* using the built-in templates."""
        if not result.success:
            return f"抱歉,查詢時發生錯誤:{result.error}"
        if not result.data or result.count == 0:
            return "沒有找到相關資料。"

        formatters = {
            "product_search": self._format_product_response,
            "inventory_check": self._format_inventory_response,
            "order_search": self._format_order_response,
            "low_stock_alert": self._format_low_stock_response,
            "business_summary": self._format_summary_response,
        }
        formatter = formatters.get(intent, self._format_general_response)
        return formatter(result.data)

    # NOTE: this class previously defined _format_product_response and
    # _format_order_response twice.  Python keeps only the last definition
    # in a class body, so the earlier variants were dead code; the
    # definitions below are the ones that were always in effect.

    def _format_product_response(self, data: List[Dict[str, Any]]) -> str:
        """Format product rows: full detail for one row, else a 5-row preview."""
        if len(data) == 1:
            product = data[0]
            return f"找到商品:\n" \
                   f"名稱:{product.get('name', 'N/A')}\n" \
                   f"描述:{product.get('description', 'N/A')}\n" \
                   f"價格:${product.get('price', 0)}\n" \
                   f"類別:{product.get('category', 'N/A')}"

        limit = self._LIST_PREVIEW_LIMIT
        rows = [
            f"{i}. {product.get('name', 'N/A')} - ${product.get('price', 0)}"
            for i, product in enumerate(data[:limit], 1)
        ]
        overflow = f"... 還有 {len(data) - limit} 個商品" if len(data) > limit else ""
        return self._numbered_listing(f"找到 {len(data)} 個商品:", rows, overflow)

    def _format_inventory_response(self, data: List[Dict[str, Any]]) -> str:
        """Format inventory rows: full detail for one row, else a 5-row preview."""
        if len(data) == 1:
            item = data[0]
            return f"庫存資訊:\n" \
                   f"商品:{item.get('product_name', 'N/A')}\n" \
                   f"目前庫存:{item.get('current_stock', 0)} 件\n" \
                   f"類別:{item.get('category', 'N/A')}\n" \
                   f"價格:${item.get('price', 0)}"

        limit = self._LIST_PREVIEW_LIMIT
        rows = [
            f"{i}. {item.get('product_name', 'N/A')} - 庫存:{item.get('current_stock', 0)}"
            for i, item in enumerate(data[:limit], 1)
        ]
        # Tell the user when rows were omitted, like the other listings do.
        overflow = f"... 還有 {len(data) - limit} 個商品" if len(data) > limit else ""
        return self._numbered_listing(f"找到 {len(data)} 個商品的庫存:", rows, overflow)

    def _format_order_response(self, data: List[Dict[str, Any]]) -> str:
        """Format sales-order rows: full detail for one row, else a 5-row preview.

        ``status_display`` is preferred over ``status`` when present.
        """
        if not data:
            return "沒有找到符合條件的訂單。"

        if len(data) == 1:
            order = data[0]
            status_display = order.get('status_display', order.get('status', 'N/A'))
            # str() first: updated_at may not be a string (e.g. a datetime).
            updated_at = self._date_only(order.get('updated_at'))
            return f"📋 訂單詳細資訊:\n" \
                   f"訂單編號:{order.get('order_id', 'N/A')}\n" \
                   f"銷售日期:{order.get('sales_date', 'N/A')}\n" \
                   f"狀態:{status_display}\n" \
                   f"客戶:{order.get('customer_name', 'N/A')}\n" \
                   f"銷售人員:{order.get('salesperson_name', 'N/A')}\n" \
                   f"付款條件:{order.get('payment_term', 'N/A')}\n" \
                   f"總金額:${order.get('total_amount', 0)}\n" \
                   f"更新時間:{updated_at}"

        limit = self._LIST_PREVIEW_LIMIT
        parts = [f"📋 找到 {len(data)} 筆訂單:\n\n"]
        for i, order in enumerate(data[:limit], 1):
            status_display = order.get('status_display', order.get('status', 'N/A'))
            sales_date = self._date_only(order.get('sales_date'))  # date part only
            parts.append(f"{i}. {order.get('order_id', 'N/A')}\n")
            parts.append(f" 狀態:{status_display} | 日期:{sales_date}\n")
            parts.append(
                f" 客戶:{order.get('customer_name', 'N/A')} | 金額:${order.get('total_amount', 0)}\n\n"
            )
        if len(data) > limit:
            parts.append(f"... 還有 {len(data) - limit} 筆訂單")

        return "".join(parts).strip()

    def _format_low_stock_response(self, data: List[Dict[str, Any]]) -> str:
        """Format the low-stock warning with a preview of up to 10 rows."""
        if not data:
            return "目前沒有低庫存商品。"

        limit = self._LOW_STOCK_PREVIEW_LIMIT
        rows = [
            f"{i}. {item.get('product_name', 'N/A')} - 剩餘:{item.get('current_stock', 0)} 件"
            for i, item in enumerate(data[:limit], 1)
        ]
        # Tell the user when rows were omitted, like the other listings do.
        overflow = f"... 還有 {len(data) - limit} 個商品" if len(data) > limit else ""
        return self._numbered_listing(f"⚠️ 發現 {len(data)} 個低庫存商品:", rows, overflow)

    def _format_summary_response(self, data: List[Dict[str, Any]]) -> str:
        """Format the business summary from the first row of *data*."""
        if not data:
            return "無法取得業務摘要資料。"

        summary = data[0]
        return f"📊 業務摘要:\n" \
               f"商品總數:{summary.get('total_products', 0)} 個\n" \
               f"訂單總數:{summary.get('total_orders', 0)} 筆\n" \
               f"用戶總數:{summary.get('total_users', 0)} 人\n" \
               f"低庫存商品:{summary.get('low_stock_items', 0)} 個\n" \
               f"統計時間:{summary.get('report_date', 'N/A')}"

    def _format_general_response(self, data: List[Dict[str, Any]]) -> str:
        """Format a generic result as a row count."""
        return f"找到 {len(data)} 筆資料。"