File size: 28,490 Bytes
cd9bca9 68c8519 cd9bca9 68c8519 cd9bca9 68c8519 cd9bca9 68c8519 cd9bca9 68c8519 cd9bca9 68c8519 cd9bca9 |
|
import re
import json
from typing import Dict, Any, List
from backend.models.schemas import NLPAnalysisResult, QueryType, DatabaseResult
from backend.config import settings
from backend.services.openrouter_service import OpenRouterService
import logging
import asyncio
logger = logging.getLogger(__name__)
class NLPService:
"""自然語言處理服務"""
def __init__(self):
    """Create the OpenRouter client and the regex tables used by the rule engine.

    Attributes set:
        openrouter_service: client used for the optional advanced analysis.
        business_intent_patterns: intent name -> regexes for business queries.
        intent_patterns: intent name -> regexes for the generic rule engine.
        entity_patterns: entity name -> regex whose group(s) capture the value.
    """
    self.openrouter_service = OpenRouterService()

    # Business intents, checked in insertion order by _detect_business_intent.
    self.business_intent_patterns = {
        "product_search": [
            r"查詢.*商品|找.*商品|搜尋.*商品|商品.*資料",
            r"商品.*查詢|商品.*搜尋|產品.*查詢|產品.*搜尋",
            r"有什麼.*商品|商品.*價格|產品.*價格"
        ],
        "inventory_check": [
            r"庫存.*查詢|查詢.*庫存|庫存.*多少|剩餘.*數量",
            r"存貨.*查詢|查詢.*存貨|還有.*多少|庫存.*狀況",
            # NOTE(review): these catch-all alternatives match any message that
            # contains 庫存/存貨/剩餘. Because the first matching intent wins,
            # they appear to shadow the "low_stock_alert" patterns that also
            # contain 庫存/存貨 -- confirm the intended priority.
            r".*庫存|.*存貨|.*剩餘"
        ],
        "order_search": [
            r"查詢.*訂單|找.*訂單|搜尋.*訂單|訂單.*資料",
            r"訂單.*查詢|訂單.*搜尋|訂單.*狀態|我的.*訂單",
            r"訂單編號|購買.*記錄"
        ],
        "customer_search": [
            r"查詢.*客戶|找.*客戶|搜尋.*客戶|客戶.*資料",
            r"客戶.*查詢|客戶.*搜尋|客戶.*聯絡"
        ],
        "low_stock_alert": [
            r"低庫存|缺貨|庫存.*不足|存貨.*不足",
            r"快.*沒有|即將.*缺貨|庫存.*警告"
        ],
        "business_summary": [
            r"統計|分析|報表|數據|摘要",
            r"總計|總數|多少.*筆|幾.*筆|業務.*狀況"
        ]
    }

    # Generic intents used by the basic rule engine.
    self.intent_patterns = {
        "search_user": [
            r"查詢.*用戶|找.*用戶|搜尋.*用戶|用戶.*資料",
            r"用戶.*查詢|用戶.*搜尋|用戶.*找",
            r"誰是|哪個用戶|用戶名.*是"
        ],
        "search_order": [
            r"查詢.*訂單|找.*訂單|搜尋.*訂單|訂單.*資料",
            r"訂單.*查詢|訂單.*搜尋|訂單.*狀態",
            r"我的訂單|訂單編號"
        ],
        "search_product": [
            r"查詢.*商品|找.*商品|搜尋.*商品|商品.*資料",
            r"商品.*查詢|商品.*搜尋|產品.*查詢",
            r"有什麼.*商品|商品.*價格"
        ],
        "create_order": [
            r"建立.*訂單|新增.*訂單|下訂|購買",
            r"我要.*買|我想.*買|訂購"
        ],
        "update_profile": [
            r"更新.*資料|修改.*資料|變更.*資料",
            r"更新.*個人|修改.*個人|個人.*資料"
        ],
        "analytics": [
            r"統計|分析|報表|數據",
            r"總計|總數|多少.*筆|幾.*筆"
        ]
    }

    # Entity extractors. The optional separator accepts both the ASCII and
    # the full-width colon so the separator is never captured as the value.
    self.entity_patterns = {
        "user_id": r"用戶ID[::]?\s*([A-Za-z0-9]+)",
        "user_name": r"用戶名[::]?\s*([^\s]+)|名字[::]?\s*([^\s]+)",
        # "(?:編號|ID)" matches the whole word; the previous "[編號ID]" was a
        # character class that consumed a single character, so "訂單編號 X"
        # could never match.
        "order_id": r"訂單(?:編號|ID)[::]?\s*([A-Za-z0-9\-]+)",
        "product_name": r"商品[::]?\s*([^\s]+)|產品[::]?\s*([^\s]+)",
        "price_range": r"價格.*?(\d+).*?到.*?(\d+)|(\d+).*?元.*?到.*?(\d+).*?元",
        "date_range": r"(\d{4}[-/]\d{1,2}[-/]\d{1,2})",
        "number": r"(\d+)"
    }
def analyze_message(self, message: str, use_advanced: bool = True) -> NLPAnalysisResult:
    """Analyze a user message and return the structured NLP result.

    Args:
        message: raw user text.
        use_advanced: when True and an OpenRouter API key is configured, the
            OpenRouter intent analysis is tried first.

    Returns:
        The advanced result unless it is flagged ``fallback`` or fails, in
        which case the rule-based result from ``_basic_analyze_message`` is
        returned. If everything fails, an UNKNOWN result with confidence 0.0.
    """
    try:
        if use_advanced and self.openrouter_service.api_key:
            try:
                # Drive the coroutine on a private loop from synchronous code.
                loop = asyncio.new_event_loop()
                try:
                    asyncio.set_event_loop(loop)
                    advanced_result = loop.run_until_complete(
                        self.openrouter_service.analyze_intent_advanced(message)
                    )
                finally:
                    # Always release the loop, even when the call raised, and
                    # do not leave a closed loop installed for this thread.
                    asyncio.set_event_loop(None)
                    loop.close()

                if not advanced_result.get("fallback", False):
                    return NLPAnalysisResult(
                        query_type=QueryType(advanced_result.get("query_type", "unknown")),
                        intent=advanced_result.get("intent", "unknown"),
                        entities=advanced_result.get("entities", {}),
                        confidence=advanced_result.get("confidence", 0.5),
                        parameters=advanced_result.get("parameters", {})
                    )
            except Exception as e:
                # Best effort: any advanced-path failure degrades to the rules.
                logger.warning(f"進階 NLP 分析失敗,使用基礎分析: {str(e)}")

        # Rule-based analysis.
        return self._basic_analyze_message(message)
    except Exception as e:
        logger.error(f"NLP 分析錯誤: {str(e)}")
        return NLPAnalysisResult(
            query_type=QueryType.UNKNOWN,
            intent="unknown",
            entities={},
            confidence=0.0,
            parameters={}
        )
def _basic_analyze_message(self, message: str) -> NLPAnalysisResult:
    """Run the rule-based pipeline: clean, detect intent, extract entities.

    The query type and the query parameters are both derived from the
    detected intent (the parameters also use the extracted entities).
    """
    text = self._clean_message(message)
    detected_intent, score = self._identify_intent(text)
    found_entities = self._extract_entities(text)
    return NLPAnalysisResult(
        query_type=self._determine_query_type(detected_intent),
        intent=detected_intent,
        entities=found_entities,
        confidence=score,
        parameters=self._generate_parameters(detected_intent, found_entities),
    )
def _clean_message(self, message: str) -> str:
"""清理訊息"""
# 移除多餘空白
message = re.sub(r'\s+', ' ', message.strip())
return message
def _identify_intent(self, message: str) -> tuple[str, float]:
"""識別用戶意圖"""
best_intent = "unknown"
best_score = 0.0
for intent, patterns in self.intent_patterns.items():
for pattern in patterns:
if re.search(pattern, message, re.IGNORECASE):
score = len(re.findall(pattern, message, re.IGNORECASE)) / len(message.split())
if score > best_score:
best_score = score
best_intent = intent
# 如果沒有匹配到任何模式,設定基本信心度
confidence = max(best_score, 0.3) if best_intent != "unknown" else 0.1
return best_intent, min(confidence, 1.0)
def _extract_entities(self, message: str) -> Dict[str, Any]:
"""提取實體"""
entities = {}
for entity_type, pattern in self.entity_patterns.items():
matches = re.findall(pattern, message, re.IGNORECASE)
if matches:
if entity_type == "price_range":
# 處理價格範圍
for match in matches:
if isinstance(match, tuple):
prices = [p for p in match if p]
if len(prices) >= 2:
entities["min_price"] = int(prices[0])
entities["max_price"] = int(prices[1])
elif entity_type == "user_name":
# 處理用戶名(可能有多個捕獲組)
for match in matches:
if isinstance(match, tuple):
name = next((n for n in match if n), None)
if name:
entities[entity_type] = name
else:
entities[entity_type] = match
else:
entities[entity_type] = matches[0] if isinstance(matches[0], str) else matches[0][0]
return entities
def _determine_query_type(self, intent: str) -> QueryType:
    """Map an intent name to its QueryType.

    The ``search_`` / ``create_`` / ``update_`` / ``delete_`` prefixes select
    the matching type, the exact name "analytics" selects ANALYTICS, and
    anything else is UNKNOWN.
    """
    if intent.startswith("search_"):
        return QueryType.SEARCH
    if intent.startswith("create_"):
        return QueryType.CREATE
    if intent.startswith("update_"):
        return QueryType.UPDATE
    if intent.startswith("delete_"):
        return QueryType.DELETE
    if intent == "analytics":
        return QueryType.ANALYTICS
    return QueryType.UNKNOWN
def _generate_parameters(self, intent: str, entities: Dict[str, Any]) -> Dict[str, Any]:
"""生成查詢參數"""
parameters = {}
# 根據意圖設定表名
if "user" in intent:
parameters["table"] = "users"
elif "order" in intent:
parameters["table"] = "orders"
elif "product" in intent:
parameters["table"] = "products"
# 設定查詢條件
conditions = {}
if "user_id" in entities:
conditions["user_id"] = entities["user_id"]
if "user_name" in entities:
conditions["name"] = entities["user_name"]
if "order_id" in entities:
conditions["order_id"] = entities["order_id"]
if "product_name" in entities:
conditions["name"] = entities["product_name"]
if "min_price" in entities and "max_price" in entities:
conditions["price"] = {
"gte": entities["min_price"],
"lte": entities["max_price"]
}
if conditions:
parameters["conditions"] = conditions
# 設定限制
if "number" in entities:
parameters["limit"] = min(int(entities["number"]), 50) # 最多50筆
else:
parameters["limit"] = 10 # 預設10筆
return parameters
def format_response(self, db_result: DatabaseResult, analysis_result: NLPAnalysisResult, user_message: str = "", use_advanced: bool = True) -> str:
    """Turn a database result into the reply text shown to the user.

    Args:
        db_result: result of the database query.
        analysis_result: NLP analysis that produced the query.
        user_message: original user text; required for the advanced path.
        use_advanced: when True, a non-empty ``user_message`` and a configured
            OpenRouter API key enable the generated reply.

    Returns:
        The generated reply when it is non-blank, otherwise the rule-based
        text from ``_basic_format_response``; a generic error message if
        formatting itself fails.
    """
    try:
        if use_advanced and user_message and self.openrouter_service.api_key:
            try:
                query_result = {
                    "success": db_result.success,
                    "data": db_result.data,
                    "count": db_result.count,
                    "error": db_result.error
                }

                # Drive the coroutine on a private loop from synchronous code.
                loop = asyncio.new_event_loop()
                try:
                    asyncio.set_event_loop(loop)
                    advanced_response = loop.run_until_complete(
                        self.openrouter_service.generate_response(query_result, user_message)
                    )
                finally:
                    # Always release the loop, even when the call raised, and
                    # do not leave a closed loop installed for this thread.
                    asyncio.set_event_loop(None)
                    loop.close()

                if advanced_response and advanced_response.strip():
                    return advanced_response
            except Exception as e:
                # Best effort: fall through to the basic formatter.
                logger.warning(f"進階回應生成失敗,使用基礎格式: {str(e)}")

        return self._basic_format_response(db_result, analysis_result)
    except Exception as e:
        logger.error(f"格式化回應錯誤: {str(e)}")
        return "資料處理時發生錯誤,請稍後再試。"
def _basic_format_response(self, db_result: DatabaseResult, analysis_result: NLPAnalysisResult) -> str:
"""基礎回應格式化"""
if not db_result.success:
return f"抱歉,查詢時發生錯誤:{db_result.error}"
if not db_result.data:
return "沒有找到相關資料。"
intent = analysis_result.intent
data = db_result.data
if intent.startswith("search_user"):
return self._format_user_response(data)
elif intent.startswith("search_order"):
return self._format_order_response(data)
elif intent.startswith("search_product"):
return self._format_product_response(data)
elif intent == "analytics":
return self._format_analytics_response(data)
else:
return f"找到 {len(data)} 筆資料。"
def _format_user_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化用戶查詢回應"""
if len(data) == 1:
user = data[0]
return f"用戶資料:\n名稱:{user.get('name', 'N/A')}\nID:{user.get('user_id', 'N/A')}\n電子郵件:{user.get('email', 'N/A')}"
else:
response = f"找到 {len(data)} 位用戶:\n"
for i, user in enumerate(data[:5], 1): # 最多顯示5筆
response += f"{i}. {user.get('name', 'N/A')} (ID: {user.get('user_id', 'N/A')})\n"
if len(data) > 5:
response += f"... 還有 {len(data) - 5} 筆資料"
return response
def _format_order_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化訂單查詢回應"""
if len(data) == 1:
order = data[0]
return f"訂單資料:\n訂單編號:{order.get('order_id', 'N/A')}\n狀態:{order.get('status', 'N/A')}\n金額:${order.get('total_amount', 'N/A')}"
else:
response = f"找到 {len(data)} 筆訂單:\n"
for i, order in enumerate(data[:5], 1):
response += f"{i}. {order.get('order_id', 'N/A')} - ${order.get('total_amount', 'N/A')}\n"
if len(data) > 5:
response += f"... 還有 {len(data) - 5} 筆資料"
return response
def _format_product_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化商品查詢回應"""
if len(data) == 1:
product = data[0]
return f"商品資料:\n名稱:{product.get('name', 'N/A')}\n價格:${product.get('price', 'N/A')}\n庫存:{product.get('stock', 'N/A')}"
else:
response = f"找到 {len(data)} 項商品:\n"
for i, product in enumerate(data[:5], 1):
response += f"{i}. {product.get('name', 'N/A')} - ${product.get('price', 'N/A')}\n"
if len(data) > 5:
response += f"... 還有 {len(data) - 5} 筆資料"
return response
def _format_analytics_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化分析回應"""
if data and len(data) > 0:
if 'count' in data[0]:
return f"統計結果:共 {data[0]['count']} 筆資料"
else:
return f"分析結果:找到 {len(data)} 筆相關資料"
return "無統計資料"
def analyze_business_query(self, message: str, user_id: str = None) -> NLPAnalysisResult:
    """Analyze a business-related natural-language query.

    Detects the business intent, extracts entities, builds the query type
    and parameters (passing ``user_id`` through) and scores the confidence.
    Any exception is logged and turned into an UNKNOWN result with
    confidence 0.0.
    """
    try:
        detected = self._detect_business_intent(message)
        found = self._extract_business_entities(message, detected)
        kind, params = self._build_query_parameters(detected, found, user_id)
        score = self._calculate_confidence(message, detected, found)
        return NLPAnalysisResult(
            query_type=kind,
            intent=detected,
            entities=found,
            confidence=score,
            parameters=params,
        )
    except Exception as e:
        logger.error(f"業務查詢分析錯誤: {str(e)}")
        return NLPAnalysisResult(
            query_type=QueryType.UNKNOWN,
            intent="unknown",
            entities={},
            confidence=0.0,
            parameters={},
        )
def _detect_business_intent(self, message: str) -> str:
"""檢測業務查詢意圖"""
message_lower = message.lower()
# 檢查業務相關的意圖模式
for intent, patterns in self.business_intent_patterns.items():
for pattern in patterns:
if re.search(pattern, message_lower):
return intent
# 如果沒有匹配到業務意圖,使用原有的意圖檢測
for intent, patterns in self.intent_patterns.items():
for pattern in patterns:
if re.search(pattern, message_lower):
return intent
return "general_search"
def _extract_business_entities(self, message: str, intent: str) -> Dict[str, Any]:
"""提取業務相關實體"""
entities = {}
# 商品名稱提取
product_patterns = [
r"商品[::]?\s*([^\s,。!?]+)",
r"產品[::]?\s*([^\s,。!?]+)",
r"貨品[::]?\s*([^\s,。!?]+)"
]
for pattern in product_patterns:
match = re.search(pattern, message)
if match:
entities["product_name"] = match.group(1)
break
# 如果沒有明確的商品名稱,嘗試提取關鍵字
if "product_name" not in entities:
# 移除查詢關鍵字後的剩餘內容可能是商品名稱
keywords_to_remove = ['查詢', '搜尋', '找', '商品', '產品', '庫存', '有沒有', '請問', '的', '嗎']
cleaned_message = message
for keyword in keywords_to_remove:
cleaned_message = cleaned_message.replace(keyword, '')
cleaned_message = cleaned_message.strip()
if cleaned_message and len(cleaned_message) > 0:
entities["search_text"] = cleaned_message
# 客戶相關實體
customer_patterns = [
r"客戶[::]?\s*([^\s,。!?]+)",
r"客戶編號[::]?\s*([A-Za-z0-9]+)",
r"客戶名稱[::]?\s*([^\s,。!?]+)"
]
for pattern in customer_patterns:
match = re.search(pattern, message)
if match:
entities["customer_info"] = match.group(1)
break
# 訂單相關實體
order_patterns = [
r"訂單[編號ID][::]?\s*([A-Za-z0-9\-]+)",
r"訂單[::]?\s*([A-Za-z0-9\-]+)"
]
for pattern in order_patterns:
match = re.search(pattern, message)
if match:
entities["order_id"] = match.group(1)
break
# 數量相關實體
quantity_patterns = [
r"(\d+)\s*個",
r"(\d+)\s*件",
r"(\d+)\s*箱",
r"數量[::]?\s*(\d+)"
]
for pattern in quantity_patterns:
match = re.search(pattern, message)
if match:
entities["quantity"] = int(match.group(1))
break
# 狀態相關實體
status_keywords = {
"待處理": ["待處理", "pending"],
"已確認": ["已確認", "confirmed"],
"已出貨": ["已出貨", "shipped"],
"已完成": ["已完成", "completed"],
"已取消": ["已取消", "cancelled"]
}
message_lower = message.lower()
for status, keywords in status_keywords.items():
if any(keyword in message_lower for keyword in keywords):
entities["status"] = status
break
return entities
def _build_query_parameters(self, intent: str, entities: Dict[str, Any], user_id: str = None) -> tuple:
    """Build ``(query_type, parameters)`` for a business intent.

    Each known intent maps to a backend "method" name plus its arguments;
    any other intent falls back to a product search.
    """
    pick = entities.get

    if intent == "product_search":
        params = {
            "method": "search_products",
            "query_text": pick("product_name") or pick("search_text"),
            "category": pick("category"),
            "limit": 10,
        }
        return QueryType.SEARCH, params

    if intent == "inventory_check":
        params = {
            "method": "check_inventory",
            "product_name": pick("product_name") or pick("search_text"),
            "category": pick("category"),
        }
        return QueryType.SEARCH, params

    if intent == "order_search":
        params = {
            "method": "search_orders",
            "user_id": user_id,
            "order_id": pick("order_id"),
            "status": pick("status"),
            "limit": 10,
        }
        return QueryType.SEARCH, params

    if intent == "low_stock_alert":
        return QueryType.SEARCH, {"method": "get_low_stock_products", "threshold": 10}

    if intent == "business_summary":
        return QueryType.ANALYTICS, {"method": "get_business_summary"}

    # Default: product search, preferring the free search text.
    params = {
        "method": "search_products",
        "query_text": pick("search_text") or pick("product_name"),
        "limit": 10,
    }
    return QueryType.SEARCH, params
def _calculate_confidence(self, message: str, intent: str, entities: Dict[str, Any]) -> float:
"""計算查詢信心度"""
confidence = 0.5 # 基礎信心度
# 如果有明確的意圖匹配,增加信心度
if intent in self.business_intent_patterns:
confidence += 0.3
# 如果提取到實體,增加信心度
if entities:
confidence += 0.2 * len(entities)
# 如果訊息長度適中,增加信心度
if 2 <= len(message) <= 50:
confidence += 0.1
return min(confidence, 1.0)
def format_response_message(self, result: DatabaseResult, intent: str) -> str:
    """Format a business query result with the formatter for its intent.

    Failed queries and empty results get fixed messages; unknown intents
    use the general formatter.
    """
    if not result.success:
        return f"抱歉,查詢時發生錯誤:{result.error}"
    if not result.data or result.count == 0:
        return "沒有找到相關資料。"

    # Intent -> formatter method name; resolved lazily on the instance.
    formatter_names = {
        "product_search": "_format_product_response",
        "inventory_check": "_format_inventory_response",
        "order_search": "_format_order_response",
        "low_stock_alert": "_format_low_stock_response",
        "business_summary": "_format_summary_response",
    }
    chosen = formatter_names.get(intent, "_format_general_response")
    return getattr(self, chosen)(result.data)
def _format_product_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化商品查詢回應"""
if len(data) == 1:
product = data[0]
return f"找到商品:\n" \
f"名稱:{product.get('name', 'N/A')}\n" \
f"描述:{product.get('description', 'N/A')}\n" \
f"價格:${product.get('price', 0)}\n" \
f"類別:{product.get('category', 'N/A')}"
else:
response = f"找到 {len(data)} 個商品:\n"
for i, product in enumerate(data[:5], 1):
response += f"{i}. {product.get('name', 'N/A')} - ${product.get('price', 0)}\n"
if len(data) > 5:
response += f"... 還有 {len(data) - 5} 個商品"
return response
def _format_inventory_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化庫存查詢回應"""
if len(data) == 1:
item = data[0]
return f"庫存資訊:\n" \
f"商品:{item.get('product_name', 'N/A')}\n" \
f"目前庫存:{item.get('current_stock', 0)} 件\n" \
f"類別:{item.get('category', 'N/A')}\n" \
f"價格:${item.get('price', 0)}"
else:
response = f"找到 {len(data)} 個商品的庫存:\n"
for i, item in enumerate(data[:5], 1):
response += f"{i}. {item.get('product_name', 'N/A')} - 庫存:{item.get('current_stock', 0)}\n"
return response
def _format_order_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化訂單查詢回應 - 適配銷售訂單資料結構"""
if not data:
return "沒有找到符合條件的訂單。"
if len(data) == 1:
order = data[0]
status_display = order.get('status_display', order.get('status', 'N/A'))
return f"📋 訂單詳細資訊:\n" \
f"訂單編號:{order.get('order_id', 'N/A')}\n" \
f"銷售日期:{order.get('sales_date', 'N/A')}\n" \
f"狀態:{status_display}\n" \
f"客戶:{order.get('customer_name', 'N/A')}\n" \
f"銷售人員:{order.get('salesperson_name', 'N/A')}\n" \
f"付款條件:{order.get('payment_term', 'N/A')}\n" \
f"總金額:${order.get('total_amount', 0)}\n" \
f"更新時間:{order.get('updated_at', 'N/A')[:10] if order.get('updated_at') else 'N/A'}"
else:
response = f"📋 找到 {len(data)} 筆訂單:\n\n"
for i, order in enumerate(data[:5], 1):
status_display = order.get('status_display', order.get('status', 'N/A'))
sales_date = order.get('sales_date', 'N/A')
if isinstance(sales_date, str) and len(sales_date) > 10:
sales_date = sales_date[:10] # 只顯示日期部分
response += f"{i}. {order.get('order_id', 'N/A')}\n"
response += f" 狀態:{status_display} | 日期:{sales_date}\n"
response += f" 客戶:{order.get('customer_name', 'N/A')} | 金額:${order.get('total_amount', 0)}\n\n"
if len(data) > 5:
response += f"... 還有 {len(data) - 5} 筆訂單"
return response.strip()
def _format_low_stock_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化低庫存警告回應"""
if not data:
return "目前沒有低庫存商品。"
response = f"⚠️ 發現 {len(data)} 個低庫存商品:\n"
for i, item in enumerate(data[:10], 1):
response += f"{i}. {item.get('product_name', 'N/A')} - 剩餘:{item.get('current_stock', 0)} 件\n"
return response
def _format_summary_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化業務摘要回應"""
if data:
summary = data[0]
return f"📊 業務摘要:\n" \
f"商品總數:{summary.get('total_products', 0)} 個\n" \
f"訂單總數:{summary.get('total_orders', 0)} 筆\n" \
f"用戶總數:{summary.get('total_users', 0)} 人\n" \
f"低庫存商品:{summary.get('low_stock_items', 0)} 個\n" \
f"統計時間:{summary.get('report_date', 'N/A')}"
return "無法取得業務摘要資料。"
def _format_general_response(self, data: List[Dict[str, Any]]) -> str:
"""格式化一般查詢回應"""
return f"找到 {len(data)} 筆資料。" |