"""

NLP Query Enhancer

Advanced query processing using spaCy and NLTK for better search results

"""

import spacy
import nltk
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
import re
import logging
import asyncio

logger = logging.getLogger(__name__)

class NLPQueryEnhancer:
    """Enhanced query processing using advanced NLP techniques"""
    
    def __init__(self):
        """Initialize NLP models and resources"""
        self.nlp = None
        self.sentence_model = None
        self.lemmatizer = None
        self.stop_words = None
        self._initialized = False
        
        # Industry-specific term mappings
        self.industry_ontology = {
            'technology': {
                'ai': ['artificial intelligence', 'machine learning', 'deep learning', 'neural networks'],
                'app': ['application', 'software', 'mobile app', 'web app', 'platform'],
                'api': ['application programming interface', 'web service', 'endpoint'],
                'saas': ['software as a service', 'cloud software', 'subscription software'],
                'iot': ['internet of things', 'connected devices', 'smart devices'],
                'blockchain': ['distributed ledger', 'cryptocurrency', 'smart contracts']
            },
            'business': {
                'startup': ['new business', 'entrepreneur', 'venture', 'company'],
                'revenue': ['income', 'earnings', 'sales', 'profit'],
                'customer': ['client', 'user', 'consumer', 'buyer'],
                'market': ['industry', 'sector', 'segment', 'niche'],
                'competition': ['competitor', 'rival', 'alternative', 'substitute']
            },
            'health': {
                'fitness': ['exercise', 'workout', 'training', 'physical activity'],
                'nutrition': ['diet', 'food', 'eating', 'meal planning'],
                'wellness': ['health', 'wellbeing', 'lifestyle', 'self-care'],
                'medical': ['healthcare', 'clinical', 'treatment', 'diagnosis']
            },
            'finance': {
                'fintech': ['financial technology', 'digital banking', 'payment systems'],
                'investment': ['portfolio', 'trading', 'stocks', 'assets'],
                'banking': ['financial services', 'credit', 'loans', 'deposits'],
                'insurance': ['coverage', 'policy', 'claims', 'risk management']
            }
        }
    
    async def initialize(self):
        """Initialize NLP models asynchronously"""
        if self._initialized:
            return
        
        try:
            logger.info("Initializing NLP models...")
            
            # Download required NLTK data
            await self._download_nltk_data()
            
            # Initialize spaCy model
            try:
                self.nlp = spacy.load("en_core_web_sm")
            except OSError:
                logger.warning("spaCy model 'en_core_web_sm' not found. Using basic tokenization.")
                self.nlp = None
            
            # Initialize sentence transformer
            try:
                self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                logger.warning(f"Could not load sentence transformer: {e}")
                self.sentence_model = None
            
            # Initialize NLTK components
            self.lemmatizer = WordNetLemmatizer()
            self.stop_words = set(stopwords.words('english'))
            
            self._initialized = True
            logger.info("NLP models initialized successfully")
            
        except Exception as e:
            logger.error(f"Error initializing NLP models: {e}")
            # Set basic fallbacks so the enhancer can still run in degraded mode;
            # WordNetLemmatizer is imported at module level, but building it can
            # still fail if the WordNet data is missing.
            try:
                self.lemmatizer = WordNetLemmatizer()
            except Exception:
                self.lemmatizer = None
            self.stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
    
    async def _download_nltk_data(self):
        """Download required NLTK data if it is not already installed"""
        # Each NLTK resource lives under a different data directory, so look it
        # up by its real path instead of assuming everything is a tokenizer.
        required_data = {
            'punkt': 'tokenizers/punkt',
            'stopwords': 'corpora/stopwords',
            'wordnet': 'corpora/wordnet',
            'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
            'omw-1.4': 'corpora/omw-1.4'
        }
        
        for data, path in required_data.items():
            try:
                nltk.data.find(path)
            except LookupError:
                try:
                    nltk.download(data, quiet=True)
                except Exception as e:
                    logger.warning(f"Could not download NLTK data '{data}': {e}")
    
    async def enhance_query(self, feature_description: str, target_market: str = "") -> Dict[str, Any]:
        """

        Enhance a feature description into multiple optimized search queries

        

        Args:

            feature_description: Original feature description

            target_market: Target market description

            

        Returns:

            Enhanced query data with multiple search strategies

        """
        await self.initialize()
        
        try:
            # Extract key concepts using NLP
            concepts = await self._extract_key_concepts(feature_description)
            
            # Generate semantic variations
            semantic_queries = await self._generate_semantic_queries(feature_description)
            
            # Expand with synonyms and related terms
            expanded_queries = await self._expand_with_synonyms(concepts)
            
            # Add industry-specific terms
            industry_queries = await self._add_industry_terms(concepts, target_market)
            
            # Generate market-specific queries
            market_queries = await self._generate_market_queries(feature_description, target_market)
            
            # Combine and rank queries
            all_queries = {
                'core_concepts': concepts['core_terms'],
                'semantic_variations': semantic_queries,
                'synonym_expansions': expanded_queries,
                'industry_specific': industry_queries,
                'market_focused': market_queries,
                'combined_queries': self._combine_queries(concepts, target_market)
            }
            
            return {
                'original_description': feature_description,
                'target_market': target_market,
                'extracted_concepts': concepts,
                'enhanced_queries': all_queries,
                'query_metadata': {
                    'total_queries': sum(len(v) if isinstance(v, list) else 1 for v in all_queries.values()),
                    'confidence_score': self._calculate_confidence(concepts),
                    'processing_method': 'advanced_nlp' if self.nlp else 'basic_processing'
                }
            }
            
        except Exception as e:
            logger.error(f"Error enhancing query: {e}")
            # Fallback to basic processing
            return await self._basic_query_enhancement(feature_description, target_market)
    
    async def _extract_key_concepts(self, text: str) -> Dict[str, Any]:
        """Extract key concepts using spaCy NLP"""
        concepts = {
            'core_terms': [],
            'entities': [],
            'technologies': [],
            'business_terms': [],
            'action_verbs': [],
            'descriptors': []
        }
        
        if self.nlp:
            # Use spaCy for advanced processing
            doc = self.nlp(text)
            
            # Extract named entities; en_core_web_sm emits no TECHNOLOGY label,
            # so organizations and products are the closest available proxies
            for ent in doc.ents:
                if ent.label_ in ['ORG', 'PRODUCT']:
                    concepts['entities'].append(ent.text.lower())
            
            # Extract key terms by POS tags
            for token in doc:
                if token.is_stop or token.is_punct or len(token.text) < 3:
                    continue
                
                lemma = token.lemma_.lower()
                
                if token.pos_ == 'NOUN' and token.dep_ in ['nsubj', 'dobj', 'pobj']:
                    concepts['core_terms'].append(lemma)
                elif token.pos_ == 'VERB' and token.tag_ not in ['MD', 'VBZ']:  # Skip modals and 3rd-person singular forms
                    concepts['action_verbs'].append(lemma)
                elif token.pos_ == 'ADJ':
                    concepts['descriptors'].append(lemma)
            
            # Extract noun phrases
            for chunk in doc.noun_chunks:
                if len(chunk.text.split()) <= 3:  # Keep phrases short
                    concepts['core_terms'].append(chunk.text.lower())
        
        else:
            # Fallback to basic processing
            words = word_tokenize(text.lower())
            if self.lemmatizer:
                words = [self.lemmatizer.lemmatize(word) for word in words if word.isalpha()]
            
            # Filter out stop words
            words = [word for word in words if word not in self.stop_words and len(word) > 2]
            concepts['core_terms'] = words[:10]  # Limit to top 10
        
        # Categorize terms
        concepts = self._categorize_terms(concepts)
        
        # Remove duplicates and sort by importance
        for key in concepts:
            if isinstance(concepts[key], list):
                concepts[key] = list(dict.fromkeys(concepts[key]))[:5]  # Top 5 per category
        
        return concepts
    
    def _categorize_terms(self, concepts: Dict[str, Any]) -> Dict[str, Any]:
        """Categorize terms into technology, business, etc."""
        tech_keywords = {'ai', 'app', 'software', 'platform', 'api', 'system', 'tool', 'technology', 'digital', 'online', 'mobile', 'web'}
        business_keywords = {'market', 'customer', 'business', 'service', 'product', 'solution', 'company', 'industry', 'revenue', 'profit'}
        
        all_terms = concepts['core_terms'] + concepts['entities']
        
        for term in all_terms:
            term_lower = term.lower()
            if any(tech in term_lower for tech in tech_keywords):
                concepts['technologies'].append(term)
            elif any(biz in term_lower for biz in business_keywords):
                concepts['business_terms'].append(term)
        
        return concepts
    
    async def _generate_semantic_queries(self, text: str) -> List[str]:
        """Generate semantically similar queries using sentence transformers"""
        if not self.sentence_model:
            return []
        
        try:
            # Generate variations by paraphrasing key concepts
            base_embedding = self.sentence_model.encode([text])
            
            # Create variations by replacing key terms with synonyms
            variations = []
            words = text.split()
            
            for i, word in enumerate(words):
                if len(word) > 4 and word.lower() not in self.stop_words:
                    # Try to find synonyms
                    synonyms = self._get_wordnet_synonyms(word)
                    for synonym in synonyms[:2]:  # Limit to 2 synonyms per word
                        new_text = words.copy()
                        new_text[i] = synonym
                        variations.append(' '.join(new_text))
            
            return variations[:5]  # Return top 5 variations
            
        except Exception as e:
            logger.warning(f"Error generating semantic queries: {e}")
            return []
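
    # The sentence model is loaded in initialize() but never consulted when
    # generating variations above. The helper below is an illustrative sketch
    # (not called by enhance_query) of how cosine similarity could rank
    # candidate queries against the original text.
    def _rank_variations_by_similarity(self, text: str, variations: List[str]) -> List[str]:
        """Rank candidate queries by semantic similarity to the original text (sketch)"""
        if not self.sentence_model or not variations:
            return variations
        
        # Local import keeps the sketch self-contained; util ships with
        # the sentence-transformers package.
        from sentence_transformers import util
        
        embeddings = self.sentence_model.encode([text] + variations)
        scores = util.cos_sim(embeddings[0:1], embeddings[1:])[0]  # one score per variation
        ranked = sorted(zip(scores.tolist(), variations), key=lambda pair: pair[0], reverse=True)
        return [variation for _, variation in ranked]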
    
    async def _expand_with_synonyms(self, concepts: Dict[str, Any]) -> List[str]:
        """Expand core terms with synonyms using WordNet"""
        expanded = []
        
        for term in concepts['core_terms'][:5]:  # Limit to top 5 terms
            synonyms = self._get_wordnet_synonyms(term)
            if synonyms:
                # Create queries with synonyms
                expanded.extend(synonyms[:3])  # Top 3 synonyms per term
        
        return list(set(expanded))  # Remove duplicates
    
    def _get_wordnet_synonyms(self, word: str) -> List[str]:
        """Get synonyms from WordNet"""
        synonyms = set()
        
        try:
            for syn in wordnet.synsets(word):
                for lemma in syn.lemmas():
                    synonym = lemma.name().replace('_', ' ')
                    if synonym.lower() != word.lower() and len(synonym) > 2:
                        synonyms.add(synonym)
        except Exception as e:
            logger.debug(f"Error getting synonyms for '{word}': {e}")
        
        return list(synonyms)[:5]  # Limit to 5 synonyms
    
    async def _add_industry_terms(self, concepts: Dict[str, Any], target_market: str) -> List[str]:
        """Add industry-specific terminology"""
        industry_queries = []
        
        # Detect industry from concepts and target market
        detected_industries = self._detect_industries(concepts, target_market)
        
        for industry in detected_industries:
            if industry in self.industry_ontology:
                ontology = self.industry_ontology[industry]
                
                # Match concepts to industry terms
                for concept in concepts['core_terms'][:3]:
                    for term, expansions in ontology.items():
                        if concept.lower() in term or term in concept.lower():
                            industry_queries.extend(expansions[:2])  # Add 2 expansions
        
        return list(set(industry_queries))[:8]  # Limit to 8 industry terms
    
    def _detect_industries(self, concepts: Dict[str, Any], target_market: str) -> List[str]:
        """Detect relevant industries from concepts and target market"""
        industries = []
        all_text = ' '.join(concepts['core_terms'] + [target_market]).lower()
        
        # Simple keyword matching for industry detection
        industry_keywords = {
            'technology': ['app', 'software', 'ai', 'tech', 'digital', 'platform', 'api'],
            'health': ['fitness', 'health', 'medical', 'wellness', 'nutrition', 'exercise'],
            'business': ['business', 'startup', 'company', 'market', 'customer'],
            'finance': ['finance', 'money', 'payment', 'banking', 'investment']
        }
        
        for industry, keywords in industry_keywords.items():
            if any(keyword in all_text for keyword in keywords):
                industries.append(industry)
        
        return industries[:2]  # Limit to 2 industries
    
    async def _generate_market_queries(self, feature_description: str, target_market: str) -> List[str]:
        """Generate market-focused search queries"""
        if not target_market:
            return []
        
        market_queries = []
        
        # Extract key terms from feature description
        feature_words = [word for word in feature_description.split() 
                        if len(word) > 3 and word.lower() not in self.stop_words][:3]
        
        # Extract market segments
        market_words = [word for word in target_market.split() 
                       if len(word) > 3 and word.lower() not in self.stop_words][:3]
        
        # Combine feature + market
        for feature_word in feature_words:
            for market_word in market_words:
                market_queries.append(f"{feature_word} {market_word}")
                market_queries.append(f"{market_word} {feature_word}")
        
        # Add market-specific patterns
        market_patterns = [
            f"{target_market} solutions",
            f"{target_market} tools",
            f"{target_market} software",
            f"{target_market} apps",
            f"{target_market} technology"
        ]
        
        market_queries.extend(market_patterns)
        
        return list(set(market_queries))[:6]  # Limit to 6 market queries
    
    def _combine_queries(self, concepts: Dict[str, Any], target_market: str) -> List[str]:
        """Combine different concepts into comprehensive, contextually relevant queries"""
        combined = []
        
        core_terms = concepts['core_terms'][:3]
        technologies = concepts['technologies'][:2]
        business_terms = concepts['business_terms'][:2]
        
        # Create contextual phrase combinations (preserve meaning)
        if len(core_terms) >= 2:
            # Keep related terms together
            main_concept = ' '.join(core_terms[:2])
            combined.append(main_concept)
            
            # Add third term only if it's contextually related
            if len(core_terms) >= 3:
                extended_concept = ' '.join(core_terms[:3])
                # Only add if it makes semantic sense
                if len(extended_concept.split()) <= 4:  # Avoid overly long phrases
                    combined.append(extended_concept)
        
        # Combine technology + business terms meaningfully
        for tech in technologies:
            for biz in business_terms:
                # Create meaningful combinations
                tech_biz_combo = f"{tech} {biz}"
                # Ensure the combination makes sense (not just random word pairs)
                if self._is_meaningful_combination(tech, biz):
                    combined.append(tech_biz_combo)
        
        # Add contextual market combinations
        if target_market:
            market_words = [word for word in target_market.split() if len(word) > 3][:2]
            for market_word in market_words:
                for term in core_terms[:2]:
                    # Create market-specific queries that maintain context
                    market_combo = f"{term} {market_word}"
                    if self._is_meaningful_combination(term, market_word):
                        combined.append(market_combo)
        
        # Remove duplicates and filter for relevance
        unique_combined = list(dict.fromkeys(combined))  # Preserve order while removing duplicates
        
        # Filter out combinations that are too generic or meaningless
        filtered_combined = [combo for combo in unique_combined if self._is_contextually_relevant(combo)]
        
        return filtered_combined[:5]  # Limit to 5 most relevant combined queries
    
    def _is_meaningful_combination(self, term1: str, term2: str) -> bool:
        """Check if two terms create a meaningful combination"""
        # Avoid combining very similar terms
        if term1.lower() == term2.lower():
            return False
        
        # Avoid combining terms that are substrings of each other
        if term1.lower() in term2.lower() or term2.lower() in term1.lower():
            return False
        
        # Check for semantic compatibility
        tech_terms = {'ai', 'ml', 'algorithm', 'neural', 'deep', 'machine', 'learning', 'data', 'model', 'system', 'network', 'automation', 'software', 'digital', 'technology', 'computer', 'intelligence'}
        business_terms = {'business', 'market', 'customer', 'service', 'product', 'solution', 'company', 'industry', 'enterprise', 'commercial', 'professional', 'management', 'strategy'}
        
        term1_is_tech = any(tech in term1.lower() for tech in tech_terms)
        term2_is_tech = any(tech in term2.lower() for tech in tech_terms)
        term1_is_business = any(biz in term1.lower() for biz in business_terms)
        term2_is_business = any(biz in term2.lower() for biz in business_terms)
        
        # Good combinations: tech + business, or related terms
        if (term1_is_tech and term2_is_business) or (term1_is_business and term2_is_tech):
            return True
        
        # Both are tech terms or both are business terms - can be good if not too similar
        if (term1_is_tech and term2_is_tech) or (term1_is_business and term2_is_business):
            return len(set(term1.lower().split()) & set(term2.lower().split())) == 0  # No overlapping words
        
        return True  # Default to allowing the combination
    
    def _is_contextually_relevant(self, query: str) -> bool:
        """Check if a query maintains contextual relevance"""
        words = query.lower().split()
        
        # Filter out queries that are too short or too long
        if len(words) < 1 or len(words) > 5:
            return False
        
        # Filter out queries with only stop words
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
        meaningful_words = [word for word in words if word not in stop_words and len(word) > 2]
        
        if len(meaningful_words) == 0:
            return False
        
        # Ensure at least one word is substantial (length > 3)
        if not any(len(word) > 3 for word in meaningful_words):
            return False
        
        return True
    
    def _calculate_confidence(self, concepts: Dict[str, Any]) -> float:
        """Calculate confidence score for the extraction"""
        total_concepts = sum(len(v) if isinstance(v, list) else 0 for v in concepts.values())
        
        # Base confidence on number of extracted concepts
        if total_concepts >= 15:
            return 0.9
        elif total_concepts >= 10:
            return 0.8
        elif total_concepts >= 5:
            return 0.7
        else:
            return 0.6
    
    async def _basic_query_enhancement(self, feature_description: str, target_market: str) -> Dict[str, Any]:
        """Fallback basic query enhancement when NLP models fail"""
        # Simple word extraction
        words = re.findall(r'\b\w{3,}\b', feature_description.lower())
        words = [word for word in words if word not in self.stop_words][:5]
        
        return {
            'original_description': feature_description,
            'target_market': target_market,
            'extracted_concepts': {'core_terms': words},
            'enhanced_queries': {
                'core_concepts': words,
                'semantic_variations': [],
                'synonym_expansions': [],
                'industry_specific': [],
                'market_focused': [target_market] if target_market else [],
                'combined_queries': [' '.join(words[:2])] if len(words) >= 2 else words
            },
            'query_metadata': {
                'total_queries': len(words) + (1 if target_market else 0),
                'confidence_score': 0.5,
                'processing_method': 'basic_fallback'
            }
        }
    
    def get_optimized_queries_for_platform(self, enhanced_data: Dict[str, Any], platform: str) -> List[str]:
        """

        Get platform-optimized queries from enhanced data

        

        Args:

            enhanced_data: Result from enhance_query()

            platform: Target platform ('news', 'reddit', etc.)

            

        Returns:

            List of optimized queries for the specific platform

        """
        queries = enhanced_data.get('enhanced_queries', {})
        
        # Each strategy lists keys of enhanced_queries; 'business_terms' is not
        # one of them, so LinkedIn draws on combined_queries instead
        platform_strategies = {
            'news': ['core_concepts', 'industry_specific', 'combined_queries'],
            'reddit': ['semantic_variations', 'market_focused', 'core_concepts'],
            'linkedin': ['industry_specific', 'combined_queries', 'market_focused']
        }
        
        strategy = platform_strategies.get(platform, ['core_concepts', 'combined_queries'])
        optimized_queries = []
        
        for strategy_key in strategy:
            if strategy_key in queries:
                query_list = queries[strategy_key]
                if isinstance(query_list, list):
                    optimized_queries.extend(query_list)
                else:
                    optimized_queries.append(str(query_list))
        
        # Remove duplicates while preserving order
        seen = set()
        unique_queries = []
        for query in optimized_queries:
            if query not in seen:
                seen.add(query)
                unique_queries.append(query)
        
        return unique_queries[:8]  # Limit to 8 queries per platform


# Example usage and testing
async def test_nlp_enhancer():
    """Test the NLP query enhancer"""
    enhancer = NLPQueryEnhancer()
    
    # Test query enhancement
    feature = "AI-powered voice ordering system for restaurants"
    market = "small to medium restaurants, food service industry"
    
    print("Testing NLP Query Enhancement...")
    enhanced = await enhancer.enhance_query(feature, market)
    
    print(f"Original: {enhanced['original_description']}")
    print(f"Market: {enhanced['target_market']}")
    print(f"Core concepts: {enhanced['extracted_concepts']['core_terms']}")
    print(f"Confidence: {enhanced['query_metadata']['confidence_score']}")
    
    # Test platform-specific queries
    for platform in ['news', 'reddit']:
        queries = enhancer.get_optimized_queries_for_platform(enhanced, platform)
        print(f"{platform.title()} queries: {queries[:3]}")
    
    return enhanced

if __name__ == "__main__":
    asyncio.run(test_nlp_enhancer())
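
# Note: the first run may download NLTK corpora and the all-MiniLM-L6-v2
# sentence-transformer weights, so it needs network access; later runs reuse
# the locally cached copies.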