Till Fischer committed on
Commit
c7ad5e5
·
1 Parent(s): cca703d

Update all changes

Browse files
Files changed (1) hide show
  1. analyze_aspects.py +2 -1
analyze_aspects.py CHANGED
@@ -94,7 +94,8 @@ def analyze_quickwin(db_path: Path, isbn: str, device: int = -1, languages: list
94
  logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
95
 
96
  lang_map = {'de': 'german', 'en': 'english'}
97
- sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
 
98
 
99
  if lang == 'de':
100
  aspect_map = ASPECT_LABEL_MAP
 
94
  logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
95
 
96
  lang_map = {'de': 'german', 'en': 'english'}
97
+ tokenizer = nltk.data.load(f"tokenizers/punkt/{lang_map.get(lang, 'english')}.pickle")
98
+ sentences = tokenizer.tokenize(text)
99
 
100
  if lang == 'de':
101
  aspect_map = ASPECT_LABEL_MAP