File size: 2,667 Bytes
22f8eb7
1b3b6e1
22f8eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b3b6e1
 
 
 
 
 
22f8eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b3b6e1
 
 
 
 
 
22f8eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b3b6e1
 
22f8eb7
 
1b3b6e1
 
22f8eb7
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import React from 'react'
import { useFeatureExtraction } from '../../contexts/FeatureExtractionContext'

/**
 * Settings panel for feature extraction.
 *
 * Reads `config` and `setConfig` from `useFeatureExtraction()` and renders two
 * controlled inputs:
 *   - a select bound to `config.pooling` (one of 'mean', 'cls', 'max')
 *   - a checkbox bound to `config.normalize`
 *
 * Every change calls `setConfig` with an updater function that copies the
 * previous config and overrides only the edited field.
 */
const FeatureExtractionConfig = () => {
  const { config, setConfig } = useFeatureExtraction()

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-semibold text-gray-900">
        Feature Extraction Settings
      </h3>

      <div className="space-y-3">
        <div>
          {/*
            The label wraps the select (same pattern as the checkbox below) so
            the control gets an accessible name and clicking the caption
            focuses it, without needing an id that could collide if this
            component is rendered more than once.
          */}
          <label className="block">
            <span className="block text-sm font-medium text-gray-700 mb-1">
              Pooling Strategy
            </span>
            <select
              value={config.pooling}
              onChange={(e) =>
                setConfig((prev) => ({
                  ...prev,
                  // The only selectable values are the three <option>s below.
                  pooling: e.target.value as 'mean' | 'cls' | 'max'
                }))
              }
              className="w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 text-sm"
            >
              <option value="mean">Mean Pooling</option>
              <option value="cls">CLS Token</option>
              <option value="max">Max Pooling</option>
            </select>
          </label>
          <p className="text-xs text-gray-500 mt-1">
            How to aggregate token embeddings into sentence embeddings
          </p>
        </div>

        <div>
          <label className="flex items-center space-x-2">
            <input
              type="checkbox"
              checked={config.normalize}
              onChange={(e) =>
                setConfig((prev) => ({
                  ...prev,
                  normalize: e.target.checked
                }))
              }
              className="rounded border-gray-300 text-blue-600 shadow-sm focus:border-blue-300 focus:ring focus:ring-blue-200 focus:ring-opacity-50"
            />
            <span className="text-sm font-medium text-gray-700">
              Normalize Embeddings
            </span>
          </label>
          <p className="text-xs text-gray-500 mt-1 ml-6">
            L2 normalize embeddings for better similarity calculations
          </p>
        </div>
      </div>

      {/* Static legend describing each pooling option. */}
      <div className="pt-2 border-t border-gray-200">
        <div className="text-xs text-gray-500">
          <p className="mb-1">
            <strong>Mean Pooling:</strong> Average all token embeddings
          </p>
          <p className="mb-1">
            <strong>CLS Token:</strong> Use the [CLS] token embedding (if
            available)
          </p>
          <p>
            <strong>Max Pooling:</strong> Take element-wise maximum across
            tokens
          </p>
        </div>
      </div>
    </div>
  )
}

export default FeatureExtractionConfig