remove-single-vector-projection (#18)
refactor: remove single vec projection (5bb9539014f6cef2d3662dba50b75749e0922b50)
adapters/adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/
+  "base_model_name_or_path": "jinaai/jina-embeddings-v4",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
adapters/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b6b7ab4a79daa3b4f3b5274500cc99d3dc89aa8c3419e9d79f89e366685e12e5
+size 359863776
config.json
CHANGED
@@ -33,7 +33,6 @@
   },
   "rope_theta": 1000000.0,
   "single_vector_pool_strategy": "mean",
-  "single_vector_projector_dim": 1024,
   "sliding_window": 32768,
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
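For reference, a quick way to confirm the serialized config matches this change (a hedged sketch; the path assumes a local checkout of this repo):

import json

# The projector dim should be gone from config.json while the pooling
# strategy is untouched.
with open("config.json") as f:
    cfg = json.load(f)

assert "single_vector_projector_dim" not in cfg
assert cfg["single_vector_pool_strategy"] == "mean"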
configuration_jina_embeddings_v4.py
CHANGED
@@ -9,14 +9,12 @@ class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
 
     def __init__(
         self,
-        single_vector_projector_dim: int = 1024,
         single_vector_pool_strategy: str = "mean",
         multi_vector_projector_dim: int = 128,
         pretrained_peft_model_name_or_path: Optional[str] = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.single_vector_projector_dim = single_vector_projector_dim
         self.single_vector_pool_strategy = single_vector_pool_strategy
         self.multi_vector_projector_dim = multi_vector_projector_dim
         self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
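After this change, single_vector_projector_dim is no longer a constructor argument or an attribute of the config. A minimal sketch of what that looks like at load time (assumes network access and a transformers version with Qwen2.5-VL support; AutoConfig is the standard Hugging Face entry point, not something added by this PR):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True)

print(config.single_vector_pool_strategy)              # "mean", unchanged
print(config.multi_vector_projector_dim)                # 128, unchanged
print(hasattr(config, "single_vector_projector_dim"))   # False after this refactor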
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:abb244162956ec2f26d944b6c10cbb96afe211d2aff908b8b2f498ec27a9100b
+size 4997750728
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5d5252a7ede6469220b0e7386af53fea9a45fa299a1d2af6fe68cb29897de3e3
+size 2512111904
model.safetensors.index.json
CHANGED
@@ -439,8 +439,6 @@
     "model.norm.weight": "model-00002-of-00002.safetensors",
     "multi_vector_projector.bias": "model-00002-of-00002.safetensors",
     "multi_vector_projector.weight": "model-00002-of-00002.safetensors",
-    "single_vector_projector.bias": "model-00002-of-00002.safetensors",
-    "single_vector_projector.weight": "model-00002-of-00002.safetensors",
     "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
     "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
     "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
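Since the projector weights are dropped from the checkpoint, the index should no longer map any single_vector_projector.* tensor to a shard. A small consistency check (hedged sketch; weight_map is the standard top-level key in safetensors index files, and the path assumes a local checkout):

import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

stale = [name for name in index["weight_map"] if name.startswith("single_vector_projector")]
assert not stale, f"stale projector entries still indexed: {stale}"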
modeling_jina_embeddings_v4.py
CHANGED
@@ -141,12 +141,11 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
 
     def __init__(self, config: JinaEmbeddingsV4Config):
         Qwen2_5_VLForConditionalGeneration.__init__(self, config)
-        self.
+        self._init_projection_layer(config)
         self.post_init()
         self.processor = JinaEmbeddingsV4Processor.from_pretrained(
             self.name_or_path, trust_remote_code=True, use_fast=True
         )
-        self.single_vector_projector_dim = config.single_vector_projector_dim
         self.multi_vector_projector_dim = config.multi_vector_projector_dim
         self._task = None
 
@@ -204,32 +203,25 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
 
         return hidden_states[-1]
 
-    def
+    def _init_projection_layer(self, config) -> None:
         """
         Initializes projection layers.
         """
-        self.config.single_vector_projector_dim = config.single_vector_projector_dim
         self.config.multi_vector_projector_dim = config.multi_vector_projector_dim
 
-        self.single_vector_projector = nn.Linear(
-            in_features=self.config.text_config.hidden_size,
-            out_features=self.config.single_vector_projector_dim,
-        )
-
         self.multi_vector_projector = nn.Linear(
             in_features=self.config.text_config.hidden_size,
             out_features=self.config.multi_vector_projector_dim,
         )
 
-    def
+    def get_single_vector_embeddings(
         self,
-        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: torch.Tensor,
         input_ids: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
-
+        Get the single-vector embeddings from the hidden states.
         """
         if self._input_has_image(input_ids[0]):  # got document image
             img_start_positions = torch.where(
@@ -257,12 +249,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                 hidden_states * attention_mask.unsqueeze(-1), dim=1
             ) / torch.sum(attention_mask, dim=1, keepdim=True)
 
-
-            pooled_output, task_label=task_label
-        )
-        return torch.nn.functional.normalize(single_vec_emb, dim=-1)
+        return torch.nn.functional.normalize(pooled_output, dim=-1)
 
-    def
+    def get_multi_vector_embeddings(
         self,
         task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
@@ -306,13 +295,12 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             **kwargs,
         ) # (batch_size, seq_length, hidden_size)
         # Compute the embeddings
-        single_vec_emb = self.
+        single_vec_emb = self.get_single_vector_embeddings(
             hidden_states=hidden_states,
             attention_mask=attention_mask,
             input_ids=input_ids,
-            task_label=task_label,
         )
-        multi_vec_emb = self.
+        multi_vec_emb = self.get_multi_vector_embeddings(
             hidden_states=hidden_states,
             attention_mask=attention_mask,
             task_label=task_label,
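With the projection layer gone, the single-vector path reduces to masked mean pooling over the last hidden states followed by L2 normalization, so the single-vector embedding width now equals config.text_config.hidden_size rather than the removed 1024-dim projection. A standalone sketch of that computation (text-only case with no image tokens; names mirror get_single_vector_embeddings above, and the hidden size in the example is illustrative):

import torch

def mean_pool_and_normalize(hidden_states: torch.Tensor,
                            attention_mask: torch.Tensor) -> torch.Tensor:
    # hidden_states: (batch, seq_len, hidden_size); attention_mask: (batch, seq_len)
    pooled_output = torch.sum(
        hidden_states * attention_mask.unsqueeze(-1), dim=1
    ) / torch.sum(attention_mask, dim=1, keepdim=True)
    # Unit-normalize so downstream cosine similarity is a plain dot product.
    return torch.nn.functional.normalize(pooled_output, dim=-1)

# Example with dummy tensors: the output keeps the hidden size as its width.
emb = mean_pool_and_normalize(torch.randn(2, 7, 2048), torch.ones(2, 7))
print(emb.shape)  # torch.Size([2, 2048])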