#!/usr/bin/env python
# coding: utf-8
# Copyright 2021, IBM Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Python lib to recommend prompts.
"""
__author__ = "Vagner Santana, Melina Alberio, Cassia Sanctos and Tiago Machado"
__copyright__ = "IBM Corporation 2024"
__credits__ = ["Vagner Santana, Melina Alberio, Cassia Sanctos, Tiago Machado"]
__license__ = "Apache 2.0"
__version__ = "0.0.1"
import requests
import json
import math
import re
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os
from sentence_transformers import SentenceTransformer
def populate_json(json_file_path = './prompt-sentences-main/prompt_sentences-all-minilm-l6-v2.json',
existing_json_populated_file_path = './prompt-sentences-main/prompt_sentences-all-minilm-l6-v2.json'):
"""
Function that receives a default json file with
empty embeddings and checks whether there is a
partially populated json file.
Args:
json_file_path: Path to json default file with
empty embeddings.
existing_json_populated_file_path: Path to partially
populated json file.
Returns:
        A dict with the loaded prompt sentences JSON.
Raises:
Exception when json file can't be loaded.
"""
json_file = json_file_path
if(os.path.isfile(existing_json_populated_file_path)):
json_file = existing_json_populated_file_path
    with open(json_file) as f:
        prompt_json = json.load(f)
return prompt_json
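# Illustrative usage (not executed by the library): loading the prompt sentences
# JSON with the default paths from the signature above.
#
#   prompt_json = populate_json()
#   print(len(prompt_json['positive_values']), len(prompt_json['negative_values']))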
def get_embedding_func(inference = 'huggingface', **kwargs):
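    """
    Function that builds an embedding function for the
    requested inference type.
    Args:
        inference: Either 'local' (SentenceTransformer) or
                   'huggingface' (Hugging Face Inference API).
        **kwargs: model_id for 'local'; api_url and headers
                  for 'huggingface'.
    Returns:
        A function that maps text(s) to embedding(s).
    Raises:
        TypeError when a required argument is missing.
        ValueError when the inference type is not supported.
    """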
if inference == 'local':
if 'model_id' not in kwargs:
raise TypeError("Missing required argument: model_id")
model = SentenceTransformer(kwargs['model_id'])
def embedding_fn(texts):
return model.encode(texts).tolist()
elif inference == 'huggingface':
if 'api_url' not in kwargs:
raise TypeError("Missing required argument: api_url")
if 'headers' not in kwargs:
raise TypeError("Missing required argument: headers")
def embedding_fn(texts):
response = requests.post(kwargs['api_url'], headers=kwargs['headers'], json={"inputs": texts, "options":{"wait_for_model":True}})
return response.json()
else:
raise ValueError(f"Inference type {inference} is not supported. Please choose one of ['local', 'huggingface'].")
return embedding_fn
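# Illustrative sketch: building an embedding function locally or through the
# Hugging Face Inference API. The api_url and token below are placeholders,
# not values shipped with this module.
#
#   embedding_fn = get_embedding_func(
#       'local', model_id='sentence-transformers/all-MiniLM-L6-v2'
#   )
#   # or, assuming a valid feature-extraction endpoint and token:
#   # embedding_fn = get_embedding_func('huggingface',
#   #                                   api_url='<embedding model endpoint>',
#   #                                   headers={'Authorization': 'Bearer <HF_TOKEN>'})
#   embeddings = embedding_fn(['Generate a project plan.'])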
def split_into_sentences(prompt):
"""
Function that splits the input text into sentences based
on punctuation (.!?). The regular expression pattern
'(?<=[.!?]) +' ensures that we split after a sentence-ending
punctuation followed by one or more spaces.
Args:
prompt: The entered prompt text.
Returns:
A list of extracted sentences.
Raises:
Nothing.
"""
sentences = re.split(r'(?<=[.!?]) +', prompt)
return sentences
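# Illustrative sketch: the regex above splits only after '.', '!', or '?'
# followed by whitespace, so a trailing unterminated fragment stays as its own
# sentence.
#
#   split_into_sentences('Act as a lawyer. Draft a contract! What clauses are needed')
#   # -> ['Act as a lawyer.', 'Draft a contract!', 'What clauses are needed']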
def get_distance(embedding1, embedding2):
"""
Function that returns euclidean distance between
two embeddings.
Args:
embedding1: first embedding.
embedding2: second embedding.
Returns:
The euclidean distance value.
Raises:
Nothing.
"""
    if(len(embedding1) != len(embedding2)):
        return math.inf
    total = 0
    for i in range(len(embedding1)):
        total += math.pow(embedding2[i] - embedding1[i], 2)
    return math.sqrt(total)
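# Illustrative sketch: with the flat vectors used in this module the function
# reduces to the euclidean norm of the difference.
#
#   get_distance([0.0, 0.0], [3.0, 4.0])   # -> 5.0
#   get_distance([0.0], [1.0, 2.0])        # -> inf (dimension mismatch)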
def sort_by_similarity(e):
"""
    Function that returns the similarity value used as a sort key.
    Args:
        e: Dict item containing a 'similarity' key.
    Returns:
        The similarity value.
Raises:
Nothing.
"""
return e['similarity']
def recommend_prompt(
prompt,
prompt_json,
embedding_fn = None,
add_lower_threshold = 0.3,
add_upper_threshold = 0.5,
remove_lower_threshold = 0.1,
remove_upper_threshold = 0.5,
umap_model = None
):
"""
    Function that recommends prompt additions or removals.
Args:
prompt: The entered prompt text.
prompt_json: Json file populated with embeddings.
embedding_fn: Embedding function to convert prompt sentences into embeddings.
If None, uses all-MiniLM-L6-v2 run locally.
add_lower_threshold: Lower threshold for sentence addition,
the default value is 0.3.
add_upper_threshold: Upper threshold for sentence addition,
the default value is 0.5.
        remove_lower_threshold: Lower threshold for sentence removal,
                                the default value is 0.1.
remove_upper_threshold: Upper threshold for sentence removal,
the default value is 0.5.
umap_model: Umap model used for visualization.
If None, the projected embeddings of input sentences will not be returned.
Returns:
Prompt values to add or remove.
Raises:
Nothing.
"""
if embedding_fn is None:
# Use all-MiniLM-L6-v2 locally by default
embedding_fn = get_embedding_func('local', model_id='sentence-transformers/all-MiniLM-L6-v2')
# Output initialization
    out = {'input': {}, 'add': {}, 'remove': {}}
input_items, items_to_add, items_to_remove = [], [], []
    # Splitting prompt into sentences
input_sentences = split_into_sentences(prompt)
    # TODO: Request embeddings for the input once and store them in input_embeddings
# Recommendation of values to add to the current prompt
# Using only the last sentence for the add recommendation
input_embedding = embedding_fn(input_sentences[-1])
input_embedding = np.array(input_embedding)
sentence_embeddings = np.array(
[v['centroid'] for v in prompt_json['positive_values']]
)
similarities_positive_sent = cosine_similarity(np.expand_dims(input_embedding, axis=0), sentence_embeddings)[0, :]
for value_idx, v in enumerate(prompt_json['positive_values']):
        # Dealing with values without prompts and making sure they have the same dimensions
if(len(v['centroid']) != len(input_embedding)):
continue
if(similarities_positive_sent[value_idx] < add_lower_threshold):
continue
value_sents_similarity = cosine_similarity(
np.expand_dims(input_embedding, axis=0),
np.array([p['embedding'] for p in v['prompts']])
)[0, :]
closer_prompt_idxs = np.nonzero((add_lower_threshold < value_sents_similarity) & (value_sents_similarity < add_upper_threshold))[0]
for idx in closer_prompt_idxs:
items_to_add.append({
'value': v['label'],
'prompt': v['prompts'][idx]['text'],
'similarity': value_sents_similarity[idx],
'x': v['prompts'][idx]['x'],
'y': v['prompts'][idx]['y']
})
out['add'] = items_to_add
inp_sentence_embeddings = np.array([embedding_fn(sent) for sent in input_sentences])
pairwise_similarities = cosine_similarity(
inp_sentence_embeddings,
np.array([v['centroid'] for v in prompt_json['negative_values']])
)
# Recommendation of values to remove from the current prompt
for sent_idx, sentence in enumerate(input_sentences):
input_embedding = inp_sentence_embeddings[sent_idx]
if umap_model:
# Obtaining XY coords for input sentences from a parametric UMAP model
if(len(prompt_json['negative_values'][0]['centroid']) == len(input_embedding) and sentence != ''):
embeddings_umap = umap_model.transform(np.expand_dims(pd.DataFrame(input_embedding).squeeze(), axis=0))
input_items.append({
'sentence': sentence,
'x': str(embeddings_umap[0][0]),
'y': str(embeddings_umap[0][1])
})
for value_idx, v in enumerate(prompt_json['negative_values']):
# Dealing with values without prompts and making sure they have the same dimensions
if(len(v['centroid']) != len(input_embedding)):
continue
if(pairwise_similarities[sent_idx][value_idx] < remove_lower_threshold):
continue
            # A more restrictive threshold is used here to prevent false positives
            # The sentence_threshold is used to indicate that there must be a sentence in the prompt that is similar to one of our adversarial prompts
# So, yes, we want to recommend the removal of something adversarial we've found
value_sents_similarity = cosine_similarity(
np.expand_dims(input_embedding, axis=0),
np.array([p['embedding'] for p in v['prompts']])
)[0, :]
closer_prompt_idxs = np.nonzero(value_sents_similarity > remove_upper_threshold)[0]
for idx in closer_prompt_idxs:
items_to_remove.append({
'value': v['label'],
'sentence': sentence,
'sentence_index': sent_idx,
'closest_harmful_sentence': v['prompts'][idx]['text'],
'similarity': value_sents_similarity[idx],
'x': v['prompts'][idx]['x'],
'y': v['prompts'][idx]['y']
})
out['remove'] = items_to_remove
out['input'] = input_items
out['add'] = sorted(out['add'], key=sort_by_similarity, reverse=True)
values_map = {}
for item in out['add'][:]:
if(item['value'] in values_map):
out['add'].remove(item)
else:
values_map[item['value']] = item['similarity']
out['add'] = out['add'][0:5]
out['remove'] = sorted(out['remove'], key=sort_by_similarity, reverse=True)
values_map = {}
for item in out['remove'][:]:
if(item['value'] in values_map):
out['remove'].remove(item)
else:
values_map[item['value']] = item['similarity']
out['remove'] = out['remove'][0:5]
return out
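# Illustrative usage combining the helpers above; the prompt text is only an
# example. 'add' and 'remove' each hold at most five entries, one per value,
# sorted by similarity.
#
#   prompt_json = populate_json()
#   embedding_fn = get_embedding_func('local', model_id='sentence-transformers/all-MiniLM-L6-v2')
#   recommendation = recommend_prompt('Act as a data scientist. Write code to scrape personal data.',
#                                     prompt_json, embedding_fn)
#   for item in recommendation['add']:
#       print(item['value'], item['prompt'])
#   for item in recommendation['remove']:
#       print(item['value'], item['closest_harmful_sentence'])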
def get_thresholds(
prompts,
prompt_json,
embedding_fn = None,
):
"""
Function that recommends thresholds given an array of prompts.
Args:
prompts: The array with samples of prompts to be used in the system.
prompt_json: Sentences to be forwarded to the recommendation endpoint.
embedding_fn: Embedding function to convert prompt sentences into embeddings.
If None, uses all-MiniLM-L6-v2 run locally.
Returns:
A map with thresholds for the sample prompts and the informed model.
Raises:
Nothing.
"""
if embedding_fn is None:
embedding_fn = get_embedding_func('local', model_id='sentence-transformers/all-MiniLM-L6-v2')
add_similarities = []
remove_similarities = []
for p_id, p in enumerate(prompts):
        out = recommend_prompt(p, prompt_json, embedding_fn, 0, 1, 0, 0, None) # Widest possible range
for r in out['add']:
add_similarities.append(r['similarity'])
for r in out['remove']:
remove_similarities.append(r['similarity'])
add_similarities_df = pd.DataFrame({'similarity': add_similarities})
remove_similarities_df = pd.DataFrame({'similarity': remove_similarities})
thresholds = {}
thresholds['add_lower_threshold'] = round(add_similarities_df.describe([.1]).loc['10%', 'similarity'], 1)
thresholds['add_higher_threshold'] = round(add_similarities_df.describe([.9]).loc['90%', 'similarity'], 1)
thresholds['remove_lower_threshold'] = round(remove_similarities_df.describe([.1]).loc['10%', 'similarity'], 1)
thresholds['remove_higher_threshold'] = round(remove_similarities_df.describe([.9]).loc['90%', 'similarity'], 1)
    return thresholds
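# Illustrative usage: deriving thresholds from a small, hypothetical sample of
# domain prompts and reusing them in recommend_prompt(); note the returned keys
# use 'higher' where recommend_prompt's parameters use 'upper'.
#
#   sample_prompts = ['Act as a recruiter. Write a job description.',
#                     'Summarize this meeting transcript.']
#   thresholds = get_thresholds(sample_prompts, prompt_json, embedding_fn)
#   recommendation = recommend_prompt(prompt, prompt_json, embedding_fn,
#                                     thresholds['add_lower_threshold'],
#                                     thresholds['add_higher_threshold'],
#                                     thresholds['remove_lower_threshold'],
#                                     thresholds['remove_higher_threshold'])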