Upload tensor_type_testing.py
tensor_type_testing.py +128 -0
ADDED
@@ -0,0 +1,128 @@
# tensor_type_testing.py
# Python 3.11.2

import os
import sys

import numpy as np
import easy_llama as ez

from typing import Union

# Read the ten evaluation prompts from ./inputs/0.txt ... ./inputs/9.txt
INPUT_TEXTS_AS_TEXT: list[str] = []

for i in range(10):
    with open(f'./inputs/{i}.txt', 'r') as file:
        INPUT_TEXTS_AS_TEXT.append(file.read())

BASELINE_MODEL_PATH = '/opt/workspace/gguf/Qwen2.5-14B-BF16.gguf'
BASELINE_MODEL_FILENAME = os.path.basename(BASELINE_MODEL_PATH)
QUANT_MODEL_DIR = '/opt/workspace/gguf/'
QUANT_MODEL_FILES = [
    'Qwen2.5-14B-Q2_K.gguf',
    'Qwen2.5-14B-EQ2_K-FQ8_0-AQ8_0-OQ8_0.gguf',
    'Qwen2.5-14B-EQ8_0-FQ2_K-AQ8_0-OQ8_0.gguf',
    'Qwen2.5-14B-EQ8_0-FQ8_0-AQ2_K-OQ8_0.gguf',
    'Qwen2.5-14B-EQ8_0-FQ8_0-AQ8_0-OQ2_K.gguf',
    'Qwen2.5-14B-Q8_0.gguf'
]

def msd(a: np.ndarray, b: np.ndarray) -> np.floating:
    """Mean-squared deviation between two arrays of logits."""
    return np.mean((a - b) ** 2)

def tokenize_prompt(llama: ez.Llama, prompt: str) -> list[int]:
    return llama.tokenize(
        text_bytes=prompt.encode('utf-8', 'strict'),
        add_special=True,
        parse_special=False
    )

def eval_text(llama: ez.Llama, text_toks: list[int]) -> np.ndarray:
    llama.reset()
    logits = llama.eval(input_tokens=text_toks, logits_all=True)
    return logits

def load_llama(model_file: str) -> ez.Llama:
    return ez.Llama(
        path_model=model_file,
        n_gpu_layers=10,
        use_mmap=False,
        use_mlock=False,
        n_ctx=5120,
        offload_kqv=True,
        warmup=False,
        verbose=False
    )

def get_model_results(model_path: str) -> list[np.ndarray]:
    """Load a model, evaluate all tokenized prompts, and free the model."""
    print('Load model...')
    llama = load_llama(model_path)
    print('Evaluate prompts...')
    results = [eval_text(llama, toks) for toks in input_texts_as_tokens]
    print('Unload model...')
    llama.free()
    return results

def main() -> int:

    # Tokenized prompts are shared with get_model_results() via this global.
    global input_texts_as_tokens

    results: dict[str, list[Union[list[np.floating], np.floating]]] = {}

    # Tokenize all prompts once, using the baseline model's tokenizer.
    baseline_llama = load_llama(BASELINE_MODEL_PATH)
    input_texts_as_tokens = [
        tokenize_prompt(baseline_llama, text) for text in INPUT_TEXTS_AS_TEXT
    ]
    n_inputs = len(input_texts_as_tokens)
    max_len_input = max(len(toks) for toks in input_texts_as_tokens)
    min_len_input = min(len(toks) for toks in input_texts_as_tokens)
    avg_len_input = sum(len(toks) for toks in input_texts_as_tokens) / n_inputs
    n_input_tokens = sum(len(toks) for toks in input_texts_as_tokens)
    print(f'          Number of input texts: {n_inputs}')
    print(f'Shortest input length in tokens: {min_len_input}')
    print(f' Longest input length in tokens: {max_len_input}')
    print(f' Average input length in tokens: {avg_len_input}')
    print(f'   Total number of input tokens: {n_input_tokens}')
    print('-' * 80)
    baseline_llama.free()

    print(f'Evaluating baseline model {BASELINE_MODEL_FILENAME}...')
    baseline_results = get_model_results(BASELINE_MODEL_PATH)

    for quant_file in QUANT_MODEL_FILES:
        quant_path = os.path.join(QUANT_MODEL_DIR, quant_file)
        if not os.path.exists(quant_path):
            print(f'Error: {quant_path} not found. Skipping.')
            continue

        print('-' * 80)
        print(f'Now processing: {quant_file}')
        quant_results = get_model_results(quant_path)

        print('Compute MSD...')
        # Per-prompt mean-squared deviation of this quant's logits from the
        # BF16 baseline's logits over the same token sequence.
        deviations = [
            msd(baseline_results[i], quant_results[i])
            for i in range(len(quant_results))
        ]
        avg = np.mean(deviations)

        results[quant_file] = [deviations, avg]

        print(
            f'Mean-Squared Deviation - '
            f'{BASELINE_MODEL_FILENAME} vs. {os.path.basename(quant_path)}:'
        )
        for i in range(len(input_texts_as_tokens)):
            print(f'-- Prompt {i}: {deviations[i]}')
        print(f'Average MSD: {avg}')

    print('-' * 80)
    print(f'Average Mean-Squared Deviation compared to {BASELINE_MODEL_FILENAME}:')
    print('-' * 80)
    for k, v in results.items():
        print(f'{k:>60} -- {v[1]}')
    print('-' * 80)

    return 0

if __name__ == '__main__':
    sys.exit(main())
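
For reference, the msd metric above is the elementwise mean-squared deviation, MSD(a, b) = mean((a - b)^2), computed over every logit position; lower values mean the quantized model's logits track the BF16 baseline more closely. A minimal, self-contained sanity check of the metric (numpy only; the arrays and noise scales below are synthetic stand-ins for real logits, not outputs from the models above):

import numpy as np

def msd(a: np.ndarray, b: np.ndarray) -> np.floating:
    # Mean of the elementwise squared differences.
    return np.mean((a - b) ** 2)

rng = np.random.default_rng(0)
# Toy stand-ins for (n_tokens, n_vocab) logit arrays; a real run uses the
# arrays collected by get_model_results() above.
baseline_logits = rng.normal(size=(8, 100))
small_error = baseline_logits + rng.normal(scale=0.01, size=(8, 100))
large_error = baseline_logits + rng.normal(scale=1.0, size=(8, 100))

# A closer match yields a smaller MSD.
assert msd(baseline_logits, small_error) < msd(baseline_logits, large_error)
print(msd(baseline_logits, small_error), msd(baseline_logits, large_error))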