Spaces:
Running
Running
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#
# Model parameters
def parameters(reasoning: bool):
    """
    Return the text-generation sampling parameters for the selected mode.

    Args:
        reasoning (bool): Whether reasoning mode is active. Evaluated by
            truthiness: a truthy value selects the reasoning preset, a
            falsy value selects the non-reasoning preset.

    Returns:
        tuple: Five values, always in this order:
            - temperature (float): Sampling randomness; lower values make
              the output more deterministic.
            - top_k (int): Sampling is limited to the top_k most likely
              next tokens.
            - min_p (float): Minimum probability threshold for token
              inclusion (0.0 means no minimum).
            - top_p (float): Nucleus sampling threshold, i.e. the
              cumulative probability cutoff for token selection.
            - repetition_penalty (float): Penalty applied to repeated
              tokens (1.0 means no penalty).
    """
    # Reasoning preset: lower temperature and a wide nucleus (95%) to keep
    # generation focused while still covering most of the probability mass.
    if reasoning:
        return (
            0.6,   # temperature: less randomness for focused reasoning
            20,    # top_k: restrict sampling to the 20 most probable tokens
            0.0,   # min_p: no minimum probability threshold
            0.95,  # top_p: nucleus cutoff at 95% cumulative probability
            1.0,   # repetition_penalty: no penalty on repeated tokens
        )

    # Non-reasoning preset: slightly higher temperature for more diverse
    # output, paired with a narrower nucleus (80%). top_k, min_p and
    # repetition_penalty are the same as in the reasoning preset.
    return (
        0.7,  # temperature: more randomness for creative outputs
        20,   # top_k: restrict sampling to the 20 most probable tokens
        0.0,  # min_p: no minimum probability threshold
        0.8,  # top_p: nucleus cutoff at 80% cumulative probability
        1.0,  # repetition_penalty: no penalty on repeated tokens
    )