allow passing a sink cache back in

Files changed (2) hide show

custom_generate/generate.py CHANGED Viewed

@@ -194,6 +194,17 @@ class SinkCache(Cache):
 def generate(model, window_length=256, num_sink_tokens=4, **kwargs):
-    past_key_values = SinkCache(window_length=window_length, num_sink_tokens=num_sink_tokens)
     generation_outputs = model.generate(**kwargs, past_key_values=past_key_values, use_cache=True)
     return generation_outputs

 def generate(model, window_length=256, num_sink_tokens=4, **kwargs):
+    # compatibility with transformers 4.52: we must pop `custom_generate` from kwargs, otherwise it will result in an
+    # infinite loop. This is solved in transformers 4.53.
+    kwargs.pop("custom_generate", None)
+    # prepare the cache, if it was not passed.
+    past_key_values = kwargs.pop("past_key_values", None)
+    if past_key_values is None:
+        past_key_values = SinkCache(window_length=window_length, num_sink_tokens=num_sink_tokens)
+    elif not isinstance(past_key_values, SinkCache):
+        raise ValueError(f"`past_key_values` must be a `SinkCache` instance, got a {type(past_key_values)} instance")
+    # generate with the cache
     generation_outputs = model.generate(**kwargs, past_key_values=past_key_values, use_cache=True)
     return generation_outputs

custom_generate/requirements.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- transformers>=4.53.0 # 4.52 results in an infinite loop