from collections import OrderedDict
from functools import wraps

import sentry_sdk
from sentry_sdk.ai.monitoring import set_ai_pipeline_name, record_token_usage
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.ai.utils import set_data_normalized
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.tracing import Span
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.utils import logger, capture_internal_exceptions

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Any, List, Callable, Dict, Union, Optional
    from uuid import UUID

try:
    from langchain_core.messages import BaseMessage
    from langchain_core.outputs import LLMResult
    from langchain_core.callbacks import (
        manager,
        BaseCallbackHandler,
    )
    from langchain_core.agents import AgentAction, AgentFinish
except ImportError:
    raise DidNotEnable("langchain not installed")


DATA_FIELDS = {
    "temperature": SPANDATA.AI_TEMPERATURE,
    "top_p": SPANDATA.AI_TOP_P,
    "top_k": SPANDATA.AI_TOP_K,
    "function_call": SPANDATA.AI_FUNCTION_CALL,
    "tool_calls": SPANDATA.AI_TOOL_CALLS,
    "tools": SPANDATA.AI_TOOLS,
    "response_format": SPANDATA.AI_RESPONSE_FORMAT,
    "logit_bias": SPANDATA.AI_LOGIT_BIAS,
    "tags": SPANDATA.AI_TAGS,
}
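
# The keys above are LangChain invocation parameters; when one of them shows
# up in a run's params, its value is copied onto the span under the matching
# SPANDATA attribute (see the `set_data_normalized` loops in the handlers).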


NO_COLLECT_TOKEN_MODELS = [
    "openai-chat",
    "anthropic-chat",
    "cohere-chat",
    "huggingface_endpoint",
]
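
# For these model types the provider-specific Sentry integrations (OpenAI,
# Anthropic, etc.) presumably already report token usage on their own spans,
# so this integration skips its tiktoken-based counting for them to avoid
# double reporting.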


class LangchainIntegration(Integration):
    identifier = "langchain"
    origin = f"auto.ai.{identifier}"

    max_spans = 1024

    def __init__(
        self, include_prompts=True, max_spans=1024, tiktoken_encoding_name=None
    ):
        self.include_prompts = include_prompts
        self.max_spans = max_spans
        self.tiktoken_encoding_name = tiktoken_encoding_name

    @staticmethod
    def setup_once():
        manager._configure = _wrap_configure(manager._configure)
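

# Enabling the integration is a standard `sentry_sdk.init` call. A minimal
# sketch (the DSN is a placeholder):
#
#     import sentry_sdk
#     from sentry_sdk.integrations.langchain import LangchainIntegration
#
#     sentry_sdk.init(
#         dsn="https://<key>@<host>/<project>",
#         traces_sample_rate=1.0,
#         integrations=[
#             LangchainIntegration(
#                 include_prompts=False,  # drop prompts/responses (PII)
#                 tiktoken_encoding_name="cl100k_base",
#             )
#         ],
#     )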


class WatchedSpan:
    span = None
    num_completion_tokens = 0
    num_prompt_tokens = 0
    no_collect_tokens = False
    is_pipeline = False

    def __init__(self, span):
        self.span = span
        # Created per instance: a class-level `children = []` would be a
        # single list shared by every WatchedSpan.
        self.children = []


class SentryLangchainCallback(BaseCallbackHandler):
    """Base callback handler that can be used to handle callbacks from langchain."""

    # Class-level, and apparently on purpose: _create_span looks up parent
    # runs in this map, and sharing it across handler instances lets a handler
    # created for a nested run find spans opened by another handler.
    span_map = OrderedDict()

    max_span_map_size = 0

    def __init__(self, max_span_map_size, include_prompts, tiktoken_encoding_name=None):
        self.max_span_map_size = max_span_map_size
        self.include_prompts = include_prompts

        self.tiktoken_encoding = None
        if tiktoken_encoding_name is not None:
            import tiktoken

            self.tiktoken_encoding = tiktoken.get_encoding(tiktoken_encoding_name)

    def count_tokens(self, s):
        if self.tiktoken_encoding is not None:
            return len(self.tiktoken_encoding.encode_ordinary(s))
        return 0
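
    # For example, with `tiktoken_encoding_name="cl100k_base"` (an encoding
    # name tiktoken ships with), `count_tokens("hello world")` returns the
    # length of the encoded token list. Without an encoding configured it
    # always returns 0, which effectively disables streamed token counting.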

    def gc_span_map(self):
        # Evict (and close) the oldest spans once the map exceeds its cap.
        while len(self.span_map) > self.max_span_map_size:
            run_id, watched_span = self.span_map.popitem(last=False)
            self._exit_span(watched_span, run_id)

    def _handle_error(self, run_id, error):
        if not run_id or run_id not in self.span_map:
            return

        span_data = self.span_map[run_id]
        if not span_data:
            return
        sentry_sdk.capture_exception(error, span_data.span.scope)
        span_data.span.__exit__(None, None, None)
        del self.span_map[run_id]

    def _normalize_langchain_message(self, message):
        parsed = {"content": message.content, "role": message.type}
        parsed.update(message.additional_kwargs)
        return parsed
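
    # For example, a `HumanMessage(content="Hi")` normalizes to
    # {"content": "Hi", "role": "human"}, since `message.type` is "human"
    # for that class, plus whatever `additional_kwargs` carries.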

    def _create_span(self, run_id, parent_id, **kwargs):
        watched_span = None
        if parent_id:
            parent_span = self.span_map.get(parent_id)
            if parent_span:
                watched_span = WatchedSpan(parent_span.span.start_child(**kwargs))
                parent_span.children.append(watched_span)
        if watched_span is None:
            watched_span = WatchedSpan(sentry_sdk.start_span(**kwargs))

        if kwargs.get("op", "").startswith("ai.pipeline."):
            if kwargs.get("name"):
                set_ai_pipeline_name(kwargs.get("name"))
            watched_span.is_pipeline = True

        watched_span.span.__enter__()
        self.span_map[run_id] = watched_span
        self.gc_span_map()
        return watched_span
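
    # New spans nest under the span of `parent_id` when that run is being
    # watched, so the Sentry trace mirrors LangChain's run tree; "ai.pipeline.*"
    # spans additionally set the pipeline name that child runs are grouped
    # under.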

    def _exit_span(self, span_data, run_id):
        if span_data.is_pipeline:
            set_ai_pipeline_name(None)

        span_data.span.__exit__(None, None, None)
        del self.span_map[run_id]

    def on_llm_start(
        self,
        serialized,
        prompts,
        *,
        run_id,
        tags=None,
        parent_run_id=None,
        metadata=None,
        **kwargs,
    ):
        """Run when LLM starts running."""
        with capture_internal_exceptions():
            if not run_id:
                return
            all_params = kwargs.get("invocation_params", {})
            all_params.update(serialized.get("kwargs", {}))
            watched_span = self._create_span(
                run_id,
                # `parent_run_id` is bound as a keyword-only parameter above,
                # so it never lands in **kwargs; use it directly.
                parent_run_id,
                op=OP.LANGCHAIN_RUN,
                name=kwargs.get("name") or "Langchain LLM call",
                origin=LangchainIntegration.origin,
            )
            span = watched_span.span
            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompts)
            for k, v in DATA_FIELDS.items():
                if k in all_params:
                    set_data_normalized(span, v, all_params[k])

    def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
        """Run when Chat Model starts running."""
        with capture_internal_exceptions():
            if not run_id:
                return
            all_params = kwargs.get("invocation_params", {})
            all_params.update(serialized.get("kwargs", {}))
            watched_span = self._create_span(
                run_id,
                kwargs.get("parent_run_id"),
                op=OP.LANGCHAIN_CHAT_COMPLETIONS_CREATE,
                name=kwargs.get("name") or "Langchain Chat Model",
                origin=LangchainIntegration.origin,
            )
            span = watched_span.span
            model = all_params.get(
                "model", all_params.get("model_name", all_params.get("model_id"))
            )
            watched_span.no_collect_tokens = any(
                x in all_params.get("_type", "") for x in NO_COLLECT_TOKEN_MODELS
            )

            # Default to "" so a missing "_type" does not raise a TypeError.
            if not model and "anthropic" in all_params.get("_type", ""):
                model = "claude-2"
            if model:
                span.set_data(SPANDATA.AI_MODEL_ID, model)
            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(
                    span,
                    SPANDATA.AI_INPUT_MESSAGES,
                    [
                        [self._normalize_langchain_message(x) for x in list_]
                        for list_ in messages
                    ],
                )
            for k, v in DATA_FIELDS.items():
                if k in all_params:
                    set_data_normalized(span, v, all_params[k])
            if not watched_span.no_collect_tokens:
                for list_ in messages:
                    for message in list_:
                        self.span_map[run_id].num_prompt_tokens += self.count_tokens(
                            message.content
                        ) + self.count_tokens(message.type)

    def on_llm_new_token(self, token, *, run_id, **kwargs):
        """Run on new LLM token. Only available when streaming is enabled."""
        with capture_internal_exceptions():
            if not run_id or run_id not in self.span_map:
                return
            span_data = self.span_map[run_id]
            if not span_data or span_data.no_collect_tokens:
                return
            span_data.num_completion_tokens += self.count_tokens(token)

    def on_llm_end(self, response, *, run_id, **kwargs):
        """Run when LLM ends running."""
        with capture_internal_exceptions():
            # Guard against spans already evicted by gc_span_map; a bare
            # `self.span_map[run_id]` below would raise a KeyError.
            if not run_id or run_id not in self.span_map:
                return

            token_usage = (
                response.llm_output.get("token_usage") if response.llm_output else None
            )

            span_data = self.span_map[run_id]
            if not span_data:
                return

            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(
                    span_data.span,
                    SPANDATA.AI_RESPONSES,
                    [[x.text for x in list_] for list_ in response.generations],
                )

            if not span_data.no_collect_tokens:
                if token_usage:
                    # Prefer the usage reported by the provider...
                    record_token_usage(
                        span_data.span,
                        token_usage.get("prompt_tokens"),
                        token_usage.get("completion_tokens"),
                        token_usage.get("total_tokens"),
                    )
                else:
                    # ...falling back to the local tiktoken-based estimate.
                    record_token_usage(
                        span_data.span,
                        span_data.num_prompt_tokens,
                        span_data.num_completion_tokens,
                    )

            self._exit_span(span_data, run_id)

    def on_llm_error(self, error, *, run_id, **kwargs):
        """Run when LLM errors."""
        with capture_internal_exceptions():
            self._handle_error(run_id, error)

    def on_chain_start(self, serialized, inputs, *, run_id, **kwargs):
        """Run when chain starts running."""
        with capture_internal_exceptions():
            if not run_id:
                return
            watched_span = self._create_span(
                run_id,
                kwargs.get("parent_run_id"),
                # A top-level chain (no parent run) becomes the ai.pipeline
                # root; nested chains become plain runs.
                op=(
                    OP.LANGCHAIN_RUN
                    if kwargs.get("parent_run_id") is not None
                    else OP.LANGCHAIN_PIPELINE
                ),
                name=kwargs.get("name") or "Chain execution",
                origin=LangchainIntegration.origin,
            )
            metadata = kwargs.get("metadata")
            if metadata:
                set_data_normalized(watched_span.span, SPANDATA.AI_METADATA, metadata)

    def on_chain_end(self, outputs, *, run_id, **kwargs):
        """Run when chain ends running."""
        with capture_internal_exceptions():
            if not run_id or run_id not in self.span_map:
                return

            span_data = self.span_map[run_id]
            if not span_data:
                return
            self._exit_span(span_data, run_id)

    def on_chain_error(self, error, *, run_id, **kwargs):
        """Run when chain errors."""
        self._handle_error(run_id, error)

    def on_agent_action(self, action, *, run_id, **kwargs):
        """Run on agent action."""
        with capture_internal_exceptions():
            if not run_id:
                return
            watched_span = self._create_span(
                run_id,
                kwargs.get("parent_run_id"),
                op=OP.LANGCHAIN_AGENT,
                name=action.tool or "AI tool usage",
                origin=LangchainIntegration.origin,
            )
            if (
                action.tool_input
                and should_send_default_pii()
                and self.include_prompts
            ):
                set_data_normalized(
                    watched_span.span, SPANDATA.AI_INPUT_MESSAGES, action.tool_input
                )

    def on_agent_finish(self, finish, *, run_id, **kwargs):
        """Run when the agent finishes."""
        with capture_internal_exceptions():
            if not run_id or run_id not in self.span_map:
                return

            span_data = self.span_map[run_id]
            if not span_data:
                return
            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(
                    span_data.span, SPANDATA.AI_RESPONSES, finish.return_values.items()
                )
            self._exit_span(span_data, run_id)

    def on_tool_start(self, serialized, input_str, *, run_id, **kwargs):
        """Run when tool starts running."""
        with capture_internal_exceptions():
            if not run_id:
                return
            watched_span = self._create_span(
                run_id,
                kwargs.get("parent_run_id"),
                op=OP.LANGCHAIN_TOOL,
                name=serialized.get("name") or kwargs.get("name") or "AI tool usage",
                origin=LangchainIntegration.origin,
            )
            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(
                    watched_span.span,
                    SPANDATA.AI_INPUT_MESSAGES,
                    kwargs.get("inputs", [input_str]),
                )
            if kwargs.get("metadata"):
                set_data_normalized(
                    watched_span.span, SPANDATA.AI_METADATA, kwargs.get("metadata")
                )

    def on_tool_end(self, output, *, run_id, **kwargs):
        """Run when tool ends running."""
        with capture_internal_exceptions():
            if not run_id or run_id not in self.span_map:
                return

            span_data = self.span_map[run_id]
            if not span_data:
                return
            if should_send_default_pii() and self.include_prompts:
                set_data_normalized(span_data.span, SPANDATA.AI_RESPONSES, output)
            self._exit_span(span_data, run_id)

    def on_tool_error(self, error, *args, run_id, **kwargs):
        """Run when tool errors."""
        self._handle_error(run_id, error)
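

# `LangchainIntegration.setup_once` wraps `manager._configure`, the hook
# LangChain calls to assemble a CallbackManager, so that a
# SentryLangchainCallback is injected into the local callbacks of every run
# (unless the caller already registered one).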
def _wrap_configure(f):

    @wraps(f)
    def new_configure(*args, **kwargs):
        integration = sentry_sdk.get_client().get_integration(LangchainIntegration)
        if integration is None:
            return f(*args, **kwargs)

        with capture_internal_exceptions():
            new_callbacks = []
            if "local_callbacks" in kwargs:
                existing_callbacks = kwargs["local_callbacks"]
                kwargs["local_callbacks"] = new_callbacks
            elif len(args) > 2:
                # The third positional argument of _configure is the local
                # callbacks; swap it for our copy.
                existing_callbacks = args[2]
                args = (
                    args[0],
                    args[1],
                    new_callbacks,
                ) + args[3:]
            else:
                existing_callbacks = []

            # _configure accepts a single handler, a list of handlers, or None.
            if existing_callbacks:
                if isinstance(existing_callbacks, list):
                    new_callbacks.extend(existing_callbacks)
                elif isinstance(existing_callbacks, BaseCallbackHandler):
                    new_callbacks.append(existing_callbacks)
                else:
                    logger.debug("Unknown callback type: %s", existing_callbacks)

            already_added = any(
                isinstance(callback, SentryLangchainCallback)
                for callback in new_callbacks
            )
            if not already_added:
                new_callbacks.append(
                    SentryLangchainCallback(
                        integration.max_spans,
                        integration.include_prompts,
                        integration.tiktoken_encoding_name,
                    )
                )

        # Outside the capture block, so a swallowed error above cannot turn
        # the wrapped call into an implicit `return None`.
        return f(*args, **kwargs)

    return new_configure