import base64 import datetime import functools import http.client import json import logging import os import re import socket import sys import threading from copy import deepcopy from pathlib import Path from typing import ( IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Mapping, MutableMapping, NamedTuple, Optional, Sequence, TextIO, Tuple, Union, ) import click import requests import yaml from wandb_gql import Client, gql from wandb_gql.client import RetryError from wandb_graphql.language.ast import Document import wandb from wandb import env, util from wandb.apis.normalize import normalize_exceptions, parse_backend_error_messages from wandb.errors import AuthenticationError, CommError, UnsupportedError, UsageError from wandb.integration.sagemaker import parse_sm_secrets from wandb.old.settings import Settings from wandb.proto.wandb_internal_pb2 import ServerFeature from wandb.sdk.artifacts._validators import is_artifact_registry_project from wandb.sdk.internal._generated import SERVER_FEATURES_QUERY_GQL, ServerFeaturesQuery from wandb.sdk.internal.thread_local_settings import _thread_local_api_settings from wandb.sdk.lib.gql_request import GraphQLSession from wandb.sdk.lib.hashutil import B64MD5, md5_file_b64 from ..lib import credentials, retry from ..lib.filenames import DIFF_FNAME, METADATA_FNAME from ..lib.gitlib import GitRepo from . import context from .progress import Progress logger = logging.getLogger(__name__) LAUNCH_DEFAULT_PROJECT = "model-registry" if TYPE_CHECKING: from typing import Literal, TypedDict from .progress import ProgressFn class CreateArtifactFileSpecInput(TypedDict, total=False): """Corresponds to `type CreateArtifactFileSpecInput` in schema.graphql.""" artifactID: str name: str md5: str mimetype: Optional[str] artifactManifestID: Optional[str] uploadPartsInput: Optional[List[Dict[str, object]]] class CreateArtifactFilesResponseFile(TypedDict): id: str name: str displayName: str uploadUrl: Optional[str] uploadHeaders: Sequence[str] uploadMultipartUrls: "UploadPartsResponse" storagePath: str artifact: "CreateArtifactFilesResponseFileNode" class CreateArtifactFilesResponseFileNode(TypedDict): id: str class UploadPartsResponse(TypedDict): uploadUrlParts: List["UploadUrlParts"] uploadID: str class UploadUrlParts(TypedDict): partNumber: int uploadUrl: str class CompleteMultipartUploadArtifactInput(TypedDict): """Corresponds to `type CompleteMultipartUploadArtifactInput` in schema.graphql.""" completeMultipartAction: str completedParts: Dict[int, str] artifactID: str storagePath: str uploadID: str md5: str class CompleteMultipartUploadArtifactResponse(TypedDict): digest: str class DefaultSettings(TypedDict): section: str git_remote: str ignore_globs: Optional[List[str]] base_url: Optional[str] root_dir: Optional[str] api_key: Optional[str] entity: Optional[str] organization: Optional[str] project: Optional[str] _extra_http_headers: Optional[Mapping[str, str]] _proxies: Optional[Mapping[str, str]] _Response = MutableMapping SweepState = Literal["RUNNING", "PAUSED", "CANCELED", "FINISHED"] Number = Union[int, float] # class _MappingSupportsCopy(Protocol): # def copy(self) -> "_MappingSupportsCopy": ... # def keys(self) -> Iterable: ... # def __getitem__(self, name: str) -> Any: ... httpclient_logger = logging.getLogger("http.client") if os.environ.get("WANDB_DEBUG"): httpclient_logger.setLevel(logging.DEBUG) def check_httpclient_logger_handler() -> None: # Only enable http.client logging if WANDB_DEBUG is set if not os.environ.get("WANDB_DEBUG"): return if httpclient_logger.handlers: return # Enable HTTPConnection debug logging to the logging framework level = logging.DEBUG def httpclient_log(*args: Any) -> None: httpclient_logger.log(level, " ".join(args)) # mask the print() built-in in the http.client module to use logging instead http.client.print = httpclient_log # type: ignore[attr-defined] # enable debugging http.client.HTTPConnection.debuglevel = 1 root_logger = logging.getLogger("wandb") if root_logger.handlers: httpclient_logger.addHandler(root_logger.handlers[0]) class _ThreadLocalData(threading.local): context: Optional[context.Context] def __init__(self) -> None: self.context = None class _OrgNames(NamedTuple): entity_name: str display_name: str def _match_org_with_fetched_org_entities( organization: str, orgs: Sequence[_OrgNames] ) -> str: """Match the organization provided in the path with the org entity or org name of the input entity. Args: organization: The organization name to match orgs: List of tuples containing (org_entity_name, org_display_name) Returns: str: The matched org entity name Raises: ValueError: If no matching organization is found or if multiple orgs exist without a match """ for org_names in orgs: if organization in org_names: return org_names.entity_name if len(orgs) == 1: raise ValueError( f"Expecting the organization name or entity name to match {orgs[0].display_name!r} " f"and cannot be linked/fetched with {organization!r}. " "Please update the target path with the correct organization name." ) raise ValueError( "Personal entity belongs to multiple organizations " f"and cannot be linked/fetched with {organization!r}. " "Please update the target path with the correct organization name " "or use a team entity in the entity settings." ) class Api: """W&B Internal Api wrapper. Note: Settings are automatically overridden by looking for a `wandb/settings` file in the current working directory or its parent directory. If none can be found, we look in the current user's home directory. Args: default_settings(dict, optional): If you aren't using a settings file, or you wish to override the section to use in the settings file Override the settings here. """ HTTP_TIMEOUT = env.get_http_timeout(20) FILE_PUSHER_TIMEOUT = env.get_file_pusher_timeout() _global_context: context.Context _local_data: _ThreadLocalData def __init__( self, default_settings: Optional[ Union[ "wandb.sdk.wandb_settings.Settings", "wandb.sdk.internal.settings_static.SettingsStatic", Settings, dict, ] ] = None, load_settings: bool = True, retry_timedelta: datetime.timedelta = datetime.timedelta( # okay because it's immutable days=7 ), environ: MutableMapping = os.environ, retry_callback: Optional[Callable[[int, str], Any]] = None, api_key: Optional[str] = None, ) -> None: self._environ = environ self._global_context = context.Context() self._local_data = _ThreadLocalData() self.default_settings: DefaultSettings = { "section": "default", "git_remote": "origin", "ignore_globs": [], "base_url": "https://api.wandb.ai", "root_dir": None, "api_key": None, "entity": None, "organization": None, "project": None, "_extra_http_headers": None, "_proxies": None, } self.retry_timedelta = retry_timedelta # todo: Old Settings do not follow the SupportsKeysAndGetItem Protocol default_settings = default_settings or {} self.default_settings.update(default_settings) # type: ignore self.retry_uploads = 10 self._settings = Settings( load_settings=load_settings, root_dir=self.default_settings.get("root_dir"), ) self.git = GitRepo(remote=self.settings("git_remote")) # Mutable settings set by the _file_stream_api self.dynamic_settings = { "system_sample_seconds": 2, "system_samples": 15, "heartbeat_seconds": 30, } # todo: remove these hacky hacks after settings refactor is complete # keeping this code here to limit scope and so that it is easy to remove later self._extra_http_headers = self.settings("_extra_http_headers") or json.loads( self._environ.get("WANDB__EXTRA_HTTP_HEADERS", "{}") ) self._extra_http_headers.update(_thread_local_api_settings.headers or {}) auth = None if api_key: auth = ("api", api_key) elif self.access_token is not None: self._extra_http_headers["Authorization"] = f"Bearer {self.access_token}" elif _thread_local_api_settings.cookies is None: auth = ("api", self.api_key or "") proxies = self.settings("_proxies") or json.loads( self._environ.get("WANDB__PROXIES", "{}") ) self.client = Client( transport=GraphQLSession( headers={ "User-Agent": self.user_agent, "X-WANDB-USERNAME": env.get_username(env=self._environ), "X-WANDB-USER-EMAIL": env.get_user_email(env=self._environ), **self._extra_http_headers, }, use_json=True, # this timeout won't apply when the DNS lookup fails. in that case, it will be 60s # https://bugs.python.org/issue22889 timeout=self.HTTP_TIMEOUT, auth=auth, url=f"{self.settings('base_url')}/graphql", cookies=_thread_local_api_settings.cookies, proxies=proxies, ) ) self.retry_callback = retry_callback self._retry_gql = retry.Retry( self.execute, retry_timedelta=retry_timedelta, check_retry_fn=util.no_retry_auth, retryable_exceptions=(RetryError, requests.RequestException), retry_callback=retry_callback, ) self._current_run_id: Optional[str] = None self._file_stream_api = None self._upload_file_session = requests.Session() if self.FILE_PUSHER_TIMEOUT: self._upload_file_session.put = functools.partial( # type: ignore self._upload_file_session.put, timeout=self.FILE_PUSHER_TIMEOUT, ) if proxies: self._upload_file_session.proxies.update(proxies) # This Retry class is initialized once for each Api instance, so this # defaults to retrying 1 million times per process or 7 days self.upload_file_retry = normalize_exceptions( retry.retriable(retry_timedelta=retry_timedelta)(self.upload_file) ) self.upload_multipart_file_chunk_retry = normalize_exceptions( retry.retriable(retry_timedelta=retry_timedelta)( self.upload_multipart_file_chunk ) ) self._client_id_mapping: Dict[str, str] = {} # Large file uploads to azure can optionally use their SDK self._azure_blob_module = util.get_module("azure.storage.blob") self.query_types: Optional[List[str]] = None self.mutation_types: Optional[List[str]] = None self.server_info_types: Optional[List[str]] = None self.server_use_artifact_input_info: Optional[List[str]] = None self.server_create_artifact_input_info: Optional[List[str]] = None self.server_artifact_fields_info: Optional[List[str]] = None self.server_organization_type_fields_info: Optional[List[str]] = None self.server_supports_enabling_artifact_usage_tracking: Optional[bool] = None self._max_cli_version: Optional[str] = None self._server_settings_type: Optional[List[str]] = None self.fail_run_queue_item_input_info: Optional[List[str]] = None self.create_launch_agent_input_info: Optional[List[str]] = None self.server_create_run_queue_supports_drc: Optional[bool] = None self.server_create_run_queue_supports_priority: Optional[bool] = None self.server_supports_template_variables: Optional[bool] = None self.server_push_to_run_queue_supports_priority: Optional[bool] = None self._server_features_cache: Optional[Dict[str, bool]] = None def gql(self, *args: Any, **kwargs: Any) -> Any: ret = self._retry_gql( *args, retry_cancel_event=self.context.cancel_event, **kwargs, ) return ret def set_local_context(self, api_context: Optional[context.Context]) -> None: self._local_data.context = api_context def clear_local_context(self) -> None: self._local_data.context = None @property def context(self) -> context.Context: return self._local_data.context or self._global_context def reauth(self) -> None: """Ensure the current api key is set in the transport.""" self.client.transport.session.auth = ("api", self.api_key or "") def relocate(self) -> None: """Ensure the current api points to the right server.""" self.client.transport.url = "{}/graphql".format(self.settings("base_url")) def execute(self, *args: Any, **kwargs: Any) -> "_Response": """Wrapper around execute that logs in cases of failure.""" try: return self.client.execute(*args, **kwargs) # type: ignore except requests.exceptions.HTTPError as err: response = err.response assert response is not None logger.exception("Error executing GraphQL.") for error in parse_backend_error_messages(response): wandb.termerror(f"Error while calling W&B API: {error} ({response})") raise def validate_api_key(self) -> bool: """Returns whether the API key stored on initialization is valid.""" res = self.execute(gql("query { viewer { id } }")) return res is not None and res["viewer"] is not None def set_current_run_id(self, run_id: str) -> None: self._current_run_id = run_id @property def current_run_id(self) -> Optional[str]: return self._current_run_id @property def user_agent(self) -> str: return f"W&B Internal Client {wandb.__version__}" @property def api_key(self) -> Optional[str]: if _thread_local_api_settings.api_key: return _thread_local_api_settings.api_key auth = requests.utils.get_netrc_auth(self.api_url) key = None if auth: key = auth[-1] # Environment should take precedence env_key: Optional[str] = self._environ.get(env.API_KEY) sagemaker_key: Optional[str] = parse_sm_secrets().get(env.API_KEY) default_key: Optional[str] = self.default_settings.get("api_key") return env_key or key or sagemaker_key or default_key @property def access_token(self) -> Optional[str]: """Retrieves an access token for authentication. This function attempts to exchange an identity token for a temporary access token from the server, and save it to the credentials file. It uses the path to the identity token as defined in the environment variables. If the environment variable is not set, it returns None. Returns: Optional[str]: The access token if available, otherwise None if no identity token is supplied. Raises: AuthenticationError: If the path to the identity token is not found. """ token_file_str = self._environ.get(env.IDENTITY_TOKEN_FILE) if not token_file_str: return None token_file = Path(token_file_str) if not token_file.exists(): raise AuthenticationError(f"Identity token file not found: {token_file}") base_url = self.settings("base_url") credentials_file = env.get_credentials_file( str(credentials.DEFAULT_WANDB_CREDENTIALS_FILE), self._environ ) return credentials.access_token(base_url, token_file, credentials_file) @property def api_url(self) -> str: return self.settings("base_url") # type: ignore @property def app_url(self) -> str: return wandb.util.app_url(self.api_url) @property def default_entity(self) -> str: return self.viewer().get("entity") # type: ignore def settings(self, key: Optional[str] = None, section: Optional[str] = None) -> Any: """The settings overridden from the wandb/settings file. Args: key (str, optional): If provided only this setting is returned section (str, optional): If provided this section of the setting file is used, defaults to "default" Returns: A dict with the current settings { "entity": "models", "base_url": "https://api.wandb.ai", "project": None, "organization": "my-org", } """ result = self.default_settings.copy() result.update(self._settings.items(section=section)) # type: ignore result.update( { "entity": env.get_entity( self._settings.get( Settings.DEFAULT_SECTION, "entity", fallback=result.get("entity"), ), env=self._environ, ), "organization": env.get_organization( self._settings.get( Settings.DEFAULT_SECTION, "organization", fallback=result.get("organization"), ), env=self._environ, ), "project": env.get_project( self._settings.get( Settings.DEFAULT_SECTION, "project", fallback=result.get("project"), ), env=self._environ, ), "base_url": env.get_base_url( self._settings.get( Settings.DEFAULT_SECTION, "base_url", fallback=result.get("base_url"), ), env=self._environ, ), "ignore_globs": env.get_ignore( self._settings.get( Settings.DEFAULT_SECTION, "ignore_globs", fallback=result.get("ignore_globs"), ), env=self._environ, ), } ) return result if key is None else result[key] # type: ignore def clear_setting( self, key: str, globally: bool = False, persist: bool = False ) -> None: self._settings.clear( Settings.DEFAULT_SECTION, key, globally=globally, persist=persist ) def set_setting( self, key: str, value: Any, globally: bool = False, persist: bool = False ) -> None: self._settings.set( Settings.DEFAULT_SECTION, key, value, globally=globally, persist=persist ) if key == "entity": env.set_entity(value, env=self._environ) elif key == "project": env.set_project(value, env=self._environ) elif key == "base_url": self.relocate() def parse_slug( self, slug: str, project: Optional[str] = None, run: Optional[str] = None ) -> Tuple[str, str]: """Parse a slug into a project and run. Args: slug (str): The slug to parse project (str, optional): The project to use, if not provided it will be inferred from the slug run (str, optional): The run to use, if not provided it will be inferred from the slug Returns: A dict with the project and run """ if slug and "/" in slug: parts = slug.split("/") project = parts[0] run = parts[1] else: project = project or self.settings().get("project") if project is None: raise CommError("No default project configured.") run = run or slug or self.current_run_id or env.get_run(env=self._environ) assert run, "run must be specified" return project, run @normalize_exceptions def server_info_introspection(self) -> Tuple[List[str], List[str], List[str]]: query_string = """ query ProbeServerCapabilities { QueryType: __type(name: "Query") { ...fieldData } MutationType: __type(name: "Mutation") { ...fieldData } ServerInfoType: __type(name: "ServerInfo") { ...fieldData } } fragment fieldData on __Type { fields { name } } """ if ( self.query_types is None or self.mutation_types is None or self.server_info_types is None ): query = gql(query_string) res = self.gql(query) self.query_types = [ field.get("name", "") for field in res.get("QueryType", {}).get("fields", [{}]) ] self.mutation_types = [ field.get("name", "") for field in res.get("MutationType", {}).get("fields", [{}]) ] self.server_info_types = [ field.get("name", "") for field in res.get("ServerInfoType", {}).get("fields", [{}]) ] return self.query_types, self.server_info_types, self.mutation_types @normalize_exceptions def server_settings_introspection(self) -> None: query_string = """ query ProbeServerSettings { ServerSettingsType: __type(name: "ServerSettings") { ...fieldData } } fragment fieldData on __Type { fields { name } } """ if self._server_settings_type is None: query = gql(query_string) res = self.gql(query) self._server_settings_type = ( [ field.get("name", "") for field in res.get("ServerSettingsType", {}).get("fields", [{}]) ] if res else [] ) def server_use_artifact_input_introspection(self) -> List: query_string = """ query ProbeServerUseArtifactInput { UseArtifactInputInfoType: __type(name: "UseArtifactInput") { name inputFields { name } } } """ if self.server_use_artifact_input_info is None: query = gql(query_string) res = self.gql(query) self.server_use_artifact_input_info = [ field.get("name", "") for field in res.get("UseArtifactInputInfoType", {}).get( "inputFields", [{}] ) ] return self.server_use_artifact_input_info @normalize_exceptions def launch_agent_introspection(self) -> Optional[str]: query = gql( """ query LaunchAgentIntrospection { LaunchAgentType: __type(name: "LaunchAgent") { name } } """ ) res = self.gql(query) return res.get("LaunchAgentType") or None @normalize_exceptions def create_run_queue_introspection(self) -> Tuple[bool, bool, bool]: _, _, mutations = self.server_info_introspection() query_string = """ query ProbeCreateRunQueueInput { CreateRunQueueInputType: __type(name: "CreateRunQueueInput") { name inputFields { name } } } """ if ( self.server_create_run_queue_supports_drc is None or self.server_create_run_queue_supports_priority is None ): query = gql(query_string) res = self.gql(query) if res is None: raise CommError("Could not get CreateRunQueue input from GQL.") self.server_create_run_queue_supports_drc = "defaultResourceConfigID" in [ x["name"] for x in ( res.get("CreateRunQueueInputType", {}).get("inputFields", [{}]) ) ] self.server_create_run_queue_supports_priority = "prioritizationMode" in [ x["name"] for x in ( res.get("CreateRunQueueInputType", {}).get("inputFields", [{}]) ) ] return ( "createRunQueue" in mutations, self.server_create_run_queue_supports_drc, self.server_create_run_queue_supports_priority, ) @normalize_exceptions def upsert_run_queue_introspection(self) -> bool: _, _, mutations = self.server_info_introspection() return "upsertRunQueue" in mutations @normalize_exceptions def push_to_run_queue_introspection(self) -> Tuple[bool, bool]: query_string = """ query ProbePushToRunQueueInput { PushToRunQueueInputType: __type(name: "PushToRunQueueInput") { name inputFields { name } } } """ if ( self.server_supports_template_variables is None or self.server_push_to_run_queue_supports_priority is None ): query = gql(query_string) res = self.gql(query) self.server_supports_template_variables = "templateVariableValues" in [ x["name"] for x in ( res.get("PushToRunQueueInputType", {}).get("inputFields", [{}]) ) ] self.server_push_to_run_queue_supports_priority = "priority" in [ x["name"] for x in ( res.get("PushToRunQueueInputType", {}).get("inputFields", [{}]) ) ] return ( self.server_supports_template_variables, self.server_push_to_run_queue_supports_priority, ) @normalize_exceptions def create_default_resource_config_introspection(self) -> bool: _, _, mutations = self.server_info_introspection() return "createDefaultResourceConfig" in mutations @normalize_exceptions def fail_run_queue_item_introspection(self) -> bool: _, _, mutations = self.server_info_introspection() return "failRunQueueItem" in mutations @normalize_exceptions def fail_run_queue_item_fields_introspection(self) -> List: if self.fail_run_queue_item_input_info: return self.fail_run_queue_item_input_info query_string = """ query ProbeServerFailRunQueueItemInput { FailRunQueueItemInputInfoType: __type(name:"FailRunQueueItemInput") { inputFields{ name } } } """ query = gql(query_string) res = self.gql(query) self.fail_run_queue_item_input_info = [ field.get("name", "") for field in res.get("FailRunQueueItemInputInfoType", {}).get( "inputFields", [{}] ) ] return self.fail_run_queue_item_input_info @normalize_exceptions def fail_run_queue_item( self, run_queue_item_id: str, message: str, stage: str, file_paths: Optional[List[str]] = None, ) -> bool: if not self.fail_run_queue_item_introspection(): return False variable_values: Dict[str, Union[str, Optional[List[str]]]] = { "runQueueItemId": run_queue_item_id, } if "message" in self.fail_run_queue_item_fields_introspection(): variable_values.update({"message": message, "stage": stage}) if file_paths is not None: variable_values["filePaths"] = file_paths mutation_string = """ mutation failRunQueueItem($runQueueItemId: ID!, $message: String!, $stage: String!, $filePaths: [String!]) { failRunQueueItem( input: { runQueueItemId: $runQueueItemId message: $message stage: $stage filePaths: $filePaths } ) { success } } """ else: mutation_string = """ mutation failRunQueueItem($runQueueItemId: ID!) { failRunQueueItem( input: { runQueueItemId: $runQueueItemId } ) { success } } """ mutation = gql(mutation_string) response = self.gql(mutation, variable_values=variable_values) result: bool = response["failRunQueueItem"]["success"] return result @normalize_exceptions def update_run_queue_item_warning_introspection(self) -> bool: _, _, mutations = self.server_info_introspection() return "updateRunQueueItemWarning" in mutations def _server_features(self) -> Dict[str, bool]: # NOTE: Avoid caching via `@cached_property`, due to undocumented # locking behavior before Python 3.12. # See: https://github.com/python/cpython/issues/87634 query = gql(SERVER_FEATURES_QUERY_GQL) try: response = self.gql(query) except Exception as e: # Unfortunately we currently have to match on the text of the error message, # as the `gql` client raises `Exception` rather than a more specific error. if 'Cannot query field "features" on type "ServerInfo".' in str(e): self._server_features_cache = {} else: raise else: info = ServerFeaturesQuery.model_validate(response).server_info if info and (feats := info.features): self._server_features_cache = {f.name: f.is_enabled for f in feats if f} else: self._server_features_cache = {} return self._server_features_cache def _server_supports(self, feature: Union[int, str]) -> bool: """Return whether the current server supports the given feature. This also caches the underlying lookup of server feature flags, and it maps {feature_name (str) -> is_enabled (bool)}. Good to use for features that have a fallback mechanism for older servers. """ # If we're given the protobuf enum value, convert to a string name. # NOTE: We deliberately use names (str) instead of enum values (int) # as the keys here, since: # - the server identifies features by their name, rather than (client-side) enum value # - the defined list of client-side flags may be behind the server-side list of flags key = ServerFeature.Name(feature) if isinstance(feature, int) else feature return self._server_features().get(key) or False @normalize_exceptions def update_run_queue_item_warning( self, run_queue_item_id: str, message: str, stage: str, file_paths: Optional[List[str]] = None, ) -> bool: if not self.update_run_queue_item_warning_introspection(): return False mutation = gql( """ mutation updateRunQueueItemWarning($runQueueItemId: ID!, $message: String!, $stage: String!, $filePaths: [String!]) { updateRunQueueItemWarning( input: { runQueueItemId: $runQueueItemId message: $message stage: $stage filePaths: $filePaths } ) { success } } """ ) response = self.gql( mutation, variable_values={ "runQueueItemId": run_queue_item_id, "message": message, "stage": stage, "filePaths": file_paths, }, ) result: bool = response["updateRunQueueItemWarning"]["success"] return result @normalize_exceptions def viewer(self) -> Dict[str, Any]: query = gql( """ query Viewer{ viewer { id entity username flags teams { edges { node { name } } } } } """ ) res = self.gql(query) return res.get("viewer") or {} @normalize_exceptions def max_cli_version(self) -> Optional[str]: if self._max_cli_version is not None: return self._max_cli_version query_types, server_info_types, _ = self.server_info_introspection() cli_version_exists = ( "serverInfo" in query_types and "cliVersionInfo" in server_info_types ) if not cli_version_exists: return None _, server_info = self.viewer_server_info() self._max_cli_version = server_info.get("cliVersionInfo", {}).get( "max_cli_version" ) return self._max_cli_version @normalize_exceptions def viewer_server_info(self) -> Tuple[Dict[str, Any], Dict[str, Any]]: local_query = """ latestLocalVersionInfo { outOfDate latestVersionString versionOnThisInstanceString } """ cli_query = """ serverInfo { cliVersionInfo _LOCAL_QUERY_ } """ query_template = """ query Viewer{ viewer { id entity username email flags teams { edges { node { name } } } } _CLI_QUERY_ } """ query_types, server_info_types, _ = self.server_info_introspection() cli_version_exists = ( "serverInfo" in query_types and "cliVersionInfo" in server_info_types ) local_version_exists = ( "serverInfo" in query_types and "latestLocalVersionInfo" in server_info_types ) cli_query_string = "" if not cli_version_exists else cli_query local_query_string = "" if not local_version_exists else local_query query_string = query_template.replace("_CLI_QUERY_", cli_query_string).replace( "_LOCAL_QUERY_", local_query_string ) query = gql(query_string) res = self.gql(query) return res.get("viewer") or {}, res.get("serverInfo") or {} @normalize_exceptions def list_projects(self, entity: Optional[str] = None) -> List[Dict[str, str]]: """List projects in W&B scoped by entity. Args: entity (str, optional): The entity to scope this project to. Returns: [{"id","name","description"}] """ query = gql( """ query EntityProjects($entity: String) { models(first: 10, entityName: $entity) { edges { node { id name description } } } } """ ) project_list: List[Dict[str, str]] = self._flatten_edges( self.gql( query, variable_values={"entity": entity or self.settings("entity")} )["models"] ) return project_list @normalize_exceptions def project(self, project: str, entity: Optional[str] = None) -> "_Response": """Retrieve project. Args: project (str): The project to get details for entity (str, optional): The entity to scope this project to. Returns: [{"id","name","repo","dockerImage","description"}] """ query = gql( """ query ProjectDetails($entity: String, $project: String) { model(name: $project, entityName: $entity) { id name repo dockerImage description } } """ ) response: _Response = self.gql( query, variable_values={"entity": entity, "project": project} )["model"] return response @normalize_exceptions def sweep( self, sweep: str, specs: str, project: Optional[str] = None, entity: Optional[str] = None, ) -> Dict[str, Any]: """Retrieve sweep. Args: sweep (str): The sweep to get details for specs (str): history specs project (str, optional): The project to scope this sweep to. entity (str, optional): The entity to scope this sweep to. Returns: [{"id","name","repo","dockerImage","description"}] """ query = gql( """ query SweepWithRuns($entity: String, $project: String, $sweep: String!, $specs: [JSONString!]!) { project(name: $project, entityName: $entity) { sweep(sweepName: $sweep) { id name method state description config createdAt heartbeatAt updatedAt earlyStopJobRunning bestLoss controller scheduler runs { edges { node { name state config exitcode heartbeatAt shouldStop failed stopped running summaryMetrics sampledHistory(specs: $specs) } } } } } } """ ) entity = entity or self.settings("entity") project = project or self.settings("project") response = self.gql( query, variable_values={ "entity": entity, "project": project, "sweep": sweep, "specs": specs, }, ) if response["project"] is None or response["project"]["sweep"] is None: raise ValueError(f"Sweep {entity}/{project}/{sweep} not found") data: Dict[str, Any] = response["project"]["sweep"] if data: data["runs"] = self._flatten_edges(data["runs"]) return data @normalize_exceptions def list_runs( self, project: str, entity: Optional[str] = None ) -> List[Dict[str, str]]: """List runs in W&B scoped by project. Args: project (str): The project to scope the runs to entity (str, optional): The entity to scope this project to. Defaults to public models Returns: [{"id","name","description"}] """ query = gql( """ query ProjectRuns($model: String!, $entity: String) { model(name: $model, entityName: $entity) { buckets(first: 10) { edges { node { id name displayName description } } } } } """ ) return self._flatten_edges( self.gql( query, variable_values={ "entity": entity or self.settings("entity"), "model": project or self.settings("project"), }, )["model"]["buckets"] ) @normalize_exceptions def run_config( self, project: str, run: Optional[str] = None, entity: Optional[str] = None ) -> Tuple[str, Dict[str, Any], Optional[str], Dict[str, Any]]: """Get the relevant configs for a run. Args: project (str): The project to download, (can include bucket) run (str, optional): The run to download entity (str, optional): The entity to scope this project to. """ check_httpclient_logger_handler() query = gql( """ query RunConfigs( $name: String!, $entity: String, $run: String!, $pattern: String!, $includeConfig: Boolean!, ) { model(name: $name, entityName: $entity) { bucket(name: $run) { config @include(if: $includeConfig) commit @include(if: $includeConfig) files(pattern: $pattern) { pageInfo { hasNextPage endCursor } edges { node { name directUrl } } } } } } """ ) variable_values = { "name": project, "run": run, "entity": entity, "includeConfig": True, } commit: str = "" config: Dict[str, Any] = {} patch: Optional[str] = None metadata: Dict[str, Any] = {} # If we use the `names` parameter on the `files` node, then the server # will helpfully give us and 'open' file handle to the files that don't # exist. This is so that we can upload data to it. However, in this # case, we just want to download that file and not upload to it, so # let's instead query for the files that do exist using `pattern` # (with no wildcards). # # Unfortunately we're unable to construct a single pattern that matches # our 2 files, we would need something like regex for that. for filename in [DIFF_FNAME, METADATA_FNAME]: variable_values["pattern"] = filename response = self.gql(query, variable_values=variable_values) if response["model"] is None: raise CommError(f"Run {entity}/{project}/{run} not found") run_obj: Dict = response["model"]["bucket"] # we only need to fetch this config once if variable_values["includeConfig"]: commit = run_obj["commit"] config = json.loads(run_obj["config"] or "{}") variable_values["includeConfig"] = False if run_obj["files"] is not None: for file_edge in run_obj["files"]["edges"]: name = file_edge["node"]["name"] url = file_edge["node"]["directUrl"] res = requests.get(url) res.raise_for_status() if name == METADATA_FNAME: metadata = res.json() elif name == DIFF_FNAME: patch = res.text return commit, config, patch, metadata @normalize_exceptions def run_resume_status( self, entity: str, project_name: str, name: str ) -> Optional[Dict[str, Any]]: """Check if a run exists and get resume information. Args: entity (str): The entity to scope this project to. project_name (str): The project to download, (can include bucket) name (str): The run to download """ # Pulling wandbConfig.start_time is required so that we can determine if a run has actually started query = gql( """ query RunResumeStatus($project: String, $entity: String, $name: String!) { model(name: $project, entityName: $entity) { id name entity { id name } bucket(name: $name, missingOk: true) { id name summaryMetrics displayName logLineCount historyLineCount eventsLineCount historyTail eventsTail config tags wandbConfig(keys: ["t"]) } } } """ ) response = self.gql( query, variable_values={ "entity": entity, "project": project_name, "name": name, }, ) if "model" not in response or "bucket" not in (response["model"] or {}): return None project = response["model"] self.set_setting("project", project_name) if "entity" in project: self.set_setting("entity", project["entity"]["name"]) result: Dict[str, Any] = project["bucket"] return result @normalize_exceptions def check_stop_requested( self, project_name: str, entity_name: str, run_id: str ) -> bool: query = gql( """ query RunStoppedStatus($projectName: String, $entityName: String, $runId: String!) { project(name:$projectName, entityName:$entityName) { run(name:$runId) { stopped } } } """ ) response = self.gql( query, variable_values={ "projectName": project_name, "entityName": entity_name, "runId": run_id, }, ) project = response.get("project", None) if not project: return False run = project.get("run", None) if not run: return False status: bool = run["stopped"] return status def format_project(self, project: str) -> str: return re.sub(r"\W+", "-", project.lower()).strip("-_") @normalize_exceptions def upsert_project( self, project: str, id: Optional[str] = None, description: Optional[str] = None, entity: Optional[str] = None, ) -> Dict[str, Any]: """Create a new project. Args: project (str): The project to create description (str, optional): A description of this project entity (str, optional): The entity to scope this project to. """ mutation = gql( """ mutation UpsertModel($name: String!, $id: String, $entity: String!, $description: String, $repo: String) { upsertModel(input: { id: $id, name: $name, entityName: $entity, description: $description, repo: $repo }) { model { name description } } } """ ) response = self.gql( mutation, variable_values={ "name": self.format_project(project), "entity": entity or self.settings("entity"), "description": description, "id": id, }, ) # TODO(jhr): Commenting out 'repo' field for cling, add back # 'description': description, 'repo': self.git.remote_url, 'id': id}) result: Dict[str, Any] = response["upsertModel"]["model"] return result @normalize_exceptions def entity_is_team(self, entity: str) -> bool: query = gql( """ query EntityIsTeam($entity: String!) { entity(name: $entity) { id isTeam } } """ ) variable_values = { "entity": entity, } res = self.gql(query, variable_values) if res.get("entity") is None: raise Exception( f"Error fetching entity {entity} " "check that you have access to this entity" ) is_team: bool = res["entity"]["isTeam"] return is_team @normalize_exceptions def get_project_run_queues(self, entity: str, project: str) -> List[Dict[str, str]]: query = gql( """ query ProjectRunQueues($entity: String!, $projectName: String!){ project(entityName: $entity, name: $projectName) { runQueues { id name createdBy access } } } """ ) variable_values = { "projectName": project, "entity": entity, } res = self.gql(query, variable_values) if res.get("project") is None: # circular dependency: (LAUNCH_DEFAULT_PROJECT = model-registry) if project == "model-registry": msg = ( f"Error fetching run queues for {entity} " "check that you have access to this entity and project" ) else: msg = ( f"Error fetching run queues for {entity}/{project} " "check that you have access to this entity and project" ) raise Exception(msg) project_run_queues: List[Dict[str, str]] = res["project"]["runQueues"] return project_run_queues @normalize_exceptions def create_default_resource_config( self, entity: str, resource: str, config: str, template_variables: Optional[Dict[str, Union[float, int, str]]], ) -> Optional[Dict[str, Any]]: if not self.create_default_resource_config_introspection(): raise Exception() supports_template_vars, _ = self.push_to_run_queue_introspection() mutation_params = """ $entityName: String!, $resource: String!, $config: JSONString! """ mutation_inputs = """ entityName: $entityName, resource: $resource, config: $config """ if supports_template_vars: mutation_params += ", $templateVariables: JSONString" mutation_inputs += ", templateVariables: $templateVariables" else: if template_variables is not None: raise UnsupportedError( "server does not support template variables, please update server instance to >=0.46" ) variable_values = { "entityName": entity, "resource": resource, "config": config, } if supports_template_vars: if template_variables is not None: variable_values["templateVariables"] = json.dumps(template_variables) else: variable_values["templateVariables"] = "{}" query = gql( f""" mutation createDefaultResourceConfig( {mutation_params} ) {{ createDefaultResourceConfig( input: {{ {mutation_inputs} }} ) {{ defaultResourceConfigID success }} }} """ ) result: Optional[Dict[str, Any]] = self.gql(query, variable_values)[ "createDefaultResourceConfig" ] return result @normalize_exceptions def create_run_queue( self, entity: str, project: str, queue_name: str, access: str, prioritization_mode: Optional[str] = None, config_id: Optional[str] = None, ) -> Optional[Dict[str, Any]]: ( create_run_queue, supports_drc, supports_prioritization, ) = self.create_run_queue_introspection() if not create_run_queue: raise UnsupportedError( "run queue creation is not supported by this version of " "wandb server. Consider updating to the latest version." ) if not supports_drc and config_id is not None: raise UnsupportedError( "default resource configurations are not supported by this version " "of wandb server. Consider updating to the latest version." ) if not supports_prioritization and prioritization_mode is not None: raise UnsupportedError( "launch prioritization is not supported by this version of " "wandb server. Consider updating to the latest version." ) if supports_prioritization: query = gql( """ mutation createRunQueue( $entity: String!, $project: String!, $queueName: String!, $access: RunQueueAccessType!, $prioritizationMode: RunQueuePrioritizationMode, $defaultResourceConfigID: ID, ) { createRunQueue( input: { entityName: $entity, projectName: $project, queueName: $queueName, access: $access, prioritizationMode: $prioritizationMode defaultResourceConfigID: $defaultResourceConfigID } ) { success queueID } } """ ) variable_values = { "entity": entity, "project": project, "queueName": queue_name, "access": access, "prioritizationMode": prioritization_mode, "defaultResourceConfigID": config_id, } else: query = gql( """ mutation createRunQueue( $entity: String!, $project: String!, $queueName: String!, $access: RunQueueAccessType!, $defaultResourceConfigID: ID, ) { createRunQueue( input: { entityName: $entity, projectName: $project, queueName: $queueName, access: $access, defaultResourceConfigID: $defaultResourceConfigID } ) { success queueID } } """ ) variable_values = { "entity": entity, "project": project, "queueName": queue_name, "access": access, "defaultResourceConfigID": config_id, } result: Optional[Dict[str, Any]] = self.gql(query, variable_values)[ "createRunQueue" ] return result @normalize_exceptions def upsert_run_queue( self, queue_name: str, entity: str, resource_type: str, resource_config: dict, project: str = LAUNCH_DEFAULT_PROJECT, prioritization_mode: Optional[str] = None, template_variables: Optional[dict] = None, external_links: Optional[dict] = None, ) -> Optional[Dict[str, Any]]: if not self.upsert_run_queue_introspection(): raise UnsupportedError( "upserting run queues is not supported by this version of " "wandb server. Consider updating to the latest version." ) query = gql( """ mutation upsertRunQueue( $entityName: String! $projectName: String! $queueName: String! $resourceType: String! $resourceConfig: JSONString! $templateVariables: JSONString $prioritizationMode: RunQueuePrioritizationMode $externalLinks: JSONString $clientMutationId: String ) { upsertRunQueue( input: { entityName: $entityName projectName: $projectName queueName: $queueName resourceType: $resourceType resourceConfig: $resourceConfig templateVariables: $templateVariables prioritizationMode: $prioritizationMode externalLinks: $externalLinks clientMutationId: $clientMutationId } ) { success configSchemaValidationErrors } } """ ) variable_values = { "entityName": entity, "projectName": project, "queueName": queue_name, "resourceType": resource_type, "resourceConfig": json.dumps(resource_config), "templateVariables": ( json.dumps(template_variables) if template_variables else None ), "prioritizationMode": prioritization_mode, "externalLinks": json.dumps(external_links) if external_links else None, } result: Dict[str, Any] = self.gql(query, variable_values) return result["upsertRunQueue"] @normalize_exceptions def push_to_run_queue_by_name( self, entity: str, project: str, queue_name: str, run_spec: str, template_variables: Optional[Dict[str, Union[int, float, str]]], priority: Optional[int] = None, ) -> Optional[Dict[str, Any]]: self.push_to_run_queue_introspection() """Queryless mutation, should be used before legacy fallback method.""" mutation_params = """ $entityName: String!, $projectName: String!, $queueName: String!, $runSpec: JSONString! """ mutation_input = """ entityName: $entityName, projectName: $projectName, queueName: $queueName, runSpec: $runSpec """ variables: Dict[str, Any] = { "entityName": entity, "projectName": project, "queueName": queue_name, "runSpec": run_spec, } if self.server_push_to_run_queue_supports_priority: if priority is not None: variables["priority"] = priority mutation_params += ", $priority: Int" mutation_input += ", priority: $priority" else: if priority is not None: raise UnsupportedError( "server does not support priority, please update server instance to >=0.46" ) if self.server_supports_template_variables: if template_variables is not None: variables.update( {"templateVariableValues": json.dumps(template_variables)} ) mutation_params += ", $templateVariableValues: JSONString" mutation_input += ", templateVariableValues: $templateVariableValues" else: if template_variables is not None: raise UnsupportedError( "server does not support template variables, please update server instance to >=0.46" ) mutation = gql( f""" mutation pushToRunQueueByName( {mutation_params} ) {{ pushToRunQueueByName( input: {{ {mutation_input} }} ) {{ runQueueItemId runSpec }} }} """ ) try: result: Optional[Dict[str, Any]] = self.gql( mutation, variables, check_retry_fn=util.no_retry_4xx ).get("pushToRunQueueByName") if not result: return None if result.get("runSpec"): run_spec = json.loads(str(result["runSpec"])) result["runSpec"] = run_spec return result except Exception as e: if ( 'Cannot query field "runSpec" on type "PushToRunQueueByNamePayload"' not in str(e) ): return None mutation_no_runspec = gql( """ mutation pushToRunQueueByName( $entityName: String!, $projectName: String!, $queueName: String!, $runSpec: JSONString!, ) { pushToRunQueueByName( input: { entityName: $entityName, projectName: $projectName, queueName: $queueName, runSpec: $runSpec } ) { runQueueItemId } } """ ) try: result = self.gql( mutation_no_runspec, variables, check_retry_fn=util.no_retry_4xx ).get("pushToRunQueueByName") except Exception: result = None return result @normalize_exceptions def push_to_run_queue( self, queue_name: str, launch_spec: Dict[str, str], template_variables: Optional[dict], project_queue: str, priority: Optional[int] = None, ) -> Optional[Dict[str, Any]]: self.push_to_run_queue_introspection() entity = launch_spec.get("queue_entity") or launch_spec["entity"] run_spec = json.dumps(launch_spec) push_result = self.push_to_run_queue_by_name( entity, project_queue, queue_name, run_spec, template_variables, priority ) if push_result: return push_result if priority is not None: # Cannot proceed with legacy method if priority is set return None """ Legacy Method """ queues_found = self.get_project_run_queues(entity, project_queue) matching_queues = [ q for q in queues_found if q["name"] == queue_name # ensure user has access to queue and ( # TODO: User created queues in the UI have USER access q["access"] in ["PROJECT", "USER"] or q["createdBy"] == self.default_entity ) ] if not matching_queues: # in the case of a missing default queue. create it if queue_name == "default": wandb.termlog( f"No default queue existing for entity: {entity} in project: {project_queue}, creating one." ) res = self.create_run_queue( launch_spec["entity"], project_queue, queue_name, access="PROJECT", ) if res is None or res.get("queueID") is None: wandb.termerror( f"Unable to create default queue for entity: {entity} on project: {project_queue}. Run could not be added to a queue" ) return None queue_id = res["queueID"] else: if project_queue == "model-registry": _msg = f"Unable to push to run queue {queue_name}. Queue not found." else: _msg = f"Unable to push to run queue {project_queue}/{queue_name}. Queue not found." wandb.termwarn(_msg) return None elif len(matching_queues) > 1: wandb.termerror( f"Unable to push to run queue {queue_name}. More than one queue found with this name." ) return None else: queue_id = matching_queues[0]["id"] spec_json = json.dumps(launch_spec) variables = {"queueID": queue_id, "runSpec": spec_json} mutation_params = """ $queueID: ID!, $runSpec: JSONString! """ mutation_input = """ queueID: $queueID, runSpec: $runSpec """ if self.server_supports_template_variables: if template_variables is not None: mutation_params += ", $templateVariableValues: JSONString" mutation_input += ", templateVariableValues: $templateVariableValues" variables.update( {"templateVariableValues": json.dumps(template_variables)} ) else: if template_variables is not None: raise UnsupportedError( "server does not support template variables, please update server instance to >=0.46" ) mutation = gql( f""" mutation pushToRunQueue( {mutation_params} ) {{ pushToRunQueue( input: {{{mutation_input}}} ) {{ runQueueItemId }} }} """ ) response = self.gql(mutation, variable_values=variables) if not response.get("pushToRunQueue"): raise CommError(f"Error pushing run queue item to queue {queue_name}.") result: Optional[Dict[str, Any]] = response["pushToRunQueue"] return result @normalize_exceptions def pop_from_run_queue( self, queue_name: str, entity: Optional[str] = None, project: Optional[str] = None, agent_id: Optional[str] = None, ) -> Optional[Dict[str, Any]]: mutation = gql( """ mutation popFromRunQueue($entity: String!, $project: String!, $queueName: String!, $launchAgentId: ID) { popFromRunQueue(input: { entityName: $entity, projectName: $project, queueName: $queueName, launchAgentId: $launchAgentId }) { runQueueItemId runSpec } } """ ) response = self.gql( mutation, variable_values={ "entity": entity, "project": project, "queueName": queue_name, "launchAgentId": agent_id, }, ) result: Optional[Dict[str, Any]] = response["popFromRunQueue"] return result @normalize_exceptions def ack_run_queue_item(self, item_id: str, run_id: Optional[str] = None) -> bool: mutation = gql( """ mutation ackRunQueueItem($itemId: ID!, $runId: String!) { ackRunQueueItem(input: { runQueueItemId: $itemId, runName: $runId }) { success } } """ ) response = self.gql( mutation, variable_values={"itemId": item_id, "runId": str(run_id)} ) if not response["ackRunQueueItem"]["success"]: raise CommError( "Error acking run queue item. Item may have already been acknowledged by another process" ) result: bool = response["ackRunQueueItem"]["success"] return result @normalize_exceptions def create_launch_agent_fields_introspection(self) -> List: if self.create_launch_agent_input_info: return self.create_launch_agent_input_info query_string = """ query ProbeServerCreateLaunchAgentInput { CreateLaunchAgentInputInfoType: __type(name:"CreateLaunchAgentInput") { inputFields{ name } } } """ query = gql(query_string) res = self.gql(query) self.create_launch_agent_input_info = [ field.get("name", "") for field in res.get("CreateLaunchAgentInputInfoType", {}).get( "inputFields", [{}] ) ] return self.create_launch_agent_input_info @normalize_exceptions def create_launch_agent( self, entity: str, project: str, queues: List[str], agent_config: Dict[str, Any], version: str, gorilla_agent_support: bool, ) -> dict: project_queues = self.get_project_run_queues(entity, project) if not project_queues: # create default queue if it doesn't already exist default = self.create_run_queue( entity, project, "default", access="PROJECT" ) if default is None or default.get("queueID") is None: raise CommError( f"Unable to create default queue for {entity}/{project}. No queues for agent to poll" ) project_queues = [{"id": default["queueID"], "name": "default"}] polling_queue_ids = [ q["id"] for q in project_queues if q["name"] in queues ] # filter to poll specified queues if len(polling_queue_ids) != len(queues): raise CommError( f"Could not start launch agent: Not all of requested queues ({', '.join(queues)}) found. " f"Available queues for this project: {','.join([q['name'] for q in project_queues])}" ) if not gorilla_agent_support: # if gorilla doesn't support launch agents, return a client-generated id return { "success": True, "launchAgentId": None, } hostname = socket.gethostname() variable_values = { "entity": entity, "project": project, "queues": polling_queue_ids, "hostname": hostname, } mutation_params = """ $entity: String!, $project: String!, $queues: [ID!]!, $hostname: String! """ mutation_input = """ entityName: $entity, projectName: $project, runQueues: $queues, hostname: $hostname """ if "agentConfig" in self.create_launch_agent_fields_introspection(): variable_values["agentConfig"] = json.dumps(agent_config) mutation_params += ", $agentConfig: JSONString" mutation_input += ", agentConfig: $agentConfig" if "version" in self.create_launch_agent_fields_introspection(): variable_values["version"] = version mutation_params += ", $version: String" mutation_input += ", version: $version" mutation = gql( f""" mutation createLaunchAgent( {mutation_params} ) {{ createLaunchAgent( input: {{ {mutation_input} }} ) {{ launchAgentId }} }} """ ) result: dict = self.gql(mutation, variable_values)["createLaunchAgent"] return result @normalize_exceptions def update_launch_agent_status( self, agent_id: str, status: str, gorilla_agent_support: bool, ) -> dict: if not gorilla_agent_support: # if gorilla doesn't support launch agents, this is a no-op return { "success": True, } mutation = gql( """ mutation updateLaunchAgent($agentId: ID!, $agentStatus: String){ updateLaunchAgent( input: { launchAgentId: $agentId agentStatus: $agentStatus } ) { success } } """ ) variable_values = { "agentId": agent_id, "agentStatus": status, } result: dict = self.gql(mutation, variable_values)["updateLaunchAgent"] return result @normalize_exceptions def get_launch_agent(self, agent_id: str, gorilla_agent_support: bool) -> dict: if not gorilla_agent_support: return { "id": None, "name": "", "stopPolling": False, } query = gql( """ query LaunchAgent($agentId: ID!) { launchAgent(id: $agentId) { id name runQueues hostname agentStatus stopPolling heartbeatAt } } """ ) variable_values = { "agentId": agent_id, } result: dict = self.gql(query, variable_values)["launchAgent"] return result @normalize_exceptions def upsert_run( self, id: Optional[str] = None, name: Optional[str] = None, project: Optional[str] = None, host: Optional[str] = None, group: Optional[str] = None, tags: Optional[List[str]] = None, config: Optional[dict] = None, description: Optional[str] = None, entity: Optional[str] = None, state: Optional[str] = None, display_name: Optional[str] = None, notes: Optional[str] = None, repo: Optional[str] = None, job_type: Optional[str] = None, program_path: Optional[str] = None, commit: Optional[str] = None, sweep_name: Optional[str] = None, summary_metrics: Optional[str] = None, num_retries: Optional[int] = None, ) -> Tuple[dict, bool, Optional[List]]: """Update a run. Args: id (str, optional): The existing run to update name (str, optional): The name of the run to create group (str, optional): Name of the group this run is a part of project (str, optional): The name of the project host (str, optional): The name of the host tags (list, optional): A list of tags to apply to the run config (dict, optional): The latest config params description (str, optional): A description of this project entity (str, optional): The entity to scope this project to. display_name (str, optional): The display name of this project notes (str, optional): Notes about this run repo (str, optional): Url of the program's repository. state (str, optional): State of the program. job_type (str, optional): Type of job, e.g 'train'. program_path (str, optional): Path to the program. commit (str, optional): The Git SHA to associate the run with sweep_name (str, optional): The name of the sweep this run is a part of summary_metrics (str, optional): The JSON summary metrics num_retries (int, optional): Number of retries """ query_string = """ mutation UpsertBucket( $id: String, $name: String, $project: String, $entity: String, $groupName: String, $description: String, $displayName: String, $notes: String, $commit: String, $config: JSONString, $host: String, $debug: Boolean, $program: String, $repo: String, $jobType: String, $state: String, $sweep: String, $tags: [String!], $summaryMetrics: JSONString, ) { upsertBucket(input: { id: $id, name: $name, groupName: $groupName, modelName: $project, entityName: $entity, description: $description, displayName: $displayName, notes: $notes, config: $config, commit: $commit, host: $host, debug: $debug, jobProgram: $program, jobRepo: $repo, jobType: $jobType, state: $state, sweep: $sweep, tags: $tags, summaryMetrics: $summaryMetrics, }) { bucket { id name displayName description config sweepName project { id name entity { id name } } historyLineCount } inserted _Server_Settings_ } } """ self.server_settings_introspection() server_settings_string = ( """ serverSettings { serverMessages{ utfText plainText htmlText messageType messageLevel } } """ if self._server_settings_type else "" ) query_string = query_string.replace("_Server_Settings_", server_settings_string) mutation = gql(query_string) config_str = json.dumps(config) if config else None if not description or description.isspace(): description = None kwargs = {} if num_retries is not None: kwargs["num_retries"] = num_retries variable_values = { "id": id, "entity": entity or self.settings("entity"), "name": name, "project": project or util.auto_project_name(program_path), "groupName": group, "tags": tags, "description": description, "config": config_str, "commit": commit, "displayName": display_name, "notes": notes, "host": None if self.settings().get("anonymous") in ["allow", "must"] else host, "debug": env.is_debug(env=self._environ), "repo": repo, "program": program_path, "jobType": job_type, "state": state, "sweep": sweep_name, "summaryMetrics": summary_metrics, } # retry conflict errors for 2 minutes, default to no_auth_retry check_retry_fn = util.make_check_retry_fn( check_fn=util.check_retry_conflict_or_gone, check_timedelta=datetime.timedelta(minutes=2), fallback_retry_fn=util.no_retry_auth, ) response = self.gql( mutation, variable_values=variable_values, check_retry_fn=check_retry_fn, **kwargs, ) run_obj: Dict[str, Dict[str, Dict[str, str]]] = response["upsertBucket"][ "bucket" ] project_obj: Dict[str, Dict[str, str]] = run_obj.get("project", {}) if project_obj: self.set_setting("project", project_obj["name"]) entity_obj = project_obj.get("entity", {}) if entity_obj: self.set_setting("entity", entity_obj["name"]) server_messages = None if self._server_settings_type: server_messages = ( response["upsertBucket"] .get("serverSettings", {}) .get("serverMessages", []) ) return ( response["upsertBucket"]["bucket"], response["upsertBucket"]["inserted"], server_messages, ) @normalize_exceptions def rewind_run( self, run_name: str, metric_name: str, metric_value: float, program_path: Optional[str] = None, entity: Optional[str] = None, project: Optional[str] = None, num_retries: Optional[int] = None, ) -> dict: """Rewinds a run to a previous state. Args: run_name (str): The name of the run to rewind metric_name (str): The name of the metric to rewind to metric_value (float): The value of the metric to rewind to program_path (str, optional): Path to the program entity (str, optional): The entity to scope this project to project (str, optional): The name of the project num_retries (int, optional): Number of retries Returns: A dict with the rewound run { "id": "run_id", "name": "run_name", "displayName": "run_display_name", "description": "run_description", "config": "stringified_run_config_json", "sweepName": "run_sweep_name", "project": { "id": "project_id", "name": "project_name", "entity": { "id": "entity_id", "name": "entity_name" } }, "historyLineCount": 100, } """ query_string = """ mutation RewindRun($runName: String!, $entity: String, $project: String, $metricName: String!, $metricValue: Float!) { rewindRun(input: {runName: $runName, entityName: $entity, projectName: $project, metricName: $metricName, metricValue: $metricValue}) { rewoundRun { id name displayName description config sweepName project { id name entity { id name } } historyLineCount } } } """ mutation = gql(query_string) kwargs = {} if num_retries is not None: kwargs["num_retries"] = num_retries variable_values = { "runName": run_name, "entity": entity or self.settings("entity"), "project": project or util.auto_project_name(program_path), "metricName": metric_name, "metricValue": metric_value, } # retry conflict errors for 2 minutes, default to no_auth_retry check_retry_fn = util.make_check_retry_fn( check_fn=util.check_retry_conflict_or_gone, check_timedelta=datetime.timedelta(minutes=2), fallback_retry_fn=util.no_retry_auth, ) response = self.gql( mutation, variable_values=variable_values, check_retry_fn=check_retry_fn, **kwargs, ) run_obj: Dict[str, Dict[str, Dict[str, str]]] = response.get( "rewindRun", {} ).get("rewoundRun", {}) project_obj: Dict[str, Dict[str, str]] = run_obj.get("project", {}) if project_obj: self.set_setting("project", project_obj["name"]) entity_obj = project_obj.get("entity", {}) if entity_obj: self.set_setting("entity", entity_obj["name"]) return run_obj @normalize_exceptions def get_run_info( self, entity: str, project: str, name: str, ) -> dict: query = gql( """ query RunInfo($project: String!, $entity: String!, $name: String!) { project(name: $project, entityName: $entity) { run(name: $name) { runInfo { program args os python colab executable codeSaved cpuCount gpuCount gpu git { remote commit } } } } } """ ) variable_values = {"project": project, "entity": entity, "name": name} res = self.gql(query, variable_values) if res.get("project") is None: raise CommError( f"Error fetching run info for {entity}/{project}/{name}. Check that this project exists and you have access to this entity and project" ) elif res["project"].get("run") is None: raise CommError( f"Error fetching run info for {entity}/{project}/{name}. Check that this run id exists" ) run_info: dict = res["project"]["run"]["runInfo"] return run_info @normalize_exceptions def get_run_state(self, entity: str, project: str, name: str) -> str: query = gql( """ query RunState( $project: String!, $entity: String!, $name: String!) { project(name: $project, entityName: $entity) { run(name: $name) { state } } } """ ) variable_values = { "project": project, "entity": entity, "name": name, } res = self.gql(query, variable_values) if res.get("project") is None or res["project"].get("run") is None: raise CommError(f"Error fetching run state for {entity}/{project}/{name}.") run_state: str = res["project"]["run"]["state"] return run_state @normalize_exceptions def create_run_files_introspection(self) -> bool: _, _, mutations = self.server_info_introspection() return "createRunFiles" in mutations @normalize_exceptions def upload_urls( self, project: str, files: Union[List[str], Dict[str, IO]], run: Optional[str] = None, entity: Optional[str] = None, description: Optional[str] = None, ) -> Tuple[str, List[str], Dict[str, Dict[str, Any]]]: """Generate temporary resumable upload urls. Args: project (str): The project to download files (list or dict): The filenames to upload run (str, optional): The run to upload to entity (str, optional): The entity to scope this project to. description (str, optional): description Returns: (run_id, upload_headers, file_info) run_id: id of run we uploaded files to upload_headers: A list of headers to use when uploading files. file_info: A dict of filenames and urls. { "run_id": "run_id", "upload_headers": [""], "file_info": [ { "weights.h5": { "uploadUrl": "https://weights.url" } }, { "model.json": { "uploadUrl": "https://model.json" } } ] } """ run_name = run or self.current_run_id assert run_name, "run must be specified" entity = entity or self.settings("entity") assert entity, "entity must be specified" has_create_run_files_mutation = self.create_run_files_introspection() if not has_create_run_files_mutation: return self.legacy_upload_urls(project, files, run, entity, description) query = gql( """ mutation CreateRunFiles($entity: String!, $project: String!, $run: String!, $files: [String!]!) { createRunFiles(input: {entityName: $entity, projectName: $project, runName: $run, files: $files}) { runID uploadHeaders files { name uploadUrl } } } """ ) query_result = self.gql( query, variable_values={ "project": project, "run": run_name, "entity": entity, "files": [file for file in files], }, ) result = query_result["createRunFiles"] run_id = result["runID"] if not run_id: raise CommError( f"Error uploading files to {entity}/{project}/{run_name}. Check that this project exists and you have access to this entity and project" ) file_name_urls = {file["name"]: file for file in result["files"]} return run_id, result["uploadHeaders"], file_name_urls def legacy_upload_urls( self, project: str, files: Union[List[str], Dict[str, IO]], run: Optional[str] = None, entity: Optional[str] = None, description: Optional[str] = None, ) -> Tuple[str, List[str], Dict[str, Dict[str, Any]]]: """Generate temporary resumable upload urls. A new mutation createRunFiles was introduced after 0.15.4. This function is used to support older versions. """ query = gql( """ query RunUploadUrls($name: String!, $files: [String]!, $entity: String, $run: String!, $description: String) { model(name: $name, entityName: $entity) { bucket(name: $run, desc: $description) { id files(names: $files) { uploadHeaders edges { node { name url(upload: true) updatedAt } } } } } } """ ) run_id = run or self.current_run_id assert run_id, "run must be specified" entity = entity or self.settings("entity") query_result = self.gql( query, variable_values={ "name": project, "run": run_id, "entity": entity, "files": [file for file in files], "description": description, }, ) run_obj = query_result["model"]["bucket"] if run_obj: for file_node in run_obj["files"]["edges"]: file = file_node["node"] # we previously used "url" field but now use "uploadUrl" # replace the "url" field with "uploadUrl for downstream compatibility if "url" in file and "uploadUrl" not in file: file["uploadUrl"] = file.pop("url") result = { file["name"]: file for file in self._flatten_edges(run_obj["files"]) } return run_obj["id"], run_obj["files"]["uploadHeaders"], result else: raise CommError(f"Run does not exist {entity}/{project}/{run_id}.") @normalize_exceptions def download_urls( self, project: str, run: Optional[str] = None, entity: Optional[str] = None, ) -> Dict[str, Dict[str, str]]: """Generate download urls. Args: project (str): The project to download run (str): The run to upload to entity (str, optional): The entity to scope this project to. Defaults to wandb models Returns: A dict of extensions and urls { 'weights.h5': { "url": "https://weights.url", "updatedAt": '2013-04-26T22:22:23.832Z', 'md5': 'mZFLkyvTelC5g8XnyQrpOw==' }, 'model.json': { "url": "https://model.url", "updatedAt": '2013-04-26T22:22:23.832Z', 'md5': 'mZFLkyvTelC5g8XnyQrpOw==' } } """ query = gql( """ query RunDownloadUrls($name: String!, $entity: String, $run: String!) { model(name: $name, entityName: $entity) { bucket(name: $run) { files { edges { node { name url md5 updatedAt } } } } } } """ ) run = run or self.current_run_id assert run, "run must be specified" entity = entity or self.settings("entity") query_result = self.gql( query, variable_values={ "name": project, "run": run, "entity": entity, }, ) if query_result["model"] is None: raise CommError(f"Run does not exist {entity}/{project}/{run}.") files = self._flatten_edges(query_result["model"]["bucket"]["files"]) return {file["name"]: file for file in files if file} @normalize_exceptions def download_url( self, project: str, file_name: str, run: Optional[str] = None, entity: Optional[str] = None, ) -> Optional[Dict[str, str]]: """Generate download urls. Args: project (str): The project to download file_name (str): The name of the file to download run (str): The run to upload to entity (str, optional): The entity to scope this project to. Defaults to wandb models Returns: A dict of extensions and urls { "url": "https://weights.url", "updatedAt": '2013-04-26T22:22:23.832Z', 'md5': 'mZFLkyvTelC5g8XnyQrpOw==' } """ query = gql( """ query RunDownloadUrl($name: String!, $fileName: String!, $entity: String, $run: String!) { model(name: $name, entityName: $entity) { bucket(name: $run) { files(names: [$fileName]) { edges { node { name url md5 updatedAt } } } } } } """ ) run = run or self.current_run_id assert run, "run must be specified" query_result = self.gql( query, variable_values={ "name": project, "run": run, "fileName": file_name, "entity": entity or self.settings("entity"), }, ) if query_result["model"]: files = self._flatten_edges(query_result["model"]["bucket"]["files"]) return files[0] if len(files) > 0 and files[0].get("updatedAt") else None else: return None @normalize_exceptions def download_file(self, url: str) -> Tuple[int, requests.Response]: """Initiate a streaming download. Args: url (str): The url to download Returns: A tuple of the content length and the streaming response """ check_httpclient_logger_handler() http_headers = _thread_local_api_settings.headers or {} auth = None if self.access_token is not None: http_headers["Authorization"] = f"Bearer {self.access_token}" elif _thread_local_api_settings.cookies is None: auth = ("api", self.api_key or "") response = requests.get( url, auth=auth, cookies=_thread_local_api_settings.cookies or {}, headers=http_headers, stream=True, ) response.raise_for_status() return int(response.headers.get("content-length", 0)), response @normalize_exceptions def download_write_file( self, metadata: Dict[str, str], out_dir: Optional[str] = None, ) -> Tuple[str, Optional[requests.Response]]: """Download a file from a run and write it to wandb/. Args: metadata (obj): The metadata object for the file to download. Comes from Api.download_urls(). out_dir (str, optional): The directory to write the file to. Defaults to wandb/ Returns: A tuple of the file's local path and the streaming response. The streaming response is None if the file already existed and was up-to-date. """ filename = metadata["name"] path = os.path.join(out_dir or self.settings("wandb_dir"), filename) if self.file_current(filename, B64MD5(metadata["md5"])): return path, None size, response = self.download_file(metadata["url"]) with util.fsync_open(path, "wb") as file: for data in response.iter_content(chunk_size=1024): file.write(data) return path, response def upload_file_azure( self, url: str, file: Any, extra_headers: Dict[str, str] ) -> None: """Upload a file to azure.""" from azure.core.exceptions import AzureError # type: ignore # Configure the client without retries so our existing logic can handle them client = self._azure_blob_module.BlobClient.from_blob_url( url, retry_policy=self._azure_blob_module.LinearRetry(retry_total=0) ) try: if extra_headers.get("Content-MD5") is not None: md5: Optional[bytes] = base64.b64decode(extra_headers["Content-MD5"]) else: md5 = None content_settings = self._azure_blob_module.ContentSettings( content_md5=md5, content_type=extra_headers.get("Content-Type"), ) client.upload_blob( file, max_concurrency=4, length=len(file), overwrite=True, content_settings=content_settings, ) except AzureError as e: if hasattr(e, "response"): response = requests.models.Response() response.status_code = e.response.status_code response.headers = e.response.headers raise requests.exceptions.RequestException(e.message, response=response) else: raise requests.exceptions.ConnectionError(e.message) def upload_multipart_file_chunk( self, url: str, upload_chunk: bytes, extra_headers: Optional[Dict[str, str]] = None, ) -> Optional[requests.Response]: """Upload a file chunk to S3 with failure resumption. Args: url: The url to download upload_chunk: The path to the file you want to upload extra_headers: A dictionary of extra headers to send with the request Returns: The `requests` library response object """ check_httpclient_logger_handler() try: if env.is_debug(env=self._environ): logger.debug("upload_file: %s", url) response = self._upload_file_session.put( url, data=upload_chunk, headers=extra_headers ) if env.is_debug(env=self._environ): logger.debug("upload_file: %s complete", url) response.raise_for_status() except requests.exceptions.RequestException as e: logger.exception(f"upload_file exception for {url=}") response_content = e.response.content if e.response is not None else "" status_code = e.response.status_code if e.response is not None else 0 # S3 reports retryable request timeouts out-of-band is_aws_retryable = status_code == 400 and "RequestTimeout" in str( response_content ) # Retry errors from cloud storage or local network issues if ( status_code in (308, 408, 409, 429, 500, 502, 503, 504) or isinstance( e, (requests.exceptions.Timeout, requests.exceptions.ConnectionError), ) or is_aws_retryable ): _e = retry.TransientError(exc=e) raise _e.with_traceback(sys.exc_info()[2]) else: wandb._sentry.reraise(e) return response def upload_file( self, url: str, file: IO[bytes], callback: Optional["ProgressFn"] = None, extra_headers: Optional[Dict[str, str]] = None, ) -> Optional[requests.Response]: """Upload a file to W&B with failure resumption. Args: url: The url to download file: The path to the file you want to upload callback: A callback which is passed the number of bytes uploaded since the last time it was called, used to report progress extra_headers: A dictionary of extra headers to send with the request Returns: The `requests` library response object """ check_httpclient_logger_handler() extra_headers = extra_headers.copy() if extra_headers else {} response: Optional[requests.Response] = None progress = Progress(file, callback=callback) try: if "x-ms-blob-type" in extra_headers and self._azure_blob_module: self.upload_file_azure(url, progress, extra_headers) else: if "x-ms-blob-type" in extra_headers: wandb.termwarn( "Azure uploads over 256MB require the azure SDK, install with pip install wandb[azure]", repeat=False, ) if env.is_debug(env=self._environ): logger.debug("upload_file: %s", url) response = self._upload_file_session.put( url, data=progress, headers=extra_headers ) if env.is_debug(env=self._environ): logger.debug("upload_file: %s complete", url) response.raise_for_status() except requests.exceptions.RequestException as e: logger.exception(f"upload_file exception for {url=}") response_content = e.response.content if e.response is not None else "" status_code = e.response.status_code if e.response is not None else 0 # S3 reports retryable request timeouts out-of-band is_aws_retryable = ( "x-amz-meta-md5" in extra_headers and status_code == 400 and "RequestTimeout" in str(response_content) ) # We need to rewind the file for the next retry (the file passed in is `seek`'ed to 0) progress.rewind() # Retry errors from cloud storage or local network issues if ( status_code in (308, 408, 409, 429, 500, 502, 503, 504) or isinstance( e, (requests.exceptions.Timeout, requests.exceptions.ConnectionError), ) or is_aws_retryable ): _e = retry.TransientError(exc=e) raise _e.with_traceback(sys.exc_info()[2]) else: wandb._sentry.reraise(e) return response @normalize_exceptions def register_agent( self, host: str, sweep_id: Optional[str] = None, project_name: Optional[str] = None, entity: Optional[str] = None, ) -> dict: """Register a new agent. Args: host (str): hostname sweep_id (str): sweep id project_name: (str): model that contains sweep entity: (str): entity that contains sweep """ mutation = gql( """ mutation CreateAgent( $host: String! $projectName: String, $entityName: String, $sweep: String! ) { createAgent(input: { host: $host, projectName: $projectName, entityName: $entityName, sweep: $sweep, }) { agent { id } } } """ ) if entity is None: entity = self.settings("entity") if project_name is None: project_name = self.settings("project") response = self.gql( mutation, variable_values={ "host": host, "entityName": entity, "projectName": project_name, "sweep": sweep_id, }, check_retry_fn=util.no_retry_4xx, ) result: dict = response["createAgent"]["agent"] return result def agent_heartbeat( self, agent_id: str, metrics: dict, run_states: dict ) -> List[Dict[str, Any]]: """Notify server about agent state, receive commands. Args: agent_id (str): agent_id metrics (dict): system metrics run_states (dict): run_id: state mapping Returns: List of commands to execute. """ mutation = gql( """ mutation Heartbeat( $id: ID!, $metrics: JSONString, $runState: JSONString ) { agentHeartbeat(input: { id: $id, metrics: $metrics, runState: $runState }) { agent { id } commands } } """ ) if agent_id is None: raise ValueError("Cannot call heartbeat with an unregistered agent.") try: response = self.gql( mutation, variable_values={ "id": agent_id, "metrics": json.dumps(metrics), "runState": json.dumps(run_states), }, timeout=60, ) except Exception: logger.exception("Error communicating with W&B.") return [] else: result: List[Dict[str, Any]] = json.loads( response["agentHeartbeat"]["commands"] ) return result @staticmethod def _validate_config_and_fill_distribution(config: dict) -> dict: # verify that parameters are well specified. # TODO(dag): deprecate this in favor of jsonschema validation once # apiVersion 2 is released and local controller is integrated with # wandb/client. # avoid modifying the original config dict in # case it is reused outside the calling func config = deepcopy(config) # explicitly cast to dict in case config was passed as a sweepconfig # sweepconfig does not serialize cleanly to yaml and breaks graphql, # but it is a subclass of dict, so this conversion is clean config = dict(config) if "parameters" not in config: # still shows an anaconda warning, but doesn't error return config for parameter_name in config["parameters"]: parameter = config["parameters"][parameter_name] if "min" in parameter and "max" in parameter: if "distribution" not in parameter: if isinstance(parameter["min"], int) and isinstance( parameter["max"], int ): parameter["distribution"] = "int_uniform" elif isinstance(parameter["min"], float) and isinstance( parameter["max"], float ): parameter["distribution"] = "uniform" else: raise ValueError( f"Parameter {parameter_name} is ambiguous, please specify bounds as both floats (for a float_" "uniform distribution) or ints (for an int_uniform distribution)." ) return config @normalize_exceptions def upsert_sweep( self, config: dict, controller: Optional[str] = None, launch_scheduler: Optional[str] = None, scheduler: Optional[str] = None, obj_id: Optional[str] = None, project: Optional[str] = None, entity: Optional[str] = None, state: Optional[str] = None, prior_runs: Optional[List[str]] = None, template_variable_values: Optional[Dict[str, Any]] = None, ) -> Tuple[str, List[str]]: """Upsert a sweep object. Args: config (dict): sweep config (will be converted to yaml) controller (str): controller to use launch_scheduler (str): launch scheduler to use scheduler (str): scheduler to use obj_id (str): object id project (str): project to use entity (str): entity to use state (str): state prior_runs (list): IDs of existing runs to add to the sweep template_variable_values (dict): template variable values """ project_query = """ project { id name entity { id name } } """ mutation_str = """ mutation UpsertSweep( $id: ID, $config: String, $description: String, $entityName: String, $projectName: String, $controller: JSONString, $scheduler: JSONString, $state: String, $priorRunsFilters: JSONString, ) { upsertSweep(input: { id: $id, config: $config, description: $description, entityName: $entityName, projectName: $projectName, controller: $controller, scheduler: $scheduler, state: $state, priorRunsFilters: $priorRunsFilters, }) { sweep { name _PROJECT_QUERY_ } configValidationWarnings } } """ # TODO(jhr): we need protocol versioning to know schema is not supported # for now we will just try both new and old query mutation_5 = gql( mutation_str.replace( "$controller: JSONString,", "$controller: JSONString,$launchScheduler: JSONString, $templateVariableValues: JSONString,", ) .replace( "controller: $controller,", "controller: $controller,launchScheduler: $launchScheduler,templateVariableValues: $templateVariableValues,", ) .replace("_PROJECT_QUERY_", project_query) ) # launchScheduler was introduced in core v0.14.0 mutation_4 = gql( mutation_str.replace( "$controller: JSONString,", "$controller: JSONString,$launchScheduler: JSONString,", ) .replace( "controller: $controller,", "controller: $controller,launchScheduler: $launchScheduler", ) .replace("_PROJECT_QUERY_", project_query) ) # mutation 3 maps to backend that can support CLI version of at least 0.10.31 mutation_3 = gql(mutation_str.replace("_PROJECT_QUERY_", project_query)) mutation_2 = gql( mutation_str.replace("_PROJECT_QUERY_", project_query).replace( "configValidationWarnings", "" ) ) mutation_1 = gql( mutation_str.replace("_PROJECT_QUERY_", "").replace( "configValidationWarnings", "" ) ) # TODO(dag): replace this with a query for protocol versioning mutations = [mutation_5, mutation_4, mutation_3, mutation_2, mutation_1] config = self._validate_config_and_fill_distribution(config) # Silly, but attr-dicts like EasyDicts don't serialize correctly to yaml. # This sanitizes them with a round trip pass through json to get a regular dict. config_str = yaml.dump( json.loads(json.dumps(config)), Dumper=util.NonOctalStringDumper ) filters = None if prior_runs: filters = json.dumps({"$or": [{"name": r} for r in prior_runs]}) err: Optional[Exception] = None for mutation in mutations: try: variables = { "id": obj_id, "config": config_str, "description": config.get("description"), "entityName": entity or self.settings("entity"), "projectName": project or self.settings("project"), "controller": controller, "launchScheduler": launch_scheduler, "templateVariableValues": json.dumps(template_variable_values), "scheduler": scheduler, "priorRunsFilters": filters, } if state: variables["state"] = state response = self.gql( mutation, variable_values=variables, check_retry_fn=util.no_retry_4xx, ) except UsageError: raise except Exception as e: # graphql schema exception is generic err = e continue err = None break if err: raise err sweep: Dict[str, Dict[str, Dict]] = response["upsertSweep"]["sweep"] project_obj: Dict[str, Dict] = sweep.get("project", {}) if project_obj: self.set_setting("project", project_obj["name"]) entity_obj: dict = project_obj.get("entity", {}) if entity_obj: self.set_setting("entity", entity_obj["name"]) warnings = response["upsertSweep"].get("configValidationWarnings", []) return response["upsertSweep"]["sweep"]["name"], warnings @normalize_exceptions def create_anonymous_api_key(self) -> str: """Create a new API key belonging to a new anonymous user.""" mutation = gql( """ mutation CreateAnonymousApiKey { createAnonymousEntity(input: {}) { apiKey { name } } } """ ) response = self.gql(mutation, variable_values={}) key: str = str(response["createAnonymousEntity"]["apiKey"]["name"]) return key @staticmethod def file_current(fname: str, md5: B64MD5) -> bool: """Checksum a file and compare the md5 with the known md5.""" return os.path.isfile(fname) and md5_file_b64(fname) == md5 @normalize_exceptions def pull( self, project: str, run: Optional[str] = None, entity: Optional[str] = None ) -> "List[requests.Response]": """Download files from W&B. Args: project (str): The project to download run (str, optional): The run to upload to entity (str, optional): The entity to scope this project to. Defaults to wandb models Returns: The `requests` library response object """ project, run = self.parse_slug(project, run=run) urls = self.download_urls(project, run, entity) responses = [] for filename in urls: _, response = self.download_write_file(urls[filename]) if response: responses.append(response) return responses def get_project(self) -> str: project: str = self.default_settings.get("project") or self.settings("project") return project @normalize_exceptions def push( self, files: Union[List[str], Dict[str, IO]], run: Optional[str] = None, entity: Optional[str] = None, project: Optional[str] = None, description: Optional[str] = None, force: bool = True, progress: Union[TextIO, Literal[False]] = False, ) -> "List[Optional[requests.Response]]": """Uploads multiple files to W&B. Args: files (list or dict): The filenames to upload, when dict the values are open files run (str, optional): The run to upload to entity (str, optional): The entity to scope this project to. Defaults to wandb models project (str, optional): The name of the project to upload to. Defaults to the one in settings. description (str, optional): The description of the changes force (bool, optional): Whether to prevent push if git has uncommitted changes progress (callable, or stream): If callable, will be called with (chunk_bytes, total_bytes) as argument. If TextIO, renders a progress bar to it. Returns: A list of `requests.Response` objects """ if project is None: project = self.get_project() if project is None: raise CommError("No project configured.") if run is None: run = self.current_run_id # TODO(adrian): we use a retriable version of self.upload_file() so # will never retry self.upload_urls() here. Instead, maybe we should # make push itself retriable. _, upload_headers, result = self.upload_urls( project, files, run, entity, ) extra_headers = {} for upload_header in upload_headers: key, val = upload_header.split(":", 1) extra_headers[key] = val responses = [] for file_name, file_info in result.items(): file_url = file_info["uploadUrl"] # If the upload URL is relative, fill it in with the base URL, # since it's a proxied file store like the on-prem VM. if file_url.startswith("/"): file_url = f"{self.api_url}{file_url}" try: # To handle Windows paths # TODO: this doesn't handle absolute paths... normal_name = os.path.join(*file_name.split("/")) open_file = ( files[file_name] if isinstance(files, dict) else open(normal_name, "rb") ) except OSError: print(f"{file_name} does not exist") # noqa: T201 continue if progress is False: responses.append( self.upload_file_retry( file_info["uploadUrl"], open_file, extra_headers=extra_headers ) ) else: if callable(progress): responses.append( # type: ignore self.upload_file_retry( file_url, open_file, progress, extra_headers=extra_headers ) ) else: length = os.fstat(open_file.fileno()).st_size with click.progressbar( # type: ignore file=progress, length=length, label=f"Uploading file: {file_name}", fill_char=click.style("&", fg="green"), ) as bar: responses.append( self.upload_file_retry( file_url, open_file, lambda bites, _: bar.update(bites), extra_headers=extra_headers, ) ) open_file.close() return responses def link_artifact( self, client_id: str, server_id: str, portfolio_name: str, entity: str, project: str, aliases: Sequence[str], organization: str, ) -> Dict[str, Any]: template = """ mutation LinkArtifact( $artifactPortfolioName: String!, $entityName: String!, $projectName: String!, $aliases: [ArtifactAliasInput!], ID_TYPE ) { linkArtifact(input: { artifactPortfolioName: $artifactPortfolioName, entityName: $entityName, projectName: $projectName, aliases: $aliases, ID_VALUE }) { versionIndex } } """ org_entity = "" if is_artifact_registry_project(project): try: org_entity = self._resolve_org_entity_name( entity=entity, organization=organization ) except ValueError as e: wandb.termerror(str(e)) raise def replace(a: str, b: str) -> None: nonlocal template template = template.replace(a, b) if server_id: replace("ID_TYPE", "$artifactID: ID") replace("ID_VALUE", "artifactID: $artifactID") elif client_id: replace("ID_TYPE", "$clientID: ID") replace("ID_VALUE", "clientID: $clientID") variable_values = { "clientID": client_id, "artifactID": server_id, "artifactPortfolioName": portfolio_name, "entityName": org_entity or entity, "projectName": project, "aliases": [ {"alias": alias, "artifactCollectionName": portfolio_name} for alias in aliases ], } mutation = gql(template) response = self.gql(mutation, variable_values=variable_values) link_artifact: Dict[str, Any] = response["linkArtifact"] return link_artifact def _resolve_org_entity_name(self, entity: str, organization: str = "") -> str: # resolveOrgEntityName fetches the portfolio's org entity's name. # # The organization parameter may be empty, an org's display name, or an org entity name. # # If the server doesn't support fetching the org name of a portfolio, then this returns # the organization parameter, or an error if it is empty. Otherwise, this returns the # fetched value after validating that the given organization, if not empty, matches # either the org's display or entity name. if not entity: raise ValueError("Entity name is required to resolve org entity name.") org_fields = self.server_organization_type_introspection() can_shorthand_org_entity = "orgEntity" in org_fields if not organization and not can_shorthand_org_entity: raise ValueError( "Fetching Registry artifacts without inputting an organization " "is unavailable for your server version. " "Please upgrade your server to 0.50.0 or later." ) if not can_shorthand_org_entity: # Server doesn't support fetching org entity to validate, # assume org entity is correctly inputted return organization orgs_from_entity = self._fetch_orgs_and_org_entities_from_entity(entity) if organization: return _match_org_with_fetched_org_entities(organization, orgs_from_entity) # If no input organization provided, error if entity belongs to multiple orgs because we # cannot determine which one to use. if len(orgs_from_entity) > 1: raise ValueError( f"Personal entity {entity!r} belongs to multiple organizations " "and cannot be used without specifying the organization name. " "Please specify the organization in the Registry path or use a team entity in the entity settings." ) return orgs_from_entity[0].entity_name def _fetch_orgs_and_org_entities_from_entity(self, entity: str) -> List[_OrgNames]: """Fetches organization entity names and display names for a given entity. Args: entity (str): Entity name to lookup. Can be either a personal or team entity. Returns: List[_OrgNames]: List of _OrgNames tuples. (_OrgNames(entity_name, display_name)) Raises: ValueError: If entity is not found, has no organizations, or other validation errors. """ query = gql( """ query FetchOrgEntityFromEntity($entityName: String!) { entity(name: $entityName) { organization { name orgEntity { name } } user { organizations { name orgEntity { name } } } } } """ ) response = self.gql( query, variable_values={ "entityName": entity, }, ) # Parse organization from response entity_resp = response["entity"]["organization"] user_resp = response["entity"]["user"] # Check for organization under team/org entity type if entity_resp: org_name = entity_resp.get("name") org_entity_name = entity_resp.get("orgEntity") and entity_resp[ "orgEntity" ].get("name") if not org_name or not org_entity_name: raise ValueError( f"Unable to find an organization under entity {entity!r}." ) return [_OrgNames(entity_name=org_entity_name, display_name=org_name)] # Check for organization under personal entity type, where a user can belong to multiple orgs elif user_resp: orgs = user_resp.get("organizations", []) org_entities_return = [ _OrgNames( entity_name=org["orgEntity"]["name"], display_name=org["name"] ) for org in orgs if org.get("orgEntity") and org.get("name") ] if not org_entities_return: raise ValueError( f"Unable to resolve an organization associated with personal entity: {entity!r}. " "This could be because its a personal entity that doesn't belong to any organizations. " "Please specify the organization in the Registry path or use a team entity in the entity settings." ) return org_entities_return else: raise ValueError(f"Unable to find an organization under entity {entity!r}.") def _construct_use_artifact_query( self, artifact_id: str, entity_name: Optional[str] = None, project_name: Optional[str] = None, run_name: Optional[str] = None, use_as: Optional[str] = None, artifact_entity_name: Optional[str] = None, artifact_project_name: Optional[str] = None, ) -> Tuple[Document, Dict[str, Any]]: query_vars = [ "$entityName: String!", "$projectName: String!", "$runName: String!", "$artifactID: ID!", ] query_args = [ "entityName: $entityName", "projectName: $projectName", "runName: $runName", "artifactID: $artifactID", ] artifact_types = self.server_use_artifact_input_introspection() if "usedAs" in artifact_types and use_as: query_vars.append("$usedAs: String") query_args.append("usedAs: $usedAs") entity_name = entity_name or self.settings("entity") project_name = project_name or self.settings("project") run_name = run_name or self.current_run_id variable_values: Dict[str, Any] = { "entityName": entity_name, "projectName": project_name, "runName": run_name, "artifactID": artifact_id, "usedAs": use_as, } server_allows_entity_project_information = self._server_supports( ServerFeature.USE_ARTIFACT_WITH_ENTITY_AND_PROJECT_INFORMATION ) if server_allows_entity_project_information: query_vars.extend( [ "$artifactEntityName: String", "$artifactProjectName: String", ] ) query_args.extend( [ "artifactEntityName: $artifactEntityName", "artifactProjectName: $artifactProjectName", ] ) variable_values["artifactEntityName"] = artifact_entity_name variable_values["artifactProjectName"] = artifact_project_name vars_str = ", ".join(query_vars) args_str = ", ".join(query_args) query = gql( f""" mutation UseArtifact({vars_str}) {{ useArtifact(input: {{{args_str}}}) {{ artifact {{ id digest description state createdAt metadata }} }} }} """ ) return query, variable_values def use_artifact( self, artifact_id: str, entity_name: Optional[str] = None, project_name: Optional[str] = None, run_name: Optional[str] = None, artifact_entity_name: Optional[str] = None, artifact_project_name: Optional[str] = None, use_as: Optional[str] = None, ) -> Optional[Dict[str, Any]]: query, variable_values = self._construct_use_artifact_query( artifact_id, entity_name, project_name, run_name, use_as, artifact_entity_name, artifact_project_name, ) response = self.gql(query, variable_values) if response["useArtifact"]["artifact"]: artifact: Dict[str, Any] = response["useArtifact"]["artifact"] return artifact return None # Fetch fields available in backend of Organization type def server_organization_type_introspection(self) -> List[str]: query_string = """ query ProbeServerOrganization { OrganizationInfoType: __type(name:"Organization") { fields { name } } } """ if self.server_organization_type_fields_info is None: query = gql(query_string) res = self.gql(query) input_fields = res.get("OrganizationInfoType", {}).get("fields", [{}]) self.server_organization_type_fields_info = [ field["name"] for field in input_fields if "name" in field ] return self.server_organization_type_fields_info # Fetch input arguments for the "artifact" endpoint on the "Project" type def server_project_type_introspection(self) -> bool: if self.server_supports_enabling_artifact_usage_tracking is not None: return self.server_supports_enabling_artifact_usage_tracking query_string = """ query ProbeServerProjectInfo { ProjectInfoType: __type(name:"Project") { fields { name args { name } } } } """ query = gql(query_string) res = self.gql(query) input_fields = res.get("ProjectInfoType", {}).get("fields", [{}]) artifact_args: List[Dict[str, str]] = next( ( field.get("args", []) for field in input_fields if field.get("name") == "artifact" ), [], ) self.server_supports_enabling_artifact_usage_tracking = any( arg.get("name") == "enableTracking" for arg in artifact_args ) return self.server_supports_enabling_artifact_usage_tracking def create_artifact_type( self, artifact_type_name: str, entity_name: Optional[str] = None, project_name: Optional[str] = None, description: Optional[str] = None, ) -> Optional[str]: mutation = gql( """ mutation CreateArtifactType( $entityName: String!, $projectName: String!, $artifactTypeName: String!, $description: String ) { createArtifactType(input: { entityName: $entityName, projectName: $projectName, name: $artifactTypeName, description: $description }) { artifactType { id } } } """ ) entity_name = entity_name or self.settings("entity") project_name = project_name or self.settings("project") response = self.gql( mutation, variable_values={ "entityName": entity_name, "projectName": project_name, "artifactTypeName": artifact_type_name, "description": description, }, ) _id: Optional[str] = response["createArtifactType"]["artifactType"]["id"] return _id def server_artifact_introspection(self) -> List[str]: query_string = """ query ProbeServerArtifact { ArtifactInfoType: __type(name:"Artifact") { fields { name } } } """ if self.server_artifact_fields_info is None: query = gql(query_string) res = self.gql(query) input_fields = res.get("ArtifactInfoType", {}).get("fields", [{}]) self.server_artifact_fields_info = [ field["name"] for field in input_fields if "name" in field ] return self.server_artifact_fields_info def server_create_artifact_introspection(self) -> List[str]: query_string = """ query ProbeServerCreateArtifactInput { CreateArtifactInputInfoType: __type(name:"CreateArtifactInput") { inputFields{ name } } } """ if self.server_create_artifact_input_info is None: query = gql(query_string) res = self.gql(query) input_fields = res.get("CreateArtifactInputInfoType", {}).get( "inputFields", [{}] ) self.server_create_artifact_input_info = [ field["name"] for field in input_fields if "name" in field ] return self.server_create_artifact_input_info def _get_create_artifact_mutation( self, fields: List, history_step: Optional[int], distributed_id: Optional[str], ) -> str: types = "" values = "" if "historyStep" in fields and history_step not in [0, None]: types += "$historyStep: Int64!," values += "historyStep: $historyStep," if distributed_id: types += "$distributedID: String," values += "distributedID: $distributedID," if "clientID" in fields: types += "$clientID: ID," values += "clientID: $clientID," if "sequenceClientID" in fields: types += "$sequenceClientID: ID," values += "sequenceClientID: $sequenceClientID," if "enableDigestDeduplication" in fields: values += "enableDigestDeduplication: true," if "ttlDurationSeconds" in fields: types += "$ttlDurationSeconds: Int64," values += "ttlDurationSeconds: $ttlDurationSeconds," if "tags" in fields: types += "$tags: [TagInput!]," values += "tags: $tags," query_template = """ mutation CreateArtifact( $artifactTypeName: String!, $artifactCollectionNames: [String!], $entityName: String!, $projectName: String!, $runName: String, $description: String, $digest: String!, $aliases: [ArtifactAliasInput!], $metadata: JSONString, _CREATE_ARTIFACT_ADDITIONAL_TYPE_ ) { createArtifact(input: { artifactTypeName: $artifactTypeName, artifactCollectionNames: $artifactCollectionNames, entityName: $entityName, projectName: $projectName, runName: $runName, description: $description, digest: $digest, digestAlgorithm: MANIFEST_MD5, aliases: $aliases, metadata: $metadata, _CREATE_ARTIFACT_ADDITIONAL_VALUE_ }) { artifact { id state artifactSequence { id latestArtifact { id versionIndex } } } } } """ return query_template.replace( "_CREATE_ARTIFACT_ADDITIONAL_TYPE_", types ).replace("_CREATE_ARTIFACT_ADDITIONAL_VALUE_", values) def create_artifact( self, artifact_type_name: str, artifact_collection_name: str, digest: str, client_id: Optional[str] = None, sequence_client_id: Optional[str] = None, entity_name: Optional[str] = None, project_name: Optional[str] = None, run_name: Optional[str] = None, description: Optional[str] = None, metadata: Optional[Dict] = None, ttl_duration_seconds: Optional[int] = None, aliases: Optional[List[Dict[str, str]]] = None, tags: Optional[List[Dict[str, str]]] = None, distributed_id: Optional[str] = None, is_user_created: Optional[bool] = False, history_step: Optional[int] = None, ) -> Tuple[Dict, Dict]: fields = self.server_create_artifact_introspection() artifact_fields = self.server_artifact_introspection() if ("ttlIsInherited" not in artifact_fields) and ttl_duration_seconds: wandb.termwarn( "Server not compatible with setting Artifact TTLs, please upgrade the server to use Artifact TTL" ) # ttlDurationSeconds is only usable if ttlIsInherited is also present ttl_duration_seconds = None if ("tags" not in artifact_fields) and tags: wandb.termwarn( "Server not compatible with Artifact tags. " "To use Artifact tags, please upgrade the server to v0.85 or higher." ) query_template = self._get_create_artifact_mutation( fields, history_step, distributed_id ) entity_name = entity_name or self.settings("entity") project_name = project_name or self.settings("project") if not is_user_created: run_name = run_name or self.current_run_id mutation = gql(query_template) response = self.gql( mutation, variable_values={ "entityName": entity_name, "projectName": project_name, "runName": run_name, "artifactTypeName": artifact_type_name, "artifactCollectionNames": [artifact_collection_name], "clientID": client_id, "sequenceClientID": sequence_client_id, "digest": digest, "description": description, "aliases": list(aliases or []), "tags": list(tags or []), "metadata": json.dumps(util.make_safe_for_json(metadata)) if metadata else None, "ttlDurationSeconds": ttl_duration_seconds, "distributedID": distributed_id, "historyStep": history_step, }, ) av = response["createArtifact"]["artifact"] latest = response["createArtifact"]["artifact"]["artifactSequence"].get( "latestArtifact" ) return av, latest def commit_artifact(self, artifact_id: str) -> "_Response": mutation = gql( """ mutation CommitArtifact( $artifactID: ID!, ) { commitArtifact(input: { artifactID: $artifactID, }) { artifact { id digest } } } """ ) response: _Response = self.gql( mutation, variable_values={"artifactID": artifact_id}, timeout=60, ) return response def complete_multipart_upload_artifact( self, artifact_id: str, storage_path: str, completed_parts: List[Dict[str, Any]], upload_id: Optional[str], complete_multipart_action: str = "Complete", ) -> Optional[str]: mutation = gql( """ mutation CompleteMultipartUploadArtifact( $completeMultipartAction: CompleteMultipartAction!, $completedParts: [UploadPartsInput!]!, $artifactID: ID! $storagePath: String! $uploadID: String! ) { completeMultipartUploadArtifact( input: { completeMultipartAction: $completeMultipartAction, completedParts: $completedParts, artifactID: $artifactID, storagePath: $storagePath uploadID: $uploadID } ) { digest } } """ ) response = self.gql( mutation, variable_values={ "completeMultipartAction": complete_multipart_action, "artifactID": artifact_id, "storagePath": storage_path, "completedParts": completed_parts, "uploadID": upload_id, }, ) digest: Optional[str] = response["completeMultipartUploadArtifact"]["digest"] return digest def create_artifact_manifest( self, name: str, digest: str, artifact_id: Optional[str], base_artifact_id: Optional[str] = None, entity: Optional[str] = None, project: Optional[str] = None, run: Optional[str] = None, include_upload: bool = True, type: str = "FULL", ) -> Tuple[str, Dict[str, Any]]: mutation = gql( """ mutation CreateArtifactManifest( $name: String!, $digest: String!, $artifactID: ID!, $baseArtifactID: ID, $entityName: String!, $projectName: String!, $runName: String!, $includeUpload: Boolean!, {} ) {{ createArtifactManifest(input: {{ name: $name, digest: $digest, artifactID: $artifactID, baseArtifactID: $baseArtifactID, entityName: $entityName, projectName: $projectName, runName: $runName, {} }}) {{ artifactManifest {{ id file {{ id name displayName uploadUrl @include(if: $includeUpload) uploadHeaders @include(if: $includeUpload) }} }} }} }} """.format( "$type: ArtifactManifestType = FULL" if type != "FULL" else "", "type: $type" if type != "FULL" else "", ) ) entity_name = entity or self.settings("entity") project_name = project or self.settings("project") run_name = run or self.current_run_id response = self.gql( mutation, variable_values={ "name": name, "digest": digest, "artifactID": artifact_id, "baseArtifactID": base_artifact_id, "entityName": entity_name, "projectName": project_name, "runName": run_name, "includeUpload": include_upload, "type": type, }, ) return ( response["createArtifactManifest"]["artifactManifest"]["id"], response["createArtifactManifest"]["artifactManifest"]["file"], ) def update_artifact_manifest( self, artifact_manifest_id: str, base_artifact_id: Optional[str] = None, digest: Optional[str] = None, include_upload: Optional[bool] = True, ) -> Tuple[str, Dict[str, Any]]: mutation = gql( """ mutation UpdateArtifactManifest( $artifactManifestID: ID!, $digest: String, $baseArtifactID: ID, $includeUpload: Boolean!, ) { updateArtifactManifest(input: { artifactManifestID: $artifactManifestID, digest: $digest, baseArtifactID: $baseArtifactID, }) { artifactManifest { id file { id name displayName uploadUrl @include(if: $includeUpload) uploadHeaders @include(if: $includeUpload) } } } } """ ) response = self.gql( mutation, variable_values={ "artifactManifestID": artifact_manifest_id, "digest": digest, "baseArtifactID": base_artifact_id, "includeUpload": include_upload, }, ) return ( response["updateArtifactManifest"]["artifactManifest"]["id"], response["updateArtifactManifest"]["artifactManifest"]["file"], ) def update_artifact_metadata( self, artifact_id: str, metadata: Dict[str, Any] ) -> Dict[str, Any]: """Set the metadata of the given artifact version.""" mutation = gql( """ mutation UpdateArtifact( $artifactID: ID!, $metadata: JSONString, ) { updateArtifact(input: { artifactID: $artifactID, metadata: $metadata, }) { artifact { id } } } """ ) response = self.gql( mutation, variable_values={ "artifactID": artifact_id, "metadata": json.dumps(metadata), }, ) return response["updateArtifact"]["artifact"] def _resolve_client_id( self, client_id: str, ) -> Optional[str]: if client_id in self._client_id_mapping: return self._client_id_mapping[client_id] query = gql( """ query ClientIDMapping($clientID: ID!) { clientIDMapping(clientID: $clientID) { serverID } } """ ) response = self.gql( query, variable_values={ "clientID": client_id, }, ) server_id = None if response is not None: client_id_mapping = response.get("clientIDMapping") if client_id_mapping is not None: server_id = client_id_mapping.get("serverID") if server_id is not None: self._client_id_mapping[client_id] = server_id return server_id def server_create_artifact_file_spec_input_introspection(self) -> List: query_string = """ query ProbeServerCreateArtifactFileSpecInput { CreateArtifactFileSpecInputInfoType: __type(name:"CreateArtifactFileSpecInput") { inputFields{ name } } } """ query = gql(query_string) res = self.gql(query) create_artifact_file_spec_input_info = [ field.get("name", "") for field in res.get("CreateArtifactFileSpecInputInfoType", {}).get( "inputFields", [{}] ) ] return create_artifact_file_spec_input_info @normalize_exceptions def create_artifact_files( self, artifact_files: Iterable["CreateArtifactFileSpecInput"] ) -> Mapping[str, "CreateArtifactFilesResponseFile"]: query_template = """ mutation CreateArtifactFiles( $storageLayout: ArtifactStorageLayout! $artifactFiles: [CreateArtifactFileSpecInput!]! ) { createArtifactFiles(input: { artifactFiles: $artifactFiles, storageLayout: $storageLayout, }) { files { edges { node { id name displayName uploadUrl uploadHeaders _MULTIPART_UPLOAD_FIELDS_ artifact { id } } } } } } """ multipart_upload_url_query = """ storagePath uploadMultipartUrls { uploadID uploadUrlParts { partNumber uploadUrl } } """ # TODO: we should use constants here from interface/artifacts.py # but probably don't want the dependency. We're going to remove # this setting in a future release, so I'm just hard-coding the strings. storage_layout = "V2" if env.get_use_v1_artifacts(): storage_layout = "V1" create_artifact_file_spec_input_fields = ( self.server_create_artifact_file_spec_input_introspection() ) if "uploadPartsInput" in create_artifact_file_spec_input_fields: query_template = query_template.replace( "_MULTIPART_UPLOAD_FIELDS_", multipart_upload_url_query ) else: query_template = query_template.replace("_MULTIPART_UPLOAD_FIELDS_", "") mutation = gql(query_template) response = self.gql( mutation, variable_values={ "storageLayout": storage_layout, "artifactFiles": [af for af in artifact_files], }, ) result = {} for edge in response["createArtifactFiles"]["files"]["edges"]: node = edge["node"] result[node["displayName"]] = node return result @normalize_exceptions def notify_scriptable_run_alert( self, title: str, text: str, level: Optional[str] = None, wait_duration: Optional["Number"] = None, ) -> bool: mutation = gql( """ mutation NotifyScriptableRunAlert( $entityName: String!, $projectName: String!, $runName: String!, $title: String!, $text: String!, $severity: AlertSeverity = INFO, $waitDuration: Duration ) { notifyScriptableRunAlert(input: { entityName: $entityName, projectName: $projectName, runName: $runName, title: $title, text: $text, severity: $severity, waitDuration: $waitDuration }) { success } } """ ) response = self.gql( mutation, variable_values={ "entityName": self.settings("entity"), "projectName": self.settings("project"), "runName": self.current_run_id, "title": title, "text": text, "severity": level, "waitDuration": wait_duration, }, ) success: bool = response["notifyScriptableRunAlert"]["success"] return success def get_sweep_state( self, sweep: str, entity: Optional[str] = None, project: Optional[str] = None ) -> "SweepState": state: SweepState = self.sweep( sweep=sweep, entity=entity, project=project, specs="{}" )["state"] return state def set_sweep_state( self, sweep: str, state: "SweepState", entity: Optional[str] = None, project: Optional[str] = None, ) -> None: assert state in ("RUNNING", "PAUSED", "CANCELED", "FINISHED") s = self.sweep(sweep=sweep, entity=entity, project=project, specs="{}") curr_state = s["state"].upper() if state == "PAUSED" and curr_state not in ("PAUSED", "RUNNING"): raise Exception(f"Cannot pause {curr_state.lower()} sweep.") elif state != "RUNNING" and curr_state not in ("RUNNING", "PAUSED", "PENDING"): raise Exception(f"Sweep already {curr_state.lower()}.") sweep_id = s["id"] mutation = gql( """ mutation UpsertSweep( $id: ID, $state: String, $entityName: String, $projectName: String ) { upsertSweep(input: { id: $id, state: $state, entityName: $entityName, projectName: $projectName }){ sweep { name } } } """ ) self.gql( mutation, variable_values={ "id": sweep_id, "state": state, "entityName": entity or self.settings("entity"), "projectName": project or self.settings("project"), }, ) def stop_sweep( self, sweep: str, entity: Optional[str] = None, project: Optional[str] = None, ) -> None: """Finish the sweep to stop running new runs and let currently running runs finish.""" self.set_sweep_state( sweep=sweep, state="FINISHED", entity=entity, project=project ) def cancel_sweep( self, sweep: str, entity: Optional[str] = None, project: Optional[str] = None, ) -> None: """Cancel the sweep to kill all running runs and stop running new runs.""" self.set_sweep_state( sweep=sweep, state="CANCELED", entity=entity, project=project ) def pause_sweep( self, sweep: str, entity: Optional[str] = None, project: Optional[str] = None, ) -> None: """Pause the sweep to temporarily stop running new runs.""" self.set_sweep_state( sweep=sweep, state="PAUSED", entity=entity, project=project ) def resume_sweep( self, sweep: str, entity: Optional[str] = None, project: Optional[str] = None, ) -> None: """Resume the sweep to continue running new runs.""" self.set_sweep_state( sweep=sweep, state="RUNNING", entity=entity, project=project ) def _status_request(self, url: str, length: int) -> requests.Response: """Ask google how much we've uploaded.""" check_httpclient_logger_handler() return requests.put( url=url, headers={"Content-Length": "0", "Content-Range": f"bytes */{length}"}, ) def _flatten_edges(self, response: "_Response") -> List[Dict]: """Return an array from the nested graphql relay structure.""" return [node["node"] for node in response["edges"]] @normalize_exceptions def stop_run( self, run_id: str, ) -> bool: mutation = gql( """ mutation stopRun($id: ID!) { stopRun(input: { id: $id }) { clientMutationId success } } """ ) response = self.gql( mutation, variable_values={ "id": run_id, }, ) success: bool = response["stopRun"].get("success") return success