# jamtur01's picture
# Upload folder using huggingface_hub
# 9c6594c verified
"""Support for parsing W&B URLs (which might be user provided) into constituent parts."""
from dataclasses import dataclass
from enum import IntEnum
from typing import Optional
from urllib.parse import urlparse
# URL scheme prefixes. `WandbReference.parse` only accepts absolute URLs that
# start with one of these (or bare paths that start with "/").
PREFIX_HTTP = "http://"
# Also used by `WandbReference.url_host` when rebuilding a URL from a host.
PREFIX_HTTPS = "https://"
class ReferenceType(IntEnum):
    """Kind of W&B object that a parsed reference points at."""

    RUN = 1
    JOB = 2
# Ideally we would not overload the URL paths as we do.
# TODO: Not sure these are exhaustive, and even if so more special paths might get added.
# Would be good to have restrictions that we could check.
# First path segments that are never interpreted as an entity name.
RESERVED_NON_ENTITIES = (
    "create-team",
    "fully-connected",
    "registry",
    "settings",
    "subscriptions",
)

# Second path segments that are never interpreted as a project name.
RESERVED_NON_PROJECTS = (
    "likes",
    "projects",
)

# Segments following a job name that are never interpreted as a job alias.
RESERVED_JOB_PATHS = ("_view",)
@dataclass
class WandbReference:
    """Constituent parts of a W&B URL or bare URL path.

    Instances are normally produced by :meth:`parse`. ``ref_type`` records
    whether the reference points at a run or a job, which in turn determines
    whether ``run_id`` or ``job_name`` / ``job_alias`` carry meaning.
    """

    # TODO: This will include port, should we separate that out?
    host: Optional[str] = None

    entity: Optional[str] = None
    project: Optional[str] = None

    # Set when we don't know how to parse yet
    path: Optional[str] = None

    # Reference type will determine what other fields are set
    ref_type: Optional[ReferenceType] = None

    run_id: Optional[str] = None

    job_name: Optional[str] = None
    job_alias: str = "latest"  # In addition to an alias can be a version specifier

    def is_bare(self) -> bool:
        """Return True when the reference has no host (it came from a bare path)."""
        return self.host is None

    def is_job(self) -> bool:
        """Return True when the reference points at a job."""
        return self.ref_type == ReferenceType.JOB

    def is_run(self) -> bool:
        """Return True when the reference points at a run."""
        return self.ref_type == ReferenceType.RUN

    def is_job_or_run(self) -> bool:
        """Return True when the reference points at either a job or a run."""
        return self.is_job() or self.is_run()

    def job_reference(self) -> str:
        """Return ``"<job_name>:<job_alias>"``.

        Raises:
            AssertionError: If this reference is not a job.
        """
        assert self.is_job()
        return f"{self.job_name}:{self.job_alias}"

    def job_reference_scoped(self) -> str:
        """Return ``"<entity>/<project>/<job_name>:<job_alias>"``.

        Raises:
            AssertionError: If entity or project is unset, or this is not a job.
        """
        assert self.entity
        assert self.project
        unscoped = self.job_reference()
        return f"{self.entity}/{self.project}/{unscoped}"

    def url_host(self) -> str:
        """Return the host as an ``https://`` URL, or ``""`` for a bare reference.

        The scheme is always HTTPS; the scheme of the parsed input is not kept.
        """
        return f"{PREFIX_HTTPS}{self.host}" if self.host else ""

    def url_entity(self) -> str:
        """Return the URL of the entity.

        Raises:
            AssertionError: If entity is unset.
        """
        assert self.entity
        return f"{self.url_host()}/{self.entity}"

    def url_project(self) -> str:
        """Return the URL of the project.

        Raises:
            AssertionError: If project or entity is unset.
        """
        assert self.project
        return f"{self.url_entity()}/{self.project}"

    @staticmethod
    def parse(uri: str) -> Optional["WandbReference"]:
        """Attempt to parse a string as a W&B URL.

        Args:
            uri: An absolute ``http://`` / ``https://`` URL, or a bare path
                starting with ``/``.

        Returns:
            ``None`` when ``uri`` has none of the accepted prefixes; otherwise a
            reference populated with every part that could be recognised.
            Empty path segments (for example from a trailing ``/``) are
            treated as absent, so they never become an empty-string entity,
            project, run id, job name or job alias.
        """
        # TODO: Error if HTTP and host is not localhost?
        if not uri.startswith(("/", PREFIX_HTTP, PREFIX_HTTPS)):
            return None

        ref = WandbReference()

        # This takes care of things like query and fragment
        parsed = urlparse(uri)
        if parsed.netloc:
            ref.host = parsed.netloc

        if not parsed.path.startswith("/"):
            return ref

        ref.path = parsed.path[1:]
        # str.split always yields at least one element, so parts[0] is safe.
        parts = ref.path.split("/")

        entity = parts[0]
        if entity in RESERVED_NON_ENTITIES:
            # Special site path: leave it in ``path`` as not-yet-parsed.
            return ref
        if not entity:
            # Nothing usable in the first segment; keep any remainder as
            # unparsed path, but do not report an empty string for "/".
            ref.path = ref.path or None
            return ref
        ref.path = None
        ref.entity = entity

        project = parts[1] if len(parts) > 1 else ""
        if not project or project in RESERVED_NON_PROJECTS:
            return ref
        ref.project = project

        if len(parts) > 3 and parts[2] == "runs" and parts[3]:
            ref.ref_type = ReferenceType.RUN
            ref.run_id = parts[3]
        elif (
            len(parts) > 4
            and parts[2] == "artifacts"
            and parts[3] == "job"
            and parts[4]
        ):
            ref.ref_type = ReferenceType.JOB
            ref.job_name = parts[4]
            # An empty or reserved trailing segment keeps the default alias.
            if len(parts) > 5 and parts[5] and parts[5] not in RESERVED_JOB_PATHS:
                ref.job_alias = parts[5]
        # TODO: Right now we are not tracking selection as part of URL state in the Jobs tab.
        #       If that changes we'll want to update this.

        return ref

    @staticmethod
    def is_uri_job_or_run(uri: str) -> bool:
        """Return True when ``uri`` parses as a reference to a job or a run."""
        ref = WandbReference.parse(uri)
        return ref is not None and ref.is_job_or_run()