File size: 4,253 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
"""Support for parsing W&B URLs (which might be user provided) into constituent parts."""
from dataclasses import dataclass
from enum import IntEnum
from typing import Optional
from urllib.parse import urlparse
# URL scheme prefixes. WandbReference.parse only accepts strings that start
# with one of these (absolute URLs) or with "/" (bare paths).
PREFIX_HTTP = "http://"
PREFIX_HTTPS = "https://"
class ReferenceType(IntEnum):
    """Kind of W&B object that a parsed reference points at."""

    # Assigned by WandbReference.parse for ".../runs/<run_id>" paths.
    RUN = 1
    # Assigned by WandbReference.parse for ".../artifacts/job/<job_name>" paths.
    JOB = 2
# Ideally we would not overload the URL paths as we do.
# TODO: Not sure these lists are exhaustive, and even if they are, more special
#       paths might get added. It would be good to have restrictions that we
#       could check.

# First path segments that WandbReference.parse does not treat as an entity.
RESERVED_NON_ENTITIES = (
    "create-team",
    "fully-connected",
    "registry",
    "settings",
    "subscriptions",
)

# Second path segments that WandbReference.parse does not treat as a project.
RESERVED_NON_PROJECTS = ("likes", "projects")

# Segments following a job name that WandbReference.parse does not treat as a
# job alias.
RESERVED_JOB_PATHS = ("_view",)
@dataclass
class WandbReference:
    """Constituent parts of a W&B URL or bare URL path.

    Instances are normally produced by :meth:`parse`. Any field that could not
    be determined from the input keeps its default value.
    """

    # Network location of the URL as reported by ``urlparse`` (``netloc``).
    # TODO: This will include port, should we separate that out?
    host: Optional[str] = None

    entity: Optional[str] = None
    project: Optional[str] = None

    # Set when we don't know how to parse yet
    path: Optional[str] = None

    # Reference type will determine what other fields are set
    ref_type: Optional[ReferenceType] = None

    # Populated for ReferenceType.RUN references.
    run_id: Optional[str] = None

    # Populated for ReferenceType.JOB references.
    job_name: Optional[str] = None
    job_alias: str = "latest"  # In addition to an alias can be a version specifier

    def is_bare(self) -> bool:
        """Return True when no host is set (e.g. parsed from a path-only string)."""
        return self.host is None

    def is_job(self) -> bool:
        """Return True when this reference points at a job."""
        return self.ref_type == ReferenceType.JOB

    def is_run(self) -> bool:
        """Return True when this reference points at a run."""
        return self.ref_type == ReferenceType.RUN

    def is_job_or_run(self) -> bool:
        """Return True when this reference points at either a job or a run."""
        return self.is_job() or self.is_run()

    def job_reference(self) -> str:
        """Return ``"<job_name>:<job_alias>"``.

        Raises:
            AssertionError: If this reference is not a job reference.
        """
        # NOTE: assert-based preconditions are not enforced under ``python -O``.
        assert self.is_job()
        return f"{self.job_name}:{self.job_alias}"

    def job_reference_scoped(self) -> str:
        """Return ``"<entity>/<project>/<job_name>:<job_alias>"``.

        Raises:
            AssertionError: If entity or project is unset/empty, or if this
                reference is not a job reference.
        """
        assert self.entity
        assert self.project
        unscoped = self.job_reference()
        return f"{self.entity}/{self.project}/{unscoped}"

    def url_host(self) -> str:
        """Return ``"https://<host>"``, or an empty string when no host is set.

        The HTTPS prefix is always used, whatever scheme the reference was
        originally parsed from.
        """
        return f"{PREFIX_HTTPS}{self.host}" if self.host else ""

    def url_entity(self) -> str:
        """Return the entity URL, ``"<url_host>/<entity>"``.

        Raises:
            AssertionError: If entity is unset or empty.
        """
        assert self.entity
        return f"{self.url_host()}/{self.entity}"

    def url_project(self) -> str:
        """Return the project URL, ``"<url_entity>/<project>"``.

        Raises:
            AssertionError: If project or entity is unset or empty.
        """
        assert self.project
        return f"{self.url_entity()}/{self.project}"

    @staticmethod
    def parse(uri: str) -> Optional["WandbReference"]:
        """Attempt to parse a string as a W&B URL.

        Accepted inputs start with ``http://``, ``https://`` or ``/``. The path
        is interpreted positionally as
        ``/<entity>/<project>/runs/<run_id>`` or
        ``/<entity>/<project>/artifacts/job/<job_name>[/<job_alias>]``.
        Parsing stops at the first segment that is missing, empty (for example
        because of a trailing slash) or reserved; fields after that point keep
        their defaults.

        Args:
            uri: The candidate string, possibly user provided.

        Returns:
            A ``WandbReference`` with as many fields populated as could be
            recognized, or ``None`` if ``uri`` does not look like a URL/path
            or cannot be parsed by ``urlparse``.
        """
        # TODO: Error if HTTP and host is not localhost?
        if not uri.startswith(("/", PREFIX_HTTP, PREFIX_HTTPS)):
            return None

        # This takes care of things like query and fragment
        try:
            parsed = urlparse(uri)
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. an unbalanced "["
            # in the host); treat those as "not a W&B URL" instead of raising.
            return None

        ref = WandbReference()
        if parsed.netloc:
            ref.host = parsed.netloc

        if not parsed.path.startswith("/"):
            return ref

        remainder = parsed.path[1:]
        # str.split always yields at least one element, so parts[0] is safe.
        parts = remainder.split("/")

        entity = parts[0]
        if not entity or entity in RESERVED_NON_ENTITIES:
            # Not something we know how to interpret: keep the raw path (if
            # any) rather than recording an empty or reserved entity.
            ref.path = remainder or None
            return ref
        ref.entity = entity

        project = parts[1] if len(parts) > 1 else ""
        if not project or project in RESERVED_NON_PROJECTS:
            return ref
        ref.project = project

        # The truthiness checks below keep an empty segment (trailing or
        # doubled slash) from being recorded as a run id, job name or alias.
        if len(parts) > 3 and parts[2] == "runs" and parts[3]:
            ref.ref_type = ReferenceType.RUN
            ref.run_id = parts[3]
        elif (
            len(parts) > 4
            and parts[2] == "artifacts"
            and parts[3] == "job"
            and parts[4]
        ):
            ref.ref_type = ReferenceType.JOB
            ref.job_name = parts[4]
            alias = parts[5] if len(parts) > 5 else ""
            if alias and alias not in RESERVED_JOB_PATHS:
                ref.job_alias = alias
            # TODO: Right now we are not tracking selection as part of URL state in the Jobs tab.
            #       If that changes we'll want to update this.

        return ref

    @staticmethod
    def is_uri_job_or_run(uri: str) -> bool:
        """Return True when ``uri`` parses as a reference to a job or a run."""
        ref = WandbReference.parse(uri)
        return ref is not None and ref.is_job_or_run()
|