Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /torchvision /datasets /stl10.py

jamtur01

Upload folder using huggingface_hub

9c6594c verified about 1 month ago

raw

history blame contribute delete

7.23 kB

	import os.path
	from pathlib import Path
	from typing import Any, Callable, cast, Optional, Tuple, Union

	import numpy as np
	from PIL import Image

	from .utils import check_integrity, download_and_extract_archive, verify_str_arg
	from .vision import VisionDataset


	class STL10(VisionDataset):
	    """`STL10 <https://cs.stanford.edu/~acoates/stl10/>`_ Dataset.

	    Args:
	        root (str or ``pathlib.Path``): Root directory of dataset where directory
	            ``stl10_binary`` exists.
	        split (string): One of {'train', 'test', 'unlabeled', 'train+unlabeled'}.
	            Accordingly, dataset is selected.
	        folds (int, optional): One of {0-9} or None.
	            For training, loads one of the 10 pre-defined folds of 1k samples for the
	            standard evaluation procedure. If no value is passed, loads the 5k samples.
	        transform (callable, optional): A function/transform that takes in a PIL image
	            and returns a transformed version. E.g, ``transforms.RandomCrop``
	        target_transform (callable, optional): A function/transform that takes in the
	            target and transforms it.
	        download (bool, optional): If true, downloads the dataset from the internet and
	            puts it in root directory. If dataset is already downloaded, it is not
	            downloaded again.

	    Raises:
	        RuntimeError: If ``download`` is false and the files under ``root`` fail the
	            integrity check, or if ``download`` is true and the files still fail the
	            integrity check after the archive has been downloaded and extracted.
	        ValueError: If ``split`` or ``folds`` is not one of the accepted values.
	    """

	    base_folder = "stl10_binary"
	    url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz"
	    filename = "stl10_binary.tar.gz"
	    tgz_md5 = "91f7769df0f17e558f3565bffb0c7dfb"
	    class_names_file = "class_names.txt"
	    folds_list_file = "fold_indices.txt"
	    # Each entry is [file name, expected MD5]; the order matters because
	    # __init__ addresses the entries by position.
	    train_list = [
	        ["train_X.bin", "918c2871b30a85fa023e0c44e0bee87f"],
	        ["train_y.bin", "5a34089d4802c674881badbb80307741"],
	        ["unlabeled_X.bin", "5242ba1fed5e4be9e1e742405eb56ca4"],
	    ]

	    test_list = [["test_X.bin", "7f263ba9f9e0b06b93213547f721ac82"], ["test_y.bin", "36f9794fa4beb8a2c72628de14fa638e"]]
	    splits = ("train", "train+unlabeled", "unlabeled", "test")

	    def __init__(
	        self,
	        root: Union[str, Path],
	        split: str = "train",
	        folds: Optional[int] = None,
	        transform: Optional[Callable] = None,
	        target_transform: Optional[Callable] = None,
	        download: bool = False,
	    ) -> None:
	        super().__init__(root, transform=transform, target_transform=target_transform)
	        self.split = verify_str_arg(split, "split", self.splits)
	        self.folds = self._verify_folds(folds)

	        if download:
	            self.download()
	        elif not self._check_integrity():
	            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

	        # now load the picked numpy arrays
	        self.labels: Optional[np.ndarray]
	        if self.split == "train":
	            self.data, self.labels = self.__loadfile(self.train_list[0][0], self.train_list[1][0])
	            self.labels = cast(np.ndarray, self.labels)
	            self.__load_folds(folds)

	        elif self.split == "train+unlabeled":
	            self.data, self.labels = self.__loadfile(self.train_list[0][0], self.train_list[1][0])
	            self.labels = cast(np.ndarray, self.labels)
	            # The fold selection is applied to the labeled part only, before
	            # the unlabeled images are appended.
	            self.__load_folds(folds)
	            unlabeled_data, _ = self.__loadfile(self.train_list[2][0])
	            self.data = np.concatenate((self.data, unlabeled_data))
	            # Unlabeled samples are marked with the label -1.
	            self.labels = np.concatenate((self.labels, np.asarray([-1] * unlabeled_data.shape[0])))

	        elif self.split == "unlabeled":
	            self.data, _ = self.__loadfile(self.train_list[2][0])
	            self.labels = np.asarray([-1] * self.data.shape[0])
	        else:  # self.split == 'test':
	            self.data, self.labels = self.__loadfile(self.test_list[0][0], self.test_list[1][0])

	        # ``classes`` is only set when the class-names file is present.
	        class_file = os.path.join(self.root, self.base_folder, self.class_names_file)
	        if os.path.isfile(class_file):
	            with open(class_file) as f:
	                self.classes = f.read().splitlines()

	    def _verify_folds(self, folds: Optional[int]) -> Optional[int]:
	        """Validate the ``folds`` argument and return it unchanged.

	        Args:
	            folds (int, optional): ``None`` or an integer in ``[0, 10)``.

	        Returns:
	            The validated ``folds`` value.

	        Raises:
	            ValueError: If ``folds`` is an int outside ``[0, 10)`` or is neither
	                ``None`` nor an int.
	        """
	        if folds is None:
	            return folds
	        elif isinstance(folds, int):
	            if folds in range(10):
	                return folds
	            msg = "Value for argument folds should be in the range [0, 10), but got {}."
	            raise ValueError(msg.format(folds))
	        else:
	            msg = "Expected type None or int for argument folds, but got type {}."
	            raise ValueError(msg.format(type(folds)))

	    def __getitem__(self, index: int) -> Tuple[Any, Any]:
	        """
	        Args:
	            index (int): Index

	        Returns:
	            tuple: (image, target) where target is index of the target class.
	        """
	        target: Optional[int]
	        if self.labels is not None:
	            img, target = self.data[index], int(self.labels[index])
	        else:
	            img, target = self.data[index], None

	        # doing this so that it is consistent with all other datasets
	        # to return a PIL Image; the stored array is channel-first, so the
	        # channel axis is moved last before the conversion.
	        img = Image.fromarray(np.transpose(img, (1, 2, 0)))

	        if self.transform is not None:
	            img = self.transform(img)

	        if self.target_transform is not None:
	            target = self.target_transform(target)

	        return img, target

	    def __len__(self) -> int:
	        """Return the number of samples currently held in ``self.data``."""
	        return self.data.shape[0]

	    def __loadfile(self, data_file: str, labels_file: Optional[str] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
	        """Read an image file, and optionally a label file, from the dataset folder.

	        Args:
	            data_file (str): Name of the binary image file inside ``base_folder``.
	            labels_file (str, optional): Name of the binary label file inside
	                ``base_folder``. If omitted, no labels are read.

	        Returns:
	            tuple: ``(images, labels)`` where ``images`` is a uint8 array reshaped to
	            ``(-1, 3, 96, 96)`` with its last two axes swapped, and ``labels`` is a
	            uint8 array of the stored labels minus one, or ``None`` when no
	            ``labels_file`` was given.
	        """
	        labels = None
	        if labels_file:
	            path_to_labels = os.path.join(self.root, self.base_folder, labels_file)
	            with open(path_to_labels, "rb") as f:
	                labels = np.fromfile(f, dtype=np.uint8) - 1  # 0-based

	        path_to_data = os.path.join(self.root, self.base_folder, data_file)
	        with open(path_to_data, "rb") as f:
	            # read whole file in uint8 chunks
	            everything = np.fromfile(f, dtype=np.uint8)
	            images = np.reshape(everything, (-1, 3, 96, 96))
	            # Swap the two spatial axes (presumably because the binary files
	            # store each image column-major -- confirm against the dataset docs).
	            images = np.transpose(images, (0, 1, 3, 2))

	        return images, labels

	    def _check_integrity(self) -> bool:
	        """Return True only if every train and test file matches its expected MD5."""
	        for filename, md5 in self.train_list + self.test_list:
	            fpath = os.path.join(self.root, self.base_folder, filename)
	            if not check_integrity(fpath, md5):
	                return False
	        return True

	    def download(self) -> None:
	        """Download and extract the archive unless the files are already valid.

	        Raises:
	            RuntimeError: If the extracted files still fail the integrity check
	                after the download.
	        """
	        if self._check_integrity():
	            return
	        download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
	        # The result of the post-download check used to be discarded, so a
	        # broken extraction went unnoticed until the files were loaded.
	        if not self._check_integrity():
	            raise RuntimeError(
	                "Dataset not found or corrupted after download; "
	                "the extracted files failed the MD5 integrity check."
	            )

	    def extra_repr(self) -> str:
	        """Return the split description appended to the dataset's repr."""
	        return f"Split: {self.split}"

	    def __load_folds(self, folds: Optional[int]) -> None:
	        """Restrict ``self.data`` and ``self.labels`` to one pre-defined fold.

	        Args:
	            folds (int, optional): Zero-based line number in the fold-indices file
	                to use. If ``None``, the data is left untouched.
	        """
	        # loads one of the folds if specified
	        if folds is None:
	            return
	        path_to_folds = os.path.join(self.root, self.base_folder, self.folds_list_file)
	        with open(path_to_folds) as f:
	            # Each line of the file holds the space-separated sample indices
	            # of one fold.
	            str_idx = f.read().splitlines()[folds]
	            list_idx = np.fromstring(str_idx, dtype=np.int64, sep=" ")
	            self.data = self.data[list_idx, :, :, :]
	            if self.labels is not None:
	                self.labels = self.labels[list_idx]