Spaces:

jamtur01
/

MMaDA

Runtime error

App Files Files Community

MMaDA / venv /lib /python3.11 /site-packages /webdataset /extradatasets.py

jamtur01

Upload folder using huggingface_hub

9c6594c verified about 1 month ago

raw

history blame contribute delete

4.48 kB

	#
	# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
	# This file is part of the WebDataset library.
	# See the LICENSE file for licensing terms (BSD-style).
	#


	"""Train PyTorch models directly from POSIX tar archive.

	Code works locally or over HTTP connections.
	"""


	from . import utils
	from .pytorch import IterableDataset
	from .utils import PipelineStage


	class MockDataset(IterableDataset):
	"""Create a mock dataset for performance testing and unit testing.

	Args:
	sample: The sample to be returned repeatedly.
	length (int): The length of the mock dataset.
	"""

	def __init__(self, sample, length):
	self.sample = sample
	self.length = length

	def __iter__(self):
	"""Yield samples from the mock dataset.

	Returns:
	Iterator: An iterator that yields the same sample repeatedly.
	"""
	for _ in range(self.length):
	yield self.sample


	class repeatedly(IterableDataset, PipelineStage):
	"""Repeatedly yield samples from a dataset.

	Args:
	source: The source dataset to repeat.
	nepochs (int, optional): Maximum number of epochs to repeat.
	nbatches (int, optional): Maximum number of batches to repeat.
	length (int, optional): Length of the repeated dataset.
	"""

	def __init__(self, source, nepochs=None, nbatches=None, length=None):
	self.source = source
	self.length = length
	self.nbatches = nbatches

	def invoke(self, source):
	"""Return an iterator that iterates repeatedly over a source.

	Args:
	source: The source dataset to repeat.

	Returns:
	Iterator: An iterator that repeatedly yields samples from the source.
	"""
	return utils.repeatedly(
	source,
	nepochs=self.nepochs,
	nbatches=self.nbatches,
	)


	class with_epoch(IterableDataset):
	"""Change the actual and nominal length of an IterableDataset.

	This will continuously iterate through the original dataset, but
	impose new epoch boundaries at the given length/nominal.
	This exists mainly as a workaround for the odd logic in DataLoader.
	It is also useful for choosing smaller nominal epoch sizes with
	very large datasets.

	Args:
	dataset: The source IterableDataset.
	length (int): Declared length of the dataset.
	"""

	def __init__(self, dataset, length):
	super().__init__()
	self.length = length
	self.source = None

	def __getstate__(self):
	"""Return the pickled state of the dataset.

	This resets the dataset iterator, since that can't be pickled.

	Returns:
	dict: A dictionary representing the pickled state of the dataset.
	"""
	result = dict(self.__dict__)
	result["source"] = None
	return result

	def invoke(self, dataset):
	"""Return an iterator over the dataset.

	This iterator returns as many samples as given by the `length` parameter.

	Args:
	dataset: The source dataset to iterate over.

	Yields:
	Sample: The next sample from the dataset.
	"""
	if self.source is None:
	self.source = iter(dataset)
	for _ in range(self.length):
	try:
	sample = next(self.source)
	except StopIteration:
	self.source = iter(dataset)
	try:
	sample = next(self.source)
	except StopIteration:
	return
	yield sample
	self.source = None


	class with_length(IterableDataset, PipelineStage):
	"""Repeatedly yield samples from a dataset with a specified length.

	Args:
	dataset: The source dataset.
	length (int): The stated length of the dataset.
	"""

	def __init__(self, dataset, length):
	super().__init__()
	self.dataset = dataset
	self.length = length

	def invoke(self, dataset):
	"""Return an iterator that iterates over the source dataset.

	Args:
	dataset: The source dataset to iterate over.

	Returns:
	Iterator: An iterator over the source dataset.
	"""
	return iter(dataset)

	def __len__(self):
	"""Return the user specified length.

	Returns:
	int: The specified length of the dataset.
	"""
	return self.length