jamtur01's picture
Upload folder using huggingface_hub
9c6594c verified
from abc import ABC, abstractmethod
from typing import Optional, Union
from .. import Dataset, DatasetDict, Features, IterableDataset, IterableDatasetDict, NamedSplit
from ..utils.typing import NestedDataStructureLike, PathLike
class AbstractDatasetReader(ABC):
def __init__(
self,
path_or_paths: Optional[NestedDataStructureLike[PathLike]] = None,
split: Optional[NamedSplit] = None,
features: Optional[Features] = None,
cache_dir: str = None,
keep_in_memory: bool = False,
streaming: bool = False,
num_proc: Optional[int] = None,
**kwargs,
):
self.path_or_paths = path_or_paths
self.split = split if split or isinstance(path_or_paths, dict) else "train"
self.features = features
self.cache_dir = cache_dir
self.keep_in_memory = keep_in_memory
self.streaming = streaming
self.num_proc = num_proc
self.kwargs = kwargs
@abstractmethod
def read(self) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]:
pass
class AbstractDatasetInputStream(ABC):
def __init__(
self,
features: Optional[Features] = None,
cache_dir: str = None,
keep_in_memory: bool = False,
streaming: bool = False,
num_proc: Optional[int] = None,
**kwargs,
):
self.features = features
self.cache_dir = cache_dir
self.keep_in_memory = keep_in_memory
self.streaming = streaming
self.num_proc = num_proc
self.kwargs = kwargs
@abstractmethod
def read(self) -> Union[Dataset, IterableDataset]:
pass