|
from abc import ABC, abstractmethod |
|
from typing import Optional, Union |
|
|
|
from .. import Dataset, DatasetDict, Features, IterableDataset, IterableDatasetDict, NamedSplit |
|
from ..utils.typing import NestedDataStructureLike, PathLike |
|
|
|
|
|
class AbstractDatasetReader(ABC): |
|
def __init__( |
|
self, |
|
path_or_paths: Optional[NestedDataStructureLike[PathLike]] = None, |
|
split: Optional[NamedSplit] = None, |
|
features: Optional[Features] = None, |
|
cache_dir: str = None, |
|
keep_in_memory: bool = False, |
|
streaming: bool = False, |
|
num_proc: Optional[int] = None, |
|
**kwargs, |
|
): |
|
self.path_or_paths = path_or_paths |
|
self.split = split if split or isinstance(path_or_paths, dict) else "train" |
|
self.features = features |
|
self.cache_dir = cache_dir |
|
self.keep_in_memory = keep_in_memory |
|
self.streaming = streaming |
|
self.num_proc = num_proc |
|
self.kwargs = kwargs |
|
|
|
@abstractmethod |
|
def read(self) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: |
|
pass |
|
|
|
|
|
class AbstractDatasetInputStream(ABC): |
|
def __init__( |
|
self, |
|
features: Optional[Features] = None, |
|
cache_dir: str = None, |
|
keep_in_memory: bool = False, |
|
streaming: bool = False, |
|
num_proc: Optional[int] = None, |
|
**kwargs, |
|
): |
|
self.features = features |
|
self.cache_dir = cache_dir |
|
self.keep_in_memory = keep_in_memory |
|
self.streaming = streaming |
|
self.num_proc = num_proc |
|
self.kwargs = kwargs |
|
|
|
@abstractmethod |
|
def read(self) -> Union[Dataset, IterableDataset]: |
|
pass |
|
|