File size: 1,060 Bytes
cd123bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import enum
class DatasetName(enum.Enum):
    """
    Closed set of dataset identifiers known to this module.

    Each member's value is the lowercase string form of its name.
    """

    # Members are looked up by get_dataset_params_by_name in this file.
    IMDB = "imdb"
    POLARITY = "polarity"
class DatasetParams:
    """
    Base class for per-dataset parameters.

    Every attribute defaults to an empty string; concrete subclasses
    override them with the values for one specific dataset.
    """

    # Dataset identifier (presumably a Hugging Face Hub repo id -- confirm
    # against the loading code).
    hugging_face_name = ""

    # Name of the column holding the text content.
    content_col_name = ""

    # Name of the column holding the label.
    label_col_name = ""

    # Local path for the dataset (presumably relative to a data root
    # chosen by the caller -- confirm).
    local_path = ""
def get_dataset_params_by_name(dataset_name: DatasetName) -> DatasetParams:
    """
    Build the parameters object that corresponds to a dataset name.

    Args:
        dataset_name: Member of DatasetName selecting the dataset.

    Returns:
        A new ImbdParams instance for DatasetName.IMDB, or a new
        PolarityParams instance for DatasetName.POLARITY.

    Raises:
        ValueError: If dataset_name equals none of the supported members.
    """
    # Ordered (member, params class) pairs; checked top to bottom with ==
    # so that any input type is accepted and only a match instantiates.
    supported = (
        (DatasetName.IMDB, ImbdParams),
        (DatasetName.POLARITY, PolarityParams),
    )
    for member, params_cls in supported:
        if dataset_name == member:
            return params_cls()

    raise ValueError(f"Unsupported dataset: {dataset_name}")
class ImbdParams(DatasetParams):
    """
    Parameters for the IMDB dataset.

    Overrides every DatasetParams attribute with the IMDB-specific value.
    """

    # Dataset identifier.
    hugging_face_name = "stanfordnlp/imdb"

    # Column names for the text content and for the label.
    content_col_name = "text"
    label_col_name = "label"

    # Local path for this dataset.
    local_path = "imdb"
class PolarityParams(DatasetParams):
    """
    Parameters for the POLARITY dataset.

    Overrides every DatasetParams attribute with the POLARITY-specific
    value.
    """

    # Dataset identifier.
    hugging_face_name = "fancyzhx/amazon_polarity"

    # Column names for the text content and for the label.
    content_col_name = "content"
    label_col_name = "label"

    # Local path for this dataset.
    local_path = "polarity"
|