File size: 1,060 Bytes
cd123bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import enum


class DatasetName(enum.Enum):
    """Enumeration of the dataset names supported by this module.

    Every member's value is the lowercase string spelling of its name.
    """

    IMDB = "imdb"
    POLARITY = "polarity"


class DatasetParams:
    """Base class for the parameters that describe a dataset.

    All attributes are class-level strings that default to the empty
    string; concrete dataset classes are expected to override them.
    """

    # NOTE(review): presumably the dataset identifier on the Hugging Face
    # hub -- confirm against the code that loads the dataset.
    hugging_face_name: str = ""
    # NOTE(review): presumably the column holding the input text -- confirm.
    content_col_name: str = ""
    # NOTE(review): presumably the column holding the target label -- confirm.
    label_col_name: str = ""
    # NOTE(review): presumably where a local copy is stored -- confirm.
    local_path: str = ""



def get_dataset_params_by_name(dataset_name: DatasetName) -> DatasetParams:
    """Return a new params object for the given dataset name.

    Args:
        dataset_name: The ``DatasetName`` member to look up.

    Returns:
        A fresh ``ImbdParams`` instance for ``DatasetName.IMDB`` or a
        fresh ``PolarityParams`` instance for ``DatasetName.POLARITY``.

    Raises:
        ValueError: If ``dataset_name`` matches neither supported member.
    """
    # Pick the params class first, then instantiate it in one place.
    if dataset_name == DatasetName.IMDB:
        params_cls = ImbdParams
    elif dataset_name == DatasetName.POLARITY:
        params_cls = PolarityParams
    else:
        raise ValueError(f"Unsupported dataset: {dataset_name}")

    return params_cls()


class ImbdParams(DatasetParams):
    """Parameter values for the IMDB dataset."""

    # NOTE(review): the class name is spelled "Imbd" (not "Imdb"); it is
    # kept unchanged because other code refers to it by this name.
    hugging_face_name: str = "stanfordnlp/imdb"
    content_col_name: str = "text"
    label_col_name: str = "label"
    local_path: str = "imdb"


class PolarityParams(DatasetParams):
    """Parameter values for the POLARITY dataset."""

    hugging_face_name: str = "fancyzhx/amazon_polarity"
    content_col_name: str = "content"
    label_col_name: str = "label"
    local_path: str = "polarity"