Ben Burtenshaw committed on
Commit
2dd914b
·
1 Parent(s): 74a54e5

add imdb data to hub

Browse files
dataset_dir/dataset_dict.json CHANGED
@@ -1 +1 @@
1
- {"splits": ["train"]}
 
1
+ {"splits": ["train", "test", "unsupervised"]}
dataset_dir/test/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 83446840,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
+ }
35
+ },
36
+ "homepage": "",
37
+ "license": "",
38
+ "size_in_bytes": 216649642,
39
+ "splits": {
40
+ "train": {
41
+ "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
dataset_dir/test/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0c4517be449a88ae",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "test"
13
+ }
dataset_dir/train/dataset_info.json CHANGED
@@ -1,88 +1,59 @@
1
  {
2
  "builder_name": "parquet",
3
  "citation": "",
4
- "config_name": "default",
5
- "dataset_name": "10k_prompts_ranked",
6
- "dataset_size": 8711680,
7
  "description": "",
8
  "download_checksums": {
9
- "hf://datasets/DIBT/10k_prompts_ranked@3a9e44c398d92681e58b5c8ad39502203a002bac/data/train-00000-of-00001.parquet": {
10
- "num_bytes": 3579688,
 
 
 
 
 
 
 
 
11
  "checksum": null
12
  }
13
  },
14
- "download_size": 3579688,
15
  "features": {
16
- "prompt": {
17
- "dtype": "string",
18
- "id": "field",
19
- "_type": "Value"
20
- },
21
- "quality": [
22
- {
23
- "user_id": {
24
- "dtype": "string",
25
- "id": "question",
26
- "_type": "Value"
27
- },
28
- "value": {
29
- "dtype": "string",
30
- "id": "suggestion",
31
- "_type": "Value"
32
- },
33
- "status": {
34
- "dtype": "string",
35
- "id": "question",
36
- "_type": "Value"
37
- }
38
- }
39
- ],
40
- "metadata": {
41
- "dtype": "string",
42
- "id": "metadata",
43
- "_type": "Value"
44
- },
45
- "avg_rating": {
46
- "dtype": "float64",
47
- "_type": "Value"
48
- },
49
- "num_responses": {
50
- "dtype": "int64",
51
- "_type": "Value"
52
- },
53
- "agreement_ratio": {
54
- "dtype": "float64",
55
- "_type": "Value"
56
- },
57
- "raw_responses": {
58
- "feature": {
59
- "dtype": "int64",
60
- "_type": "Value"
61
- },
62
- "_type": "Sequence"
63
- },
64
- "kind": {
65
- "dtype": "string",
66
- "_type": "Value"
67
- },
68
- "cluster_description": {
69
  "dtype": "string",
70
  "_type": "Value"
71
  },
72
- "topic": {
73
- "dtype": "string",
74
- "_type": "Value"
 
 
 
75
  }
76
  },
77
  "homepage": "",
78
  "license": "",
79
- "size_in_bytes": 12291368,
80
  "splits": {
81
  "train": {
82
  "name": "train",
83
- "num_bytes": 8711680,
84
- "num_examples": 10331,
85
- "dataset_name": "10k_prompts_ranked"
 
 
 
 
 
 
 
 
 
 
 
 
86
  }
87
  },
88
  "version": {
 
1
  {
2
  "builder_name": "parquet",
3
  "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
  "description": "",
8
  "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
  "checksum": null
20
  }
21
  },
22
+ "download_size": 83446840,
23
  "features": {
24
+ "text": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "dtype": "string",
26
  "_type": "Value"
27
  },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
  }
35
  },
36
  "homepage": "",
37
  "license": "",
38
+ "size_in_bytes": 216649642,
39
  "splits": {
40
  "train": {
41
  "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
  }
58
  },
59
  "version": {
dataset_dir/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "2819011aee707696",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
dataset_dir/unsupervised/dataset_info.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "plain_text",
5
+ "dataset_name": "imdb",
6
+ "dataset_size": 133202802,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/train-00000-of-00001.parquet": {
10
+ "num_bytes": 20979968,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/test-00000-of-00001.parquet": {
14
+ "num_bytes": 20470363,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/imdb@e6281661ce1c48d982bc483cf8a173c1bbeb5d31/plain_text/unsupervised-00000-of-00001.parquet": {
18
+ "num_bytes": 41996509,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 83446840,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "label": {
29
+ "names": [
30
+ "neg",
31
+ "pos"
32
+ ],
33
+ "_type": "ClassLabel"
34
+ }
35
+ },
36
+ "homepage": "",
37
+ "license": "",
38
+ "size_in_bytes": 216649642,
39
+ "splits": {
40
+ "train": {
41
+ "name": "train",
42
+ "num_bytes": 33435948,
43
+ "num_examples": 25000,
44
+ "dataset_name": "imdb"
45
+ },
46
+ "test": {
47
+ "name": "test",
48
+ "num_bytes": 32653810,
49
+ "num_examples": 25000,
50
+ "dataset_name": "imdb"
51
+ },
52
+ "unsupervised": {
53
+ "name": "unsupervised",
54
+ "num_bytes": 67113044,
55
+ "num_examples": 50000,
56
+ "dataset_name": "imdb"
57
+ }
58
+ },
59
+ "version": {
60
+ "version_str": "0.0.0",
61
+ "major": 0,
62
+ "minor": 0,
63
+ "patch": 0
64
+ }
65
+ }
dataset_dir/unsupervised/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "df8b13b1c356ff29",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "unsupervised"
13
+ }