|
from argparse import ArgumentParser |
|
from typing import Optional |
|
|
|
from datasets.commands import BaseDatasetsCLICommand |
|
from datasets.hub import convert_to_parquet |
|
|
|
|
|
def _command_factory(args):
    """Build a ``ConvertToParquetCommand`` from parsed command-line arguments.

    Args:
        args: Parsed arguments object; its ``dataset_id``, ``token``,
            ``revision`` and ``trust_remote_code`` attributes are read.

    Returns:
        A ``ConvertToParquetCommand`` initialised with those four values.
    """
    return ConvertToParquetCommand(
        dataset_id=args.dataset_id,
        token=args.token,
        revision=args.revision,
        trust_remote_code=args.trust_remote_code,
    )
|
|
|
|
|
class ConvertToParquetCommand(BaseDatasetsCLICommand):
    """CLI command ``convert_to_parquet``: convert a dataset to Parquet.

    Stores the parsed command-line options and forwards them to
    ``convert_to_parquet`` when the command is run.
    """

    @staticmethod
    def register_subcommand(parser):
        """Register the ``convert_to_parquet`` subcommand and its arguments.

        Args:
            parser: Object exposing ``add_parser``; the new subcommand is
                added to it. (Presumably the sub-parsers action of the main
                CLI parser -- confirm against the caller.)
        """
        subparser: ArgumentParser = parser.add_parser("convert_to_parquet", help="Convert dataset to Parquet")
        # Single positional argument: the dataset to convert.
        subparser.add_argument(
            "dataset_id",
            help="source dataset ID, e.g. USERNAME/DATASET_NAME or ORGANIZATION/DATASET_NAME",
        )
        # Optional flags.
        subparser.add_argument(
            "--token",
            help="access token to the Hugging Face Hub (defaults to logged-in user's one)",
        )
        subparser.add_argument("--revision", help="source revision")
        subparser.add_argument(
            "--trust_remote_code",
            action="store_true",
            help="whether to trust the code execution of the load script",
        )
        # The parsed namespace carries the factory that builds this command.
        subparser.set_defaults(func=_command_factory)

    def __init__(
        self,
        dataset_id: str,
        token: Optional[str],
        revision: Optional[str],
        trust_remote_code: bool,
    ):
        """Store the options used later by ``run``.

        Args:
            dataset_id: Source dataset ID.
            token: Access token, or ``None`` if not supplied.
            revision: Source revision, or ``None`` if not supplied.
            trust_remote_code: Value of the ``--trust_remote_code`` flag.
        """
        self._dataset_id = dataset_id
        self._token = token
        self._revision = revision
        self._trust_remote_code = trust_remote_code

    def run(self) -> None:
        """Call ``convert_to_parquet`` with the stored options.

        The return value of ``convert_to_parquet`` is discarded.
        """
        convert_to_parquet(
            self._dataset_id,
            revision=self._revision,
            token=self._token,
            trust_remote_code=self._trust_remote_code,
        )
|
|