jamtur01's picture
Upload folder using huggingface_hub
9c6594c verified
from argparse import ArgumentParser
from typing import Optional
from datasets.commands import BaseDatasetsCLICommand
from datasets.hub import convert_to_parquet
def _command_factory(args):
return ConvertToParquetCommand(
args.dataset_id,
args.token,
args.revision,
args.trust_remote_code,
)
class ConvertToParquetCommand(BaseDatasetsCLICommand):
@staticmethod
def register_subcommand(parser):
parser: ArgumentParser = parser.add_parser("convert_to_parquet", help="Convert dataset to Parquet")
parser.add_argument(
"dataset_id", help="source dataset ID, e.g. USERNAME/DATASET_NAME or ORGANIZATION/DATASET_NAME"
)
parser.add_argument("--token", help="access token to the Hugging Face Hub (defaults to logged-in user's one)")
parser.add_argument("--revision", help="source revision")
parser.add_argument(
"--trust_remote_code", action="store_true", help="whether to trust the code execution of the load script"
)
parser.set_defaults(func=_command_factory)
def __init__(
self,
dataset_id: str,
token: Optional[str],
revision: Optional[str],
trust_remote_code: bool,
):
self._dataset_id = dataset_id
self._token = token
self._revision = revision
self._trust_remote_code = trust_remote_code
def run(self) -> None:
_ = convert_to_parquet(
self._dataset_id, revision=self._revision, token=self._token, trust_remote_code=self._trust_remote_code
)