{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a9935ae2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "===================================BUG REPORT===================================\n", "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", "For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link\n", "================================================================================\n", "CUDA SETUP: CUDA runtime path found: /home/sourab/miniconda3/envs/ml/lib/libcudart.so\n", "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n", "CUDA SETUP: Detected CUDA version 117\n", "CUDA SETUP: Loading binary /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n" ] } ], "source": [ "import argparse\n", "import os\n", "\n", "import torch\n", "from torch.optim import AdamW\n", "from torch.utils.data import DataLoader\n", "from peft import (\n", " get_peft_config,\n", " get_peft_model,\n", " get_peft_model_state_dict,\n", " set_peft_model_state_dict,\n", " LoraConfig,\n", " PeftType,\n", " PrefixTuningConfig,\n", " PromptEncoderConfig,\n", ")\n", "\n", "import evaluate\n", "from datasets import load_dataset\n", "from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "id": "e3b13308", "metadata": {}, "outputs": [], "source": [ "batch_size = 32\n", "model_name_or_path = \"roberta-large\"\n", "task = \"mrpc\"\n", "peft_type = PeftType.LORA\n", "device = \"cuda\"\n", "num_epochs = 20" ] }, { "cell_type": "code", "execution_count": 3, "id": "0526f571", "metadata": {}, "outputs": [], "source": [ "peft_config = 
LoraConfig(task_type=\"SEQ_CLS\", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)\n", "lr = 3e-4" ] }, { "cell_type": "code", "execution_count": 4, "id": "c2697d07", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0f74797387a941cbb0709487b8808eba", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/27.9k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset glue (/home/sourab/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1a9ecc2f624343c3af8d1824afb66ac5", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3 [00:00<?, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "33b071c0e5794cb48b38bbf68f22b49b", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4 [00:00<?, ?ba/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a977694036394d5c99adfb13c023e258", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00<?, ?ba/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "facc8d9092dc4abe9e553fc8e5b795b8", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/2 [00:00<?, ?ba/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "if any(k in model_name_or_path for k in (\"gpt\", \"opt\", \"bloom\")):\n", " padding_side = \"left\"\n", "else:\n", " padding_side = \"right\"\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)\n", "if getattr(tokenizer, \"pad_token_id\") is 
None:\n", " tokenizer.pad_token_id = tokenizer.eos_token_id\n", "\n", "datasets = load_dataset(\"glue\", task)\n", "metric = evaluate.load(\"glue\", task)\n", "\n", "\n", "def tokenize_function(examples):\n", " # max_length=None => use the model max length (it's actually the default)\n", " outputs = tokenizer(examples[\"sentence1\"], examples[\"sentence2\"], truncation=True, max_length=None)\n", " return outputs\n", "\n", "\n", "tokenized_datasets = datasets.map(\n", " tokenize_function,\n", " batched=True,\n", " remove_columns=[\"idx\", \"sentence1\", \"sentence2\"],\n", ")\n", "\n", "# We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the\n", "# transformers library\n", "tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", "\n", "\n", "def collate_fn(examples):\n", " return tokenizer.pad(examples, padding=\"longest\", return_tensors=\"pt\")\n", "\n", "\n", "# Instantiate dataloaders.\n", "train_dataloader = DataLoader(tokenized_datasets[\"train\"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)\n", "eval_dataloader = DataLoader(\n", " tokenized_datasets[\"validation\"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "2ed5ac74", "metadata": {}, "outputs": [], "source": [ "model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True)\n", "model = get_peft_model(model, peft_config)\n", "model.print_trainable_parameters()\n", "model" ] }, { "cell_type": "code", "execution_count": 6, "id": "0d2d0381", "metadata": {}, "outputs": [], "source": [ "optimizer = AdamW(params=model.parameters(), lr=lr)\n", "\n", "# Instantiate scheduler\n", "lr_scheduler = get_linear_schedule_with_warmup(\n", " optimizer=optimizer,\n", " num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),\n", " num_training_steps=(len(train_dataloader) * num_epochs),\n", ")" ] }, { "cell_type": 
"code", "execution_count": 7, "id": "fa0e73be", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/115 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n", "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:28<00:00, 4.08it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.68it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: {'accuracy': 0.7009803921568627, 'f1': 0.8189910979228486}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.64it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 1: {'accuracy': 0.7622549019607843, 'f1': 0.8482003129890453}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.20it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.63it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 2: {'accuracy': 0.8651960784313726, 'f1': 0.9005424954792043}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.21it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 
[00:01<00:00, 8.62it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 3: {'accuracy': 0.8921568627450981, 'f1': 0.9228070175438596}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.20it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.62it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 4: {'accuracy': 0.8970588235294118, 'f1': 0.9257950530035336}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.16it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.01it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 5: {'accuracy': 0.8823529411764706, 'f1': 0.9169550173010381}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:30<00:00, 3.81it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.62it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 6: {'accuracy': 0.8799019607843137, 'f1': 0.9170896785109983}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.16it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.61it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 7: {'accuracy': 0.8799019607843137, 'f1': 0.9150779896013865}\n" ] }, { "name": 
"stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.61it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 8: {'accuracy': 0.8921568627450981, 'f1': 0.9233449477351917}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.59it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 9: {'accuracy': 0.8872549019607843, 'f1': 0.9217687074829931}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.16it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.61it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 10: {'accuracy': 0.8774509803921569, 'f1': 0.9137931034482758}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:29<00:00, 3.90it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 6.81it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 11: {'accuracy': 0.9068627450980392, 'f1': 0.9321428571428573}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:28<00:00, 4.05it/s]\n", 
"100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.59it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 12: {'accuracy': 0.8946078431372549, 'f1': 0.925476603119584}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.17it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.58it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 13: {'accuracy': 0.8897058823529411, 'f1': 0.922279792746114}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.61it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 14: {'accuracy': 0.8970588235294118, 'f1': 0.9265734265734265}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.60it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 15: {'accuracy': 0.8970588235294118, 'f1': 0.9263157894736843}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.17it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.59it/s]\n" ] }, { "name": "stdout", "output_type": 
"stream", "text": [ "epoch 16: {'accuracy': 0.8921568627450981, 'f1': 0.9233449477351917}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.18it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.58it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 17: {'accuracy': 0.8897058823529411, 'f1': 0.9220103986135182}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:30<00:00, 3.78it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.58it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 18: {'accuracy': 0.8921568627450981, 'f1': 0.9233449477351917}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████| 115/115 [00:27<00:00, 4.16it/s]\n", "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.60it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "epoch 19: {'accuracy': 0.8946078431372549, 'f1': 0.924693520140105}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "model.to(device)\n", "for epoch in range(num_epochs):\n", " model.train()\n", " for step, batch in enumerate(tqdm(train_dataloader)):\n", " batch.to(device)\n", " outputs = model(**batch)\n", " loss = outputs.loss\n", " loss.backward()\n", " optimizer.step()\n", " lr_scheduler.step()\n", " optimizer.zero_grad()\n", "\n", " model.eval()\n", " for step, batch in enumerate(tqdm(eval_dataloader)):\n", " batch.to(device)\n", " with torch.no_grad():\n", " 
outputs = model(**batch)\n", " predictions = outputs.logits.argmax(dim=-1)\n", " predictions, references = predictions, batch[\"labels\"]\n", " metric.add_batch(\n", " predictions=predictions,\n", " references=references,\n", " )\n", "\n", " eval_metric = metric.compute()\n", " print(f\"epoch {epoch}:\", eval_metric)" ] }, { "cell_type": "markdown", "id": "f2b2caca", "metadata": {}, "source": [ "## Share adapters on the 🤗 Hub" ] }, { "cell_type": "code", "execution_count": 8, "id": "990b3c93", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CommitInfo(commit_url='https://huggingface.co/smangrul/roberta-large-peft-lora/commit/c2c661898b8b6a0c68ecd068931e598d0a79686b', commit_message='Upload model', commit_description='', oid='c2c661898b8b6a0c68ecd068931e598d0a79686b', pr_url=None, pr_revision=None, pr_num=None)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.push_to_hub(\"smangrul/roberta-large-peft-lora\", use_auth_token=True)" ] }, { "cell_type": "markdown", "id": "9d140b26", "metadata": {}, "source": [ "## Load adapters from the Hub\n", "\n", "You can also directly load adapters from the Hub using the commands below:" ] }, { "cell_type": "code", "execution_count": 11, "id": "4d55c87d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
  "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
  "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']\n",
  "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
  " 0%| | 0/13 [00:00<?, ?it/s]You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
  "100%|██████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00, 8.45it/s]"
  ] },
  { "name": "stdout", "output_type": "stream", "text": [ "{'accuracy': 0.8946078431372549, 'f1': 0.924693520140105}\n" ] },
  { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }
 ], "source": [
  "import torch\n",
  "from peft import PeftModel, PeftConfig\n",
  "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
  "\n",
  "peft_model_id = \"smangrul/roberta-large-peft-lora\"\n",
  "# The adapter config records which base checkpoint the adapter was trained on.\n",
  "config = PeftConfig.from_pretrained(peft_model_id)\n",
  "inference_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)\n",
  "# NOTE: `collate_fn` looks up `tokenizer` when it is called, so rebinding the name here also\n",
  "# changes the tokenizer that `eval_dataloader` pads with.\n",
  "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
  "\n",
  "# Load the Lora model\n",
  "inference_model = PeftModel.from_pretrained(inference_model, peft_model_id)\n",
  "\n",
  "inference_model.to(device)\n",
  "inference_model.eval()\n",
  "# `device`, `tqdm`, `eval_dataloader` and `metric` come from the earlier cells of this notebook.\n",
  "for batch in tqdm(eval_dataloader):\n",
  "    batch.to(device)\n",
  "    with torch.no_grad():\n",
  "        outputs = inference_model(**batch)\n",
  "    predictions = outputs.logits.argmax(dim=-1)\n",
  "    references = batch[\"labels\"]\n",
  "    metric.add_batch(\n",
  "        predictions=predictions,\n",
  "        references=references,\n",
  "    )\n",
  "\n",
  "eval_metric = metric.compute()\n",
  "print(eval_metric)"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "27c43da1", "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.5 (v3.10.5:f377153967, Jun 6 2022, 12:36:10) [Clang 13.0.0 (clang-1300.0.29.30)]" }, "vscode": { "interpreter": { "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" } } },
 "nbformat": 4, "nbformat_minor": 5 }