"
],
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [ 20/300 05:43 < 1:29:07, 0.05 it/s, Epoch 0.69/12]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 10 | \n",
" 5.385800 | \n",
"
\n",
" \n",
"
"
]
},
"metadata": {}
},
{
"output_type": "error",
"ename": "KeyboardInterrupt",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n ========== train...========= \\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1554\u001b[0m \u001b[0mhf_hub_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_progress_bars\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1555\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1556\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1557\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1558\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1836\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1837\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccelerator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccumulate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1838\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1839\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1840\u001b[0m if (\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2681\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2682\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss_context_manager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2683\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2684\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2685\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_gpu\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mcompute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 2706\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2707\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2708\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2709\u001b[0m \u001b[0;31m# Save past state if it exists\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2710\u001b[0m \u001b[0;31m# TODO: this needs to be fixed and made cleaner later.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1499\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1500\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1502\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1503\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 631\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 632\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmodel_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 633\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[0;31m# To act like a decorator so that it can be popped when doing `extract_model_from_parallel`\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mconvert_to_fp32\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getstate__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\u001b[0m in \u001b[0;36mconvert_to_fp32\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m 597\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dtype\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mtensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat16\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbfloat16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 599\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrecursively_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_convert_to_fp32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_is_fp16_bf16_tensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 600\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py\u001b[0m in \u001b[0;36mrecursively_apply\u001b[0;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 118\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m return type(data)(\n\u001b[0m\u001b[1;32m 120\u001b[0m {\n\u001b[1;32m 121\u001b[0m k: recursively_apply(\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_outputs.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, loss, logits, past_key_values, hidden_states, attentions, cross_attentions)\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py\u001b[0m in \u001b[0;36m__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mother_fields_are_none\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfield\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfield\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mclass_fields\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 329\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mother_fields_are_none\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfirst_field\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 330\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfirst_field\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[0miterator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfirst_field\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py\u001b[0m in \u001b[0;36mis_tensor\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 114\u001b[0m \"\"\"\n\u001b[1;32m 115\u001b[0m \u001b[0;31m# This gives us a smart order to test the frameworks with the corresponding tests.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m \u001b[0mframework_to_test_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_frameworks_and_test_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 117\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtest_func\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mframework_to_test_func\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtest_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py\u001b[0m in \u001b[0;36m_get_frameworks_and_test_func\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;34m\"np\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mis_numpy_array\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m }\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mpreferred_framework\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer_framework_from_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0;31m# We will test this one first, then numpy, then the others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0mframeworks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpreferred_framework\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpreferred_framework\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/utils/generic.py\u001b[0m in \u001b[0;36minfer_framework_from_repr\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 77\u001b[0m frameworks in a smart order, without the need to import the frameworks).\n\u001b[1;32m 78\u001b[0m \"\"\"\n\u001b[0;32m---> 79\u001b[0;31m \u001b[0mrepresentation\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 80\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrepresentation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tensor\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor.py\u001b[0m in \u001b[0;36m__repr__\u001b[0;34m(self, tensor_contents)\u001b[0m\n\u001b[1;32m 424\u001b[0m )\n\u001b[1;32m 425\u001b[0m \u001b[0;31m# All strings are unicode in Python 3.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 426\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_tensor_str\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtensor_contents\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 428\u001b[0m def backward(\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_str\u001b[0;34m(self, tensor_contents)\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_dispatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_disable_current_modes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 635\u001b[0m \u001b[0mguard\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_DisableFuncTorch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 636\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_str_intern\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtensor_contents\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_str_intern\u001b[0;34m(inp, tensor_contents)\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0mtensor_str\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_tensor_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dense\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 567\u001b[0;31m \u001b[0mtensor_str\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_tensor_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 568\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 569\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayout\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrided\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_tensor_str\u001b[0;34m(self, indent)\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[0mformatter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_Formatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_summarized_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msummarize\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_tensor_str_with_formatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_tensor_str_with_formatter\u001b[0;34m(self, indent, summarize, formatter1, formatter2)\u001b[0m\n\u001b[1;32m 266\u001b[0m ]\n\u001b[1;32m 267\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"...\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 268\u001b[0;31m + [\n\u001b[0m\u001b[1;32m 269\u001b[0m _tensor_str_with_formatter(\n\u001b[1;32m 270\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"...\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 268\u001b[0m + [\n\u001b[0;32m--> 269\u001b[0;31m _tensor_str_with_formatter(\n\u001b[0m\u001b[1;32m 270\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_tensor_str_with_formatter\u001b[0;34m(self, indent, summarize, formatter1, formatter2)\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msummarize\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mPRINT_OPTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medgeitems\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m slices = (\n\u001b[0;32m--> 261\u001b[0;31m [\n\u001b[0m\u001b[1;32m 262\u001b[0m _tensor_str_with_formatter(\n\u001b[1;32m 263\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 260\u001b[0m slices = (\n\u001b[1;32m 261\u001b[0m [\n\u001b[0;32m--> 262\u001b[0;31m _tensor_str_with_formatter(\n\u001b[0m\u001b[1;32m 263\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 264\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_tensor_str_with_formatter\u001b[0;34m(self, indent, summarize, formatter1, formatter2)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_vector_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msummarize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msummarize\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mPRINT_OPTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medgeitems\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor_str.py\u001b[0m in \u001b[0;36m_vector_str\u001b[0;34m(self, indent, summarize, formatter1, formatter2)\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msummarize\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mPRINT_OPTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medgeitems\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m data = (\n\u001b[0;32m--> 233\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0m_val_formatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mPRINT_OPTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medgeitems\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 234\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\" ...\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0m_val_formatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mPRINT_OPTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medgeitems\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
]
},
{
"cell_type": "code",
"source": [
"trainer.push_to_hub()\n",
"path = 'bloom_p560m_5'\n",
"#trainer.save_model(path)\n",
"tokenizer.push_to_hub(path)"
],
"metadata": {
"id": "33yk2qpnH_ae",
"outputId": "d4fecde5-dfc5-4160-d1f6-8428da67ad61",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 539,
"referenced_widgets": [
"200e444f452f4a03bcbd5d273fcc4234",
"29e1d1838f584b72994982c869398ab0",
"59ba3363869d4800ad189dabeef280bd",
"85d23d6d7ead4807bf05d40f0a6e30fc",
"bc403948ea48423882376ce0ddc6468a",
"e535e69f71f34adba5a8afafeb50e828",
"5c2cd6390c8d4aaf868b7761e991adac",
"dfb21c6e065a40e1b55630633f8cd70f",
"81bece364c014681a3737755797f6887",
"8d180d89ec3647b2bfcaf77bac0a3ac9",
"25b543a36d544c8dbd4acfec9576991c",
"29b7309404f84565889195b1cb929fb5",
"8fe41dce120e4720b726ecf3ccc550fe",
"b8a3119cf31b40398c48d0ca3f975bcc",
"f85e4b82c8db457a853dfe9a65c5d796",
"5bb056479c1b4debbe0af2a1c18b9b99",
"d321cc7cad13441990339911dc5047c5",
"ce5b5b67203f4b3fbeb66325a4532170",
"9c43144460684b8986303acf3b3a833e",
"3c6723f966c747928a7ca70588028900",
"020d884a48424375a0688ea4089a4a01",
"f0924297c3e24eb083a61acbd90526ea",
"34545041020f4f9a91f537764a74bdd7",
"a8dd142d7d894da3bc480fe3e5540667",
"4004ae5969934797bf0903fb3148c776",
"a24845671f1f4e1480cfa4b62826e990",
"98295af7ef7345e99abc424052ece0a6",
"5e92243cc99d4591b157430c83f89da2",
"f28e9e22ab0f432aa406b34b7a086e72",
"e29e62e75f1645619c175e142ed60ecb",
"fe209923023b42b8b3c45a70b048fbb6",
"898c598dfd8a447a969b15454750a0dd",
"9f2f7f4b92cb4ff38ad5854730bff239",
"bc073cc2b22d4e1bbe3f601fe19daa05",
"1be09c967d6047e3a3cc6d2718468860",
"df26c22072b743ae9e0cd3a3a8b85a54",
"162eec4fd64e4a7fbd1567642607ac8b",
"9df9e40f9fbf4969bd4d4d56d6ec0086",
"382025314a4147fea87126145a12ff69",
"3ef14015489143debf5cc520932a6548",
"7d8ba3f58bbf439d986f1585fe348564",
"93801f0ab1e34d5abe8d56466d19a209",
"96667d01313f44f4a10e32d15b83b217",
"f2defc9aad24484186d975d0316c9eae",
"4dec119278a5470b82c972ee5fe3c235",
"0dd413fee7bb4adcad7c1b71a45fa84b",
"b1f8def1c90f47438bb5982afb6d4706",
"89f03129efbc4d129e47d4f32ff17b45",
"67934e9e9134439b9027a301b8049d6a",
"88ad7f6e75d448ae991cdda80b6e007e",
"6126b224349245788b2663b605034493",
"4fddb21d26574e6db86fdf70622dc2c7",
"0390562cb271493191189d700fcab51f",
"55c48a8994be46e0b2547c1f460df165",
"2b64a98317064418bc16ca46734a90c6",
"4f5a4073d91e4c4f8a9f6fec6db5b057",
"599b35785074490eae42b1458c3f38ae",
"ede8e2516142426cb0d25869edb4d7c7",
"17ff7f1f50be4f808d3169b6fc6fc877",
"bc4cddb1bb0c451ab72cf5d4f60de489",
"268b4b2068e24829862155a969079aac",
"b125c0313f2245c6a3fd639b60206c6c",
"6f44c77b79134ec1a7b645285a5dfa38",
"5d9b65b731844992a6daaf67798a340a",
"3f073d5acc0b4af49c34929cccc397ec",
"a20ac5ea51094e288fe2088634136dfa"
]
}
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file pytorch_model.bin: 0%| | 1.00/2.08G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "200e444f452f4a03bcbd5d273fcc4234"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file runs/May28_06-51-56_11b6804bfda1/1685256722.265447/events.out.tfevents.1685256722.11b6804bfda1.296…"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "29b7309404f84565889195b1cb929fb5"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file training_args.bin: 0%| | 1.00/3.81k [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "34545041020f4f9a91f537764a74bdd7"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload file runs/May28_06-51-56_11b6804bfda1/events.out.tfevents.1685256722.11b6804bfda1.296.0: 0%| …"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "bc073cc2b22d4e1bbe3f601fe19daa05"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"To https://huggingface.co/Imran1/bloom_p560m_5\n",
" 2d98ad4..90185c0 main -> main\n",
"\n",
"WARNING:huggingface_hub.repository:To https://huggingface.co/Imran1/bloom_p560m_5\n",
" 2d98ad4..90185c0 main -> main\n",
"\n",
"To https://huggingface.co/Imran1/bloom_p560m_5\n",
" 90185c0..a4cb8b6 main -> main\n",
"\n",
"WARNING:huggingface_hub.repository:To https://huggingface.co/Imran1/bloom_p560m_5\n",
" 90185c0..a4cb8b6 main -> main\n",
"\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Upload 1 LFS files: 0%| | 0/1 [00:00, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "4dec119278a5470b82c972ee5fe3c235"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer.json: 0%| | 0.00/14.5M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "4f5a4073d91e4c4f8a9f6fec6db5b057"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/Imran1/bloom_p560m_5/commit/2676e959a58b857b0ea367bf3232c4389331e401', commit_message='Upload tokenizer', commit_description='', oid='2676e959a58b857b0ea367bf3232c4389331e401', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"source": [
"path = 'Imran1/bloom_p560m_5'\n",
"from transformers import pipeline\n",
"pipe = pipeline('text-generation',model=path, tokenizer=path,device=0)"
],
"metadata": {
"id": "u0E5IMiWIPfR",
"outputId": "3a2819db-266f-4675-be07-c9ad0d718b8f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 209,
"referenced_widgets": [
"4aee3698c7af41c1a1f28877e3c281a7",
"743aaf2f493044bc900e21ab3d1683fc",
"425f64bebbf04c7a988ab6fb617f8628",
"efcd594ae7094e6b93249d7a34087927",
"afe7b8aa65f64ffeaa0ce6453ad7e7e1",
"c12aa88d226c437aa77eeb080e03596a",
"92521dc3cf60428db619f37f3dd3281c",
"f0a3a0475f714fccab3eea850a789a4c",
"580a34c16c484332afc5f0dbec484944",
"93678a642dcd4354b7cd122370ed6a51",
"601a499f4dda4f3c99a991b9f7783201",
"5e8570aaa9da46cc9345162c5dee49c8",
"5f4c17763f0e4d7a938a8a73aed63bf1",
"1e7a2d5be97e460091ec3ea4f9ecb8de",
"a9db926a0ab6429fb38f18892769515e",
"84aef8ad08514886b4543267d02f9eef",
"3ff480b483e740d288100a42b100ebc3",
"38d615b2757b4ec69c0d68fbbc132d87",
"7f22fd3a591c4aa4aee6f7ec6ac53425",
"9e924a5406724ce1810835cbc3baa14c",
"18bf9af5bd3546fd813a42f1873970bd",
"1bdffe13acb44fee8d55a7e20113e834",
"1eb118d574e74b2f9c7e72c1e53bb964",
"b2a008c18985493cbafabfdaf0f06669",
"54dfd04cc19e43928de9bcd8b66770a2",
"f9131ad75a4b429fadb71a9c2d2c5159",
"1b82530606464d1cbc81bd25bfb500e4",
"13851ffd552b49d0b9cda85abc2b0faa",
"9ff209d6455d4c1991bc5e7f9d225790",
"dfe7de5c18634e638cae57d36b50b7ba",
"70de6e86f7364f9d941ea2bca94d006f",
"7bbdb6267c5046f5a44a85b3cad95748",
"08cacfba1fae40979742760eeebff0f7",
"2b814ae985ad46de8195906823c9f110",
"e92a25bf26f6413cb1a9d25eb65020ef",
"355a82b4049640bc8711454a7dc91924",
"f0847ce10ce54ff49a49c04ac16d3b9d",
"a38ad089d3474eaaa5af69cd6431e061",
"bd64d880dd144d8ca572ee2cfb1b65cd",
"7e54e9ed013448329246298b591f4615",
"1e4e8571171e4acfaef5db4f3ce85a6e",
"ade2dbcc845f40839161d1f08c537098",
"ae27a41146b04dd997a8a71ddfe6c388",
"f4bc0c97e82a49eea12fd6a0afb9cde1",
"bab487ab275845e4b1ee6cd9ab51c50b",
"422fe61f0c344d0a8b0e24bed10327f9",
"b6f34fd3b3484a87a3dde98dda6d725b",
"2fa1a9536bc447b8b0623b773ab6e7ff",
"1978c801f1aa4540b52941fe937e9ac8",
"f48044a884674e26a85cc39104cdc3d1",
"f64d6e3cb99c41b09a3abdf87df5fc0b",
"892ce22e115545aaaafa72ad7ee17546",
"6c4b7dfecd3044e2a0d185d6426d4162",
"0b656550d23b43ce9dc92ec5f00a7efa",
"8615200b883c480b908d3f7ca393690f",
"35291717329044de8706738e9a1a541f",
"aa0065c1fbf24900a0df90d64774ffc2",
"bebdc7c08c624e27afff870f97ebc7c9",
"c1c01c295ce6424da132d72ddeee920a",
"2e648029ab37406f9ff7f36b0e9e7a7d",
"a1907ff90e734949931d8a0ad63c9191",
"7a3136db2363446ab1faaff8a1803e47",
"7b362a3637ff487b9ab3f099d3e71a12",
"a72c2d1c0294482e84e2424032f2ca92",
"e6abcf5b529543e18a0bbe02f76995af",
"eef77cf4e66d46d4a319ffea5af57455"
]
}
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/812 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "4aee3698c7af41c1a1f28877e3c281a7"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/2.24G [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "5e8570aaa9da46cc9345162c5dee49c8"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)neration_config.json: 0%| | 0.00/137 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "1eb118d574e74b2f9c7e72c1e53bb964"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/286 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "2b814ae985ad46de8195906823c9f110"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading tokenizer.json: 0%| | 0.00/14.5M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "bab487ab275845e4b1ee6cd9ab51c50b"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/92.0 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "35291717329044de8706738e9a1a541f"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"def generate_prompt(instruction, input=None):\n",
" if input:\n",
" return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
"### Instruction:\n",
"{instruction}\n",
"### Input:\n",
"{input}\n",
"### Response:\"\"\"\n",
" else:\n",
" return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"### Instruction:\n",
"{instruction}\n",
"### Response:\"\"\""
],
"metadata": {
"id": "mPA5f8GRIPkg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"text = \"\"\"\n",
"چې ناظر د ساده رويو پۀ رخسار شم\n",
"\"\"\"\n",
"prompt = generate_prompt(text)\n",
"new = pipe(prompt,\n",
" max_length=200,\n",
" do_sample=True,\n",
" top_k=400,\n",
" top_p=0.7,\n",
" temperature=0.9,\n",
" #repetition_penalty=True,\n",
" )\n",
"\n",
"\n",
"print(new[0]['generated_text'])"
],
"metadata": {
"id": "oqA7ZXnVIPoS",
"outputId": "b0c58654-8507-490b-9591-3a58abf1491a",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"### Instruction:\n",
"\n",
"چې ناظر د ساده رويو پۀ رخسار شم\n",
"\n",
"### Response:\n",
"پۀ دا شمع, پروانه غوندې, نثار شم\n",
"\n",
"کۀ هر څو پۀ صبر زړۀ ټولوم, نۀ شي\n",
"بې اختياره لکه موم ويلې پۀ نار شم\n",
"\n",
"غنچه خلۀ چې په خبرو راته وا کا\n",
"د نرګس پۀ څير کوز ګورم, شرمسار شم\n",
"\n",
"چې يې شونډې تبسم تازه نګار کا\n",
"باندې زۀ لکه سپند سوو ته تيار شم\n",
"\n",
"ځان جهان مې هس�\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "wFOSu6eNIPwh"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"cd666e115ca949a6a0828fbabae7d592": {
"model_module": "@jupyter-widgets/controls",
"model_name": "VBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_798d5eca9e714fa39914f2ee00d02cfa",
"IPY_MODEL_5bee398314df4b34b1897b219216c130",
"IPY_MODEL_e6f56b79264f4977b470558549828584",
"IPY_MODEL_21b1df2da90f4036b97eb5d48ad52d33"
],
"layout": "IPY_MODEL_f8aae38607414753a7f3f3a524744872"
}
},
"05fe078374924fbc914de938abd34375": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dff3b24b7a82494192279d8e409a5d4f",
"placeholder": "",
"style": "IPY_MODEL_27a0600d2f4247b6842f21de84e4b5e9",
"value": " Copy a token from your Hugging Face\ntokens page and paste it below. Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "
}
},
"393e108b388d48188ad86cc657e563a1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "PasswordModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_90b7f6b78a8e41ac9d24adc66dde0c9f",
"placeholder": "",
"style": "IPY_MODEL_0ed85f1babff431e9658cbb23065b84c",
"value": ""
}
},
"c3b1e9055a044b5b9a851d21cc776e7b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_af367683d85147c3abd9db2c07050307",
"style": "IPY_MODEL_507e2602a5214608b780169b1faaad31",
"value": true
}
},
"3825bcd53a4e43359240a1f29d9b6590": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_0f03692191c248eaae5a9692b2582377",
"style": "IPY_MODEL_7c177077979d4ceeb0048bfffaaaea4b",
"tooltip": ""
}
},
"72e5ca4f630c49ba872206af6a9ffd09": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_efd0359c0efc4a2ab6f8557bfddb9491",
"placeholder": "",
"style": "IPY_MODEL_fd3e47df2da742688671ce614ea18135",
"value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. |