{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":5036,"status":"ok","timestamp":1717067779656,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"oiCyNkXhltM1"},"outputs":[],"source":["import numpy as np\n","import pandas as pd\n","from sklearn.model_selection import train_test_split\n","\n","from sklearn.model_selection import cross_val_score\n","import torch\n","from torch import nn\n","import matplotlib.pyplot as plt\n","\n","# импортируем трансформеры\n","import transformers\n","import warnings\n","warnings.filterwarnings('ignore')\n","import re"]},{"cell_type":"markdown","metadata":{"id":"Mx4_RYe_N10x"},"source":["#Messages"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":922,"status":"ok","timestamp":1717067833296,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"hQbBScPWa2CC"},"outputs":[],"source":["df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/aux/labeled(1).csv')"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":389,"status":"ok","timestamp":1717067837646,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"c7OBdZAXN-bJ","outputId":"5e34663c-50d2-4648-ec7a-1f9667320e38"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"summary":"{\n \"name\": \"df\",\n \"rows\": 14412,\n \"fields\": [\n {\n \"column\": \"comment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14412,\n \"samples\": [\n \"\\u0431\\u0435\\u0437\\u0440\\u043e\\u0434\\u043d\\u044b\\u0439 \\u043f\\u043e\\u0442\\u043e\\u043c\\u043e\\u043a \\u0445\\u043e\\u043b\\u043e\\u043f\\u0430 \\u0440\\u0430\\u0441\\u0441\\u0443\\u0436\\u0434\\u0430\\u0435\\u0442 \\u043e \\u043d\\u0430\\u0446\\u0438\\u043e\\u043d\\u0430\\u043b\\u044c\\u043d\\u043e\\u0439 \\u0433\\u043e\\u0440\\u0434\\u043e\\u0441\\u0442\\u0438.\\n\",\n \"\\u0418\\u043d\\u0442\\u0435\\u0440\\u0435\\u0441\\u043d\\u0430\\u044f \\u0442\\u0435\\u043c\\u0430, \\u043e\\u0434\\u043d\\u0430\\u043a\\u043e. \\u041e\\u0422\\u041f \\u0432\\u0440\\u043e\\u0434\\u0435 \\u0432\\u0435\\u043d\\u0433\\u0435\\u0440\\u0441\\u043a\\u0438\\u0439 \\u0431\\u0430\\u043d\\u043a, \\u0432 \\u0412\\u0435\\u043d\\u0433\\u0440\\u0438\\u0438 \\u043e\\u043d \\u0441\\u0430\\u043c\\u044b\\u0439 \\u043f\\u043e\\u043f\\u0443\\u043b\\u044f\\u0440\\u043d\\u044b\\u0439, \\u0443 \\u043c\\u0435\\u043d\\u044f \\u0443 \\u0441\\u0430\\u043c\\u043e\\u0433\\u043e \\u0435\\u0433\\u043e \\u0441\\u0447\\u0451\\u0442 \\u0438 \\u043a\\u0430\\u0440\\u0442\\u0430, \\u0438\\u0431\\u043e \\u0443 \\u043d\\u0435\\u0433\\u043e \\u0434\\u043e\\u0433\\u043e\\u0432\\u043e\\u0440 \\u0441 \\u0443\\u043d\\u0438\\u0432\\u0435\\u0440\\u043e\\u043c, \\u0441\\u043a\\u0438\\u0434\\u043a\\u0438-\\u043f\\u043b\\u044e\\u0448\\u043a\\u0438-\\u0432\\u043e\\u0437\\u0432\\u0440\\u0430\\u0442 \\u0438 \\u0442.\\u043f. \\u0434\\u043b\\u044f \\u0441\\u0442\\u0443\\u0434\\u0435\\u043d\\u0442\\u043e\\u0432. \\u0418 \\u0437\\u0430 4 \\u0433\\u043e\\u0434\\u0430 \\u043f\\u043e\\u043b\\u044c\\u0437\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f \\u043d\\u0438 \\u043e\\u0434\\u043d\\u043e\\u0439 \\u043f\\u0440\\u043e\\u0431\\u043b\\u0435\\u043c\\u044b, \\u043d\\u0438 \\u043e\\u0434\\u043d\\u043e\\u0439 \\u043f\\u043e\\u0434\\u043a\\u043b\\u044e\\u0447\\u0451\\u043d\\u043d\\u043e\\u0439 \\u0443\\u0441\\u043b\\u0443\\u0433\\u0438, \\u0431\\u043b\\u043e\\u043a\\u0438\\u0440\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f, \\u0441\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u044f \\u043d\\u0438 \\u0437\\u0430 \\u0447\\u0442\\u043e \\u0438 \\u043f\\u043e\\u0434\\u043e\\u0431\\u043d\\u043e\\u0439 \\u0435\\u0440\\u0435\\u0441\\u0438, \\u043a\\u043e\\u0442\\u043e\\u0440\\u043e\\u0439 \\u0441\\u0442\\u0440\\u0430\\u0434\\u0430\\u044e\\u0442 \\u0432\\u0441\\u0435 \\u0440\\u043e\\u0441\\u0441\\u0438\\u0439\\u0441\\u043a\\u0438\\u0435 \\u0431\\u0430\\u043d\\u043a\\u0438. \\u041d\\u0438\\u043a\\u0430\\u043a\\u0438\\u0445 \\u043a\\u0440\\u0435\\u0434\\u0438\\u0442\\u043d\\u044b\\u0445 \\u043a\\u0430\\u0440\\u0442 \\u0434\\u0430\\u0436\\u0435 \\u043d\\u0435 \\u043f\\u0440\\u0435\\u0434\\u043b\\u0430\\u0433\\u0430\\u044e\\u0442 (\\u0438 \\u0432\\u043e\\u043e\\u0431\\u0449\\u0435 \\u043e \\u043f\\u043e\\u0434\\u043e\\u0431\\u043d\\u044b\\u0445 \\u0438\\u0441\\u0442\\u043e\\u0440\\u0438\\u044f\\u0445 \\u043d\\u0435 \\u0441\\u043b\\u044b\\u0448\\u0430\\u043b). \\u0412\\u044b\\u0445\\u043e\\u0434\\u0438\\u0442, \\u0431\\u0430\\u043d\\u043a \\u0442\\u043e\\u0442 \\u0436\\u0435, \\u0430 \\u043f\\u0440\\u0438\\u043d\\u0446\\u0438\\u043f \\u0440\\u0430\\u0431\\u043e\\u0442\\u044b \\u0434\\u0440\\u0443\\u0433\\u043e\\u0439, \\u0437\\u0430\\u0442\\u043e\\u0447\\u0435\\u043d\\u043d\\u044b\\u0439 \\u043f\\u043e\\u0434 \\u0440\\u043e\\u0441\\u0441\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439 \\u043a\\u043b\\u0438\\u0435\\u043d\\u0442 - \\u043b\\u043e\\u0445 . P.S. \\u0412 \\u0412\\u0435\\u043d\\u0433\\u0440\\u0438\\u0438 \\u0432\\u0441\\u0435 \\u0431\\u0430\\u043d\\u043a\\u0438 \\u0448\\u043b\\u044e\\u0442 \\u043a\\u0430\\u0440\\u0442\\u044b \\u043f\\u043e \\u043f\\u043e\\u0447\\u0442\\u0435, \\u043c\\u043e\\u044f \\u043a\\u0430\\u043a \\u0440\\u0430\\u0437 \\u0432 \\u044d\\u0442\\u043e\\u043c \\u043c\\u0435\\u0441\\u044f\\u0446\\u0435 \\u043f\\u0440\\u0438\\u0448\\u043b\\u0430. \\u041d\\u043e \\u0432 \\u043f\\u043e\\u0447\\u0442\\u043e\\u0432\\u044b\\u0439 \\u044f\\u0449\\u0438\\u043a \\u0438\\u0445 \\u043d\\u0438 \\u0437\\u0430 \\u0447\\u0442\\u043e \\u043d\\u0435 \\u043a\\u0438\\u043d\\u0443\\u0442. \\u041b\\u0438\\u0431\\u043e \\u0432 \\u0440\\u0443\\u043a\\u0438, \\u043b\\u0438\\u0431\\u043e \\u0438\\u0437\\u0432\\u0435\\u0449\\u0435\\u043d\\u0438\\u0435 \\u0447\\u0442\\u043e\\u0431 \\u0437\\u0430\\u0431\\u0440\\u0430\\u043b \\u043d\\u0430 \\u043f\\u043e\\u0447\\u0442\\u0435. \\u0410 \\u043f\\u043e\\u0447\\u0442\\u0430 \\u043f\\u043e\\u0440\\u044f\\u0434\\u043e\\u0447\\u043d\\u0430\\u044f\\n\",\n \"\\u0421\\u0443\\u0442\\u044c \\u0442\\u0440\\u0435\\u0431\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f \\u043f\\u0440\\u043e \\u041230 - \\u043d\\u0435 \\u043f\\u0440\\u043e\\u0447\\u043d\\u043e\\u0441\\u0442\\u044c, \\u0430 \\u0432\\u043e\\u0434\\u043e\\u043d\\u0435\\u043f\\u0440\\u043e\\u043d\\u0438\\u0446\\u0430\\u0435\\u043c\\u043e\\u0441\\u0442\\u044c. \\u0412\\u043e\\u0434\\u043e\\u043d\\u0435\\u043f\\u0440\\u043e\\u043d\\u0438\\u0446\\u0430\\u0435\\u043c\\u043e\\u0441\\u0442\\u044c \\u0432\\u043e\\u043e\\u0431\\u0449\\u0435 \\u0442\\u043e \\u043e\\u0431\\u043e\\u0437\\u043d\\u0430\\u0447\\u0430\\u0435\\u0442\\u0441\\u044f W, \\u043d\\u0443 \\u0434\\u0430 \\u043d\\u0435 \\u0441\\u0443\\u0442\\u044c, \\u0437\\u0430\\u0447\\u0435\\u043c \\u043c\\u043d\\u0435 \\u043e\\u043d\\u0430 \\u0432 \\u043b\\u0435\\u043d\\u0442\\u0435 \\u043f\\u043e\\u0434 \\u0437\\u0435\\u043c\\u043b\\u0451\\u0439 ?\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toxic\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.47195781877088117,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.0,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}","type":"dataframe","variable_name":"df"},"text/html":["\n","
\n"," | comment | \n","toxic | \n","
---|---|---|
0 | \n","Верблюдов-то за что? Дебилы, бл...\\n | \n","1.0 | \n","
1 | \n","Хохлы, это отдушина затюканого россиянина, мол... | \n","1.0 | \n","
2 | \n","Собаке - собачья смерть\\n | \n","1.0 | \n","
3 | \n","Страницу обнови, дебил. Это тоже не оскорблени... | \n","1.0 | \n","
4 | \n","тебя не убедил 6-страничный пдф в том, что Скр... | \n","1.0 | \n","
... | \n","... | \n","... | \n","
14407 | \n","Вонючий совковый скот прибежал и ноет. А вот и... | \n","1.0 | \n","
14408 | \n","А кого любить? Гоблина тупорылого что-ли? Или ... | \n","1.0 | \n","
14409 | \n","Посмотрел Утомленных солнцем 2. И оказалось, ч... | \n","0.0 | \n","
14410 | \n","КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н... | \n","1.0 | \n","
14411 | \n","До сих пор пересматриваю его видео. Орамбо кст... | \n","0.0 | \n","
14412 rows × 2 columns
\n","\n"," | comment | \n","toxic | \n","
---|---|---|
0 | \n","Верблюдовто что? Дебилы, бл... | \n","1.0 | \n","
1 | \n","Хохлы, это отдушина затюканого россиянина, мол... | \n","1.0 | \n","
2 | \n","Собаке собачья смерть | \n","1.0 | \n","
3 | \n","Страницу обнови, дебил. Это оскорбление, доказ... | \n","1.0 | \n","
4 | \n","убедил 6страничный пдф том, Скрипалей отравила... | \n","1.0 | \n","
... | \n","... | \n","... | \n","
14407 | \n","Вонючий совковый скот прибежал ноет. А сторонн... | \n","1.0 | \n","
14408 | \n","А кого любить? Гоблина тупорылого чтоли? Или к... | \n","1.0 | \n","
14409 | \n","Посмотрел Утомленных солнцем 2. И оказалось, э... | \n","0.0 | \n","
14410 | \n","КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н... | \n","1.0 | \n","
14411 | \n","До сих пор пересматриваю видео. Орамбо кстати ... | \n","0.0 | \n","
14412 rows × 2 columns
\n","