{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":5036,"status":"ok","timestamp":1717067779656,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"oiCyNkXhltM1"},"outputs":[],"source":["import numpy as np\n","import pandas as pd\n","from sklearn.model_selection import train_test_split\n","\n","from sklearn.model_selection import cross_val_score\n","import torch\n","from torch import nn\n","import matplotlib.pyplot as plt\n","\n","# импортируем трансформеры\n","import transformers\n","import warnings\n","warnings.filterwarnings('ignore')\n","import re"]},{"cell_type":"markdown","metadata":{"id":"Mx4_RYe_N10x"},"source":["#Messages"]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":922,"status":"ok","timestamp":1717067833296,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"hQbBScPWa2CC"},"outputs":[],"source":["df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/aux/labeled(1).csv')"]},{"cell_type":"code","execution_count":3,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":389,"status":"ok","timestamp":1717067837646,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"c7OBdZAXN-bJ","outputId":"5e34663c-50d2-4648-ec7a-1f9667320e38"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"summary":"{\n \"name\": \"df\",\n \"rows\": 14412,\n \"fields\": [\n {\n \"column\": \"comment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14412,\n \"samples\": [\n \"\\u0431\\u0435\\u0437\\u0440\\u043e\\u0434\\u043d\\u044b\\u0439 \\u043f\\u043e\\u0442\\u043e\\u043c\\u043e\\u043a \\u0445\\u043e\\u043b\\u043e\\u043f\\u0430 \\u0440\\u0430\\u0441\\u0441\\u0443\\u0436\\u0434\\u0430\\u0435\\u0442 \\u043e \\u043d\\u0430\\u0446\\u0438\\u043e\\u043d\\u0430\\u043b\\u044c\\u043d\\u043e\\u0439 \\u0433\\u043e\\u0440\\u0434\\u043e\\u0441\\u0442\\u0438.\\n\",\n \"\\u0418\\u043d\\u0442\\u0435\\u0440\\u0435\\u0441\\u043d\\u0430\\u044f \\u0442\\u0435\\u043c\\u0430, \\u043e\\u0434\\u043d\\u0430\\u043a\\u043e. \\u041e\\u0422\\u041f \\u0432\\u0440\\u043e\\u0434\\u0435 \\u0432\\u0435\\u043d\\u0433\\u0435\\u0440\\u0441\\u043a\\u0438\\u0439 \\u0431\\u0430\\u043d\\u043a, \\u0432 \\u0412\\u0435\\u043d\\u0433\\u0440\\u0438\\u0438 \\u043e\\u043d \\u0441\\u0430\\u043c\\u044b\\u0439 \\u043f\\u043e\\u043f\\u0443\\u043b\\u044f\\u0440\\u043d\\u044b\\u0439, \\u0443 \\u043c\\u0435\\u043d\\u044f \\u0443 \\u0441\\u0430\\u043c\\u043e\\u0433\\u043e \\u0435\\u0433\\u043e \\u0441\\u0447\\u0451\\u0442 \\u0438 \\u043a\\u0430\\u0440\\u0442\\u0430, \\u0438\\u0431\\u043e \\u0443 \\u043d\\u0435\\u0433\\u043e \\u0434\\u043e\\u0433\\u043e\\u0432\\u043e\\u0440 \\u0441 \\u0443\\u043d\\u0438\\u0432\\u0435\\u0440\\u043e\\u043c, \\u0441\\u043a\\u0438\\u0434\\u043a\\u0438-\\u043f\\u043b\\u044e\\u0448\\u043a\\u0438-\\u0432\\u043e\\u0437\\u0432\\u0440\\u0430\\u0442 \\u0438 \\u0442.\\u043f. \\u0434\\u043b\\u044f \\u0441\\u0442\\u0443\\u0434\\u0435\\u043d\\u0442\\u043e\\u0432. \\u0418 \\u0437\\u0430 4 \\u0433\\u043e\\u0434\\u0430 \\u043f\\u043e\\u043b\\u044c\\u0437\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f \\u043d\\u0438 \\u043e\\u0434\\u043d\\u043e\\u0439 \\u043f\\u0440\\u043e\\u0431\\u043b\\u0435\\u043c\\u044b, \\u043d\\u0438 \\u043e\\u0434\\u043d\\u043e\\u0439 \\u043f\\u043e\\u0434\\u043a\\u043b\\u044e\\u0447\\u0451\\u043d\\u043d\\u043e\\u0439 \\u0443\\u0441\\u043b\\u0443\\u0433\\u0438, \\u0431\\u043b\\u043e\\u043a\\u0438\\u0440\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f, \\u0441\\u043f\\u0438\\u0441\\u0430\\u043d\\u0438\\u044f \\u043d\\u0438 \\u0437\\u0430 \\u0447\\u0442\\u043e \\u0438 \\u043f\\u043e\\u0434\\u043e\\u0431\\u043d\\u043e\\u0439 \\u0435\\u0440\\u0435\\u0441\\u0438, \\u043a\\u043e\\u0442\\u043e\\u0440\\u043e\\u0439 \\u0441\\u0442\\u0440\\u0430\\u0434\\u0430\\u044e\\u0442 \\u0432\\u0441\\u0435 \\u0440\\u043e\\u0441\\u0441\\u0438\\u0439\\u0441\\u043a\\u0438\\u0435 \\u0431\\u0430\\u043d\\u043a\\u0438. \\u041d\\u0438\\u043a\\u0430\\u043a\\u0438\\u0445 \\u043a\\u0440\\u0435\\u0434\\u0438\\u0442\\u043d\\u044b\\u0445 \\u043a\\u0430\\u0440\\u0442 \\u0434\\u0430\\u0436\\u0435 \\u043d\\u0435 \\u043f\\u0440\\u0435\\u0434\\u043b\\u0430\\u0433\\u0430\\u044e\\u0442 (\\u0438 \\u0432\\u043e\\u043e\\u0431\\u0449\\u0435 \\u043e \\u043f\\u043e\\u0434\\u043e\\u0431\\u043d\\u044b\\u0445 \\u0438\\u0441\\u0442\\u043e\\u0440\\u0438\\u044f\\u0445 \\u043d\\u0435 \\u0441\\u043b\\u044b\\u0448\\u0430\\u043b). \\u0412\\u044b\\u0445\\u043e\\u0434\\u0438\\u0442, \\u0431\\u0430\\u043d\\u043a \\u0442\\u043e\\u0442 \\u0436\\u0435, \\u0430 \\u043f\\u0440\\u0438\\u043d\\u0446\\u0438\\u043f \\u0440\\u0430\\u0431\\u043e\\u0442\\u044b \\u0434\\u0440\\u0443\\u0433\\u043e\\u0439, \\u0437\\u0430\\u0442\\u043e\\u0447\\u0435\\u043d\\u043d\\u044b\\u0439 \\u043f\\u043e\\u0434 \\u0440\\u043e\\u0441\\u0441\\u0438\\u0439\\u0441\\u043a\\u0438\\u0439 \\u043a\\u043b\\u0438\\u0435\\u043d\\u0442 - \\u043b\\u043e\\u0445 . P.S. \\u0412 \\u0412\\u0435\\u043d\\u0433\\u0440\\u0438\\u0438 \\u0432\\u0441\\u0435 \\u0431\\u0430\\u043d\\u043a\\u0438 \\u0448\\u043b\\u044e\\u0442 \\u043a\\u0430\\u0440\\u0442\\u044b \\u043f\\u043e \\u043f\\u043e\\u0447\\u0442\\u0435, \\u043c\\u043e\\u044f \\u043a\\u0430\\u043a \\u0440\\u0430\\u0437 \\u0432 \\u044d\\u0442\\u043e\\u043c \\u043c\\u0435\\u0441\\u044f\\u0446\\u0435 \\u043f\\u0440\\u0438\\u0448\\u043b\\u0430. \\u041d\\u043e \\u0432 \\u043f\\u043e\\u0447\\u0442\\u043e\\u0432\\u044b\\u0439 \\u044f\\u0449\\u0438\\u043a \\u0438\\u0445 \\u043d\\u0438 \\u0437\\u0430 \\u0447\\u0442\\u043e \\u043d\\u0435 \\u043a\\u0438\\u043d\\u0443\\u0442. \\u041b\\u0438\\u0431\\u043e \\u0432 \\u0440\\u0443\\u043a\\u0438, \\u043b\\u0438\\u0431\\u043e \\u0438\\u0437\\u0432\\u0435\\u0449\\u0435\\u043d\\u0438\\u0435 \\u0447\\u0442\\u043e\\u0431 \\u0437\\u0430\\u0431\\u0440\\u0430\\u043b \\u043d\\u0430 \\u043f\\u043e\\u0447\\u0442\\u0435. \\u0410 \\u043f\\u043e\\u0447\\u0442\\u0430 \\u043f\\u043e\\u0440\\u044f\\u0434\\u043e\\u0447\\u043d\\u0430\\u044f\\n\",\n \"\\u0421\\u0443\\u0442\\u044c \\u0442\\u0440\\u0435\\u0431\\u043e\\u0432\\u0430\\u043d\\u0438\\u044f \\u043f\\u0440\\u043e \\u041230 - \\u043d\\u0435 \\u043f\\u0440\\u043e\\u0447\\u043d\\u043e\\u0441\\u0442\\u044c, \\u0430 \\u0432\\u043e\\u0434\\u043e\\u043d\\u0435\\u043f\\u0440\\u043e\\u043d\\u0438\\u0446\\u0430\\u0435\\u043c\\u043e\\u0441\\u0442\\u044c. \\u0412\\u043e\\u0434\\u043e\\u043d\\u0435\\u043f\\u0440\\u043e\\u043d\\u0438\\u0446\\u0430\\u0435\\u043c\\u043e\\u0441\\u0442\\u044c \\u0432\\u043e\\u043e\\u0431\\u0449\\u0435 \\u0442\\u043e \\u043e\\u0431\\u043e\\u0437\\u043d\\u0430\\u0447\\u0430\\u0435\\u0442\\u0441\\u044f W, \\u043d\\u0443 \\u0434\\u0430 \\u043d\\u0435 \\u0441\\u0443\\u0442\\u044c, \\u0437\\u0430\\u0447\\u0435\\u043c \\u043c\\u043d\\u0435 \\u043e\\u043d\\u0430 \\u0432 \\u043b\\u0435\\u043d\\u0442\\u0435 \\u043f\\u043e\\u0434 \\u0437\\u0435\\u043c\\u043b\\u0451\\u0439 ?\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toxic\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.47195781877088117,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.0,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}","type":"dataframe","variable_name":"df"},"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
commenttoxic
0Верблюдов-то за что? Дебилы, бл...\\n1.0
1Хохлы, это отдушина затюканого россиянина, мол...1.0
2Собаке - собачья смерть\\n1.0
3Страницу обнови, дебил. Это тоже не оскорблени...1.0
4тебя не убедил 6-страничный пдф в том, что Скр...1.0
.........
14407Вонючий совковый скот прибежал и ноет. А вот и...1.0
14408А кого любить? Гоблина тупорылого что-ли? Или ...1.0
14409Посмотрел Утомленных солнцем 2. И оказалось, ч...0.0
14410КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н...1.0
14411До сих пор пересматриваю его видео. Орамбо кст...0.0
\n","

14412 rows × 2 columns

\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","
\n","
\n"],"text/plain":[" comment toxic\n","0 Верблюдов-то за что? Дебилы, бл...\\n 1.0\n","1 Хохлы, это отдушина затюканого россиянина, мол... 1.0\n","2 Собаке - собачья смерть\\n 1.0\n","3 Страницу обнови, дебил. Это тоже не оскорблени... 1.0\n","4 тебя не убедил 6-страничный пдф в том, что Скр... 1.0\n","... ... ...\n","14407 Вонючий совковый скот прибежал и ноет. А вот и... 1.0\n","14408 А кого любить? Гоблина тупорылого что-ли? Или ... 1.0\n","14409 Посмотрел Утомленных солнцем 2. И оказалось, ч... 0.0\n","14410 КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н... 1.0\n","14411 До сих пор пересматриваю его видео. Орамбо кст... 0.0\n","\n","[14412 rows x 2 columns]"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["df"]},{"cell_type":"code","execution_count":15,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717068388527,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"gaMP-mo0O-a6"},"outputs":[],"source":["import nltk"]},{"cell_type":"code","execution_count":16,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717068391596,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"8vtOsNYpPh4P"},"outputs":[],"source":["# !pip install nltk"]},{"cell_type":"code","execution_count":17,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717068394415,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"IJVUmClVPglC","outputId":"8a917fc4-c865-41e7-8590-1f9f0fdc2a05"},"outputs":[{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Unzipping corpora/stopwords.zip.\n"]},{"data":{"text/plain":["True"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["nltk.download('stopwords')"]},{"cell_type":"code","execution_count":18,"metadata":{"executionInfo":{"elapsed":269,"status":"ok","timestamp":1717068398821,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"ShpWz1gRO7OD"},"outputs":[],"source":["stop_words = set(stopwords.words(\"russian\"))"]},{"cell_type":"code","execution_count":20,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717068486724,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"Ki_SvBdYOIr5"},"outputs":[],"source":["def clean_text(text):\n"," # Удаление всего, что не является буквами или знаками препинания\n"," clean_pattern = re.compile(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\\s]')\n"," text = clean_pattern.sub('', text)\n"," url_pattern = re.compile(r'http\\S+|www\\S+|https\\S+')\n"," text = url_pattern.sub(r'', text)\n"," text = re.sub(\"\\s+\", \" \", text)\n"," splitted_text = [word for word in text.split() if word not in stop_words]\n"," text = \" \".join(splitted_text)\n"," return text"]},{"cell_type":"code","execution_count":21,"metadata":{"executionInfo":{"elapsed":835,"status":"ok","timestamp":1717068490177,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"nfXeCbYkOSAn"},"outputs":[],"source":["df['comment'] = df['comment'].apply(clean_text)"]},{"cell_type":"code","execution_count":22,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":319,"status":"ok","timestamp":1717068491955,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"nrtGQtc0OZaF","outputId":"42ba7437-0a23-414f-c1a6-7c717a973a79"},"outputs":[{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"summary":"{\n \"name\": \"df\",\n \"rows\": 14412,\n \"fields\": [\n {\n \"column\": \"comment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14154,\n \"samples\": [\n \"\\u041a\\u043e\\u0433\\u0434\\u0430 \\u0448\\u0430\\u0440\\u0430\\u0445\\u043d\\u0443\\u043b 6 \\u0433\\u043e\\u0434\\u0443, 4 \\u043c\\u0435\\u0441\\u044f\\u0446\\u0432\\u043e\\u043e\\u0431\\u0449\\u0435 \\u0441\\u0442\\u0430\\u043b\\u043e \\u043e\\u0431\\u044b\\u0447\\u043d\\u044b\\u043c \\u0434\\u0435\\u043b\\u043e\\u043c\",\n \"\\u041b\\u0435\\u0442 7 \\u043d\\u0430\\u0437\\u0430\\u0434 \\u0436\\u0438\\u043b \\u0434\\u043e\\u043c\\u0435 \\u043e\\u0434\\u043d\\u0438\\u043c \\u043f\\u043e\\u0434\\u044a\\u0435\\u0437\\u0434\\u043e\\u043c. \\u0417\\u0432\\u043e\\u043d\\u0438\\u043b \\u0421\\u043a\\u043e\\u0440\\u0443\\u044e \\u0442\\u0440\\u0430\\u0432\\u043c\\u0435 \\u0433\\u043e\\u043b\\u043e\\u0432\\u044b \\u0431\\u043b\\u0438\\u0437\\u043a\\u043e\\u0433\\u043e \\u0447\\u0435\\u043b\\u043e\\u0432\\u0435\\u043a\\u0430 \\u043e\\u043f\\u0435\\u0440\\u0430\\u0442\\u043e\\u0440 \\u043e\\u043f\\u0440\\u043e\\u0441\\u0438\\u0432 \\u043e\\u0431\\u043e \\u0432\\u0441\\u0435\\u043c \\u0434\\u043e\\u0445\\u043e\\u0434\\u0438\\u0442 \\u0432\\u043e\\u043f\\u0440\\u043e\\u0441\\u0430 \\u043d\\u043e\\u043c\\u0435\\u0440 \\u043f\\u043e\\u0434\\u044a\\u0435\\u0437\\u0434\\u0430 , \\u0433\\u043e\\u0432\\u043e\\u0440\\u044e, \\u043f\\u043e\\u0434\\u044a\\u0435\\u0437\\u0434 1 \\u043d\\u043e\\u043c\\u0435\\u0440\\u0430 \\u0432\\u0438\\u0441\\u0435\\u043b\\u043e. \\u0425\\u043e\\u043b\\u043e\\u0434\\u043d\\u044b\\u043c \\u0442\\u043e\\u043d\\u043e\\u043c \\u043f\\u043e\\u0432\\u0442\\u043e\\u0440\\u0438\\u043b\\u0430 \\u0432\\u043e\\u043f\\u0440\\u043e\\u0441. \\u042f \\u0431\\u044b\\u0441\\u0442\\u0440\\u043e \\u043f\\u043e\\u043f\\u044b\\u0442\\u0430\\u043b\\u0441\\u044f \\u043f\\u043e\\u0432\\u0442\\u043e\\u0440\\u0438\\u0442\\u044c \\u043e\\u0442\\u0432\\u0435\\u0442, \\u0431\\u043e\\u044f\\u043b\\u0441\\u044f \\u043e\\u0448\\u0438\\u0431\\u0438\\u0442\\u044c\\u0441\\u044f 23 \\u043f\\u043e\\u0434\\u044a\\u0435\\u0437\\u0434\\u043e\\u043c, \\u0441\\u043a\\u043e\\u0440\\u0430\\u044f \\u043d\\u0435\\u043c\\u0443 \\u043f\\u043e\\u0434\\u044a\\u0435\\u0434\\u0435\\u0442 \\u0434\\u0430\\u043b\\u0435\\u043a\\u043e \\u0438\\u0434\\u0442\\u0438 \\u043c\\u043e\\u0435\\u0433\\u043e \\u043d\\u043e\\u043c\\u0435\\u0440\\u0430. 4 \\u043f\\u043e\\u043f\\u044b\\u0442\\u043a\\u0438 \\u0431\\u0440\\u043e\\u0441\\u0438\\u043b \\u0442\\u0440\\u0443\\u0431\\u043a\\u0443, \\u0432\\u044b\\u0432\\u0435\\u043b \\u0442\\u0440\\u0430\\u0432\\u043c\\u0438\\u0440\\u043e\\u0432\\u0430\\u043d\\u043e\\u0433\\u043e \\u0443\\u043b\\u0438\\u0446\\u0443 \\u0441\\u0440\\u0430\\u0437\\u0443 \\u043a\\u0430\\u0440\\u0442\\u0438\\u043d\\u0443 \\u0437\\u0430\\u043c\\u0435\\u0442\\u0438\\u043b \\u0441\\u043e\\u0441\\u0435\\u0434 \\u043c\\u0430\\u0448\\u0438\\u043d\\u043e\\u0439, 10 \\u043c\\u0438\\u043d\\u0443\\u0442 \\u043f\\u0440\\u0438\\u0451\\u043c\\u043d\\u043e\\u043c \\u043e\\u0442\\u0434\\u0435\\u043b\\u0435\\u043d\\u0438\\u0438.\",\n \"\\u041f\\u0440\\u0438\\u0431\\u044b\\u043b\\u044c \\u0431\\u0430\\u043d\\u043a\\u0430 \\u0443\\u043c\\u0435\\u043d\\u044c\\u0448\\u0438\\u0442\\u044c\\u0441\\u044f \\u0441\\u0447\\u0451\\u0442 \\u043d\\u0430\\u0447\\u0438\\u0441\\u043b\\u0435\\u043d\\u044b\\u0445 \\u043f\\u0440\\u043e\\u0446\\u0435\\u043d\\u0442\\u043e\\u0432 30 \\u0434\\u043d\\u0435\\u0439, \\u0441\\u0447\\u0451\\u0442 \\u0434\\u043e\\u0441\\u0442\\u0430\\u0442\\u043e\\u0447\\u043d\\u043e\\u0433\\u043e \\u043a\\u043e\\u043b\\u0438\\u0447\\u0435\\u0441\\u0442\\u0432\\u0430 \\u043f\\u0435\\u0440\\u0441\\u043e\\u043d\\u0430\\u043b\\u0430, \\u043e\\u0431\\u0440\\u0430\\u0431\\u0430\\u0442\\u044b\\u0432\\u0430\\u0442\\u044c \\u0437\\u0430\\u044f\\u0432\\u043b\\u0435\\u043d\\u0438\\u044f \\u0431\\u044b\\u0441\\u0442\\u0440\\u0435\\u0435 30 \\u0434\\u043d\\u0435\\u0439. \\u0410 \\u0437\\u0430\\u0451\\u043c\\u0449\\u0438\\u043a \\u043f\\u0435\\u0440\\u0432\\u0443\\u044e \\u043e\\u0447\\u0435\\u0440\\u0435\\u0434\\u044c \\u0434\\u043e\\u043b\\u0436\\u0435\\u043d \\u0434\\u043e\\u0433\\u043e\\u0432\\u043e\\u0440 \\u043f\\u0440\\u043e\\u0447\\u0438\\u0442\\u0430\\u0442\\u044c \\u043f\\u043e\\u043b\\u044c\\u0437\\u043e\\u0432\\u0430\\u0442\\u044c\\u0441\\u044f \\u043f\\u0440\\u043e\\u0434\\u0443\\u043a\\u0442\\u0430\\u043c\\u0438, \\u043a\\u043e\\u0442\\u043e\\u0440\\u044b\\u0435 \\u0443\\u0441\\u0442\\u0440\\u0430\\u0438\\u0432\\u0430\\u044e\\u0442\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"toxic\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.47195781877088117,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.0,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}","type":"dataframe","variable_name":"df"},"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
commenttoxic
0Верблюдовто что? Дебилы, бл...1.0
1Хохлы, это отдушина затюканого россиянина, мол...1.0
2Собаке собачья смерть1.0
3Страницу обнови, дебил. Это оскорбление, доказ...1.0
4убедил 6страничный пдф том, Скрипалей отравила...1.0
.........
14407Вонючий совковый скот прибежал ноет. А сторонн...1.0
14408А кого любить? Гоблина тупорылого чтоли? Или к...1.0
14409Посмотрел Утомленных солнцем 2. И оказалось, э...0.0
14410КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н...1.0
14411До сих пор пересматриваю видео. Орамбо кстати ...0.0
\n","

14412 rows × 2 columns

\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","
\n","
\n"],"text/plain":[" comment toxic\n","0 Верблюдовто что? Дебилы, бл... 1.0\n","1 Хохлы, это отдушина затюканого россиянина, мол... 1.0\n","2 Собаке собачья смерть 1.0\n","3 Страницу обнови, дебил. Это оскорбление, доказ... 1.0\n","4 убедил 6страничный пдф том, Скрипалей отравила... 1.0\n","... ... ...\n","14407 Вонючий совковый скот прибежал ноет. А сторонн... 1.0\n","14408 А кого любить? Гоблина тупорылого чтоли? Или к... 1.0\n","14409 Посмотрел Утомленных солнцем 2. И оказалось, э... 0.0\n","14410 КРЫМОТРЕД НАРУШАЕТ ПРАВИЛА РАЗДЕЛА Т.К В НЕМ Н... 1.0\n","14411 До сих пор пересматриваю видео. Орамбо кстати ... 0.0\n","\n","[14412 rows x 2 columns]"]},"execution_count":22,"metadata":{},"output_type":"execute_result"}],"source":["df"]},{"cell_type":"code","execution_count":45,"metadata":{"executionInfo":{"elapsed":663,"status":"ok","timestamp":1717069872952,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"yeEiYdqWRz9M"},"outputs":[],"source":["import torch\n","from transformers import AutoTokenizer, AutoModelForSequenceClassification\n","\n","model_checkpoint = 'cointegrated/rubert-tiny-toxicity'\n","tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)\n","model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)\n","if torch.cuda.is_available():\n"," model.cuda()"]},{"cell_type":"code","execution_count":46,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":291,"status":"ok","timestamp":1717069879631,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"SymPcx7vVrps","outputId":"6a1b8850-e210-4607-d485-cc28f1acf7df"},"outputs":[{"data":{"text/plain":["BertForSequenceClassification(\n"," (bert): BertModel(\n"," (embeddings): BertEmbeddings(\n"," (word_embeddings): Embedding(29564, 312, padding_idx=0)\n"," (position_embeddings): Embedding(512, 312)\n"," (token_type_embeddings): Embedding(2, 312)\n"," (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)\n"," (dropout): Dropout(p=0.1, inplace=False)\n"," )\n"," (encoder): BertEncoder(\n"," (layer): ModuleList(\n"," (0-2): 3 x BertLayer(\n"," (attention): BertAttention(\n"," (self): BertSdpaSelfAttention(\n"," (query): Linear(in_features=312, out_features=312, bias=True)\n"," (key): Linear(in_features=312, out_features=312, bias=True)\n"," (value): Linear(in_features=312, out_features=312, bias=True)\n"," (dropout): Dropout(p=0.1, inplace=False)\n"," )\n"," (output): BertSelfOutput(\n"," (dense): Linear(in_features=312, out_features=312, bias=True)\n"," (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)\n"," (dropout): Dropout(p=0.1, inplace=False)\n"," )\n"," )\n"," (intermediate): BertIntermediate(\n"," (dense): Linear(in_features=312, out_features=600, bias=True)\n"," (intermediate_act_fn): GELUActivation()\n"," )\n"," (output): BertOutput(\n"," (dense): Linear(in_features=600, out_features=312, bias=True)\n"," (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)\n"," (dropout): Dropout(p=0.1, inplace=False)\n"," )\n"," )\n"," )\n"," )\n"," (pooler): BertPooler(\n"," (dense): Linear(in_features=312, out_features=312, bias=True)\n"," (activation): Tanh()\n"," )\n"," )\n"," (dropout): Dropout(p=0.1, inplace=False)\n"," (classifier): Linear(in_features=312, out_features=5, bias=True)\n",")"]},"execution_count":46,"metadata":{},"output_type":"execute_result"}],"source":["model"]},{"cell_type":"code","execution_count":24,"metadata":{"executionInfo":{"elapsed":276,"status":"ok","timestamp":1717069000589,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"6c4sA0oQSah5"},"outputs":[],"source":["from sklearn.model_selection import train_test_split"]},{"cell_type":"code","execution_count":25,"metadata":{"executionInfo":{"elapsed":244,"status":"ok","timestamp":1717069049509,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"IYbB9fpKSPN4"},"outputs":[],"source":["X_train, X_test, y_train, y_test = train_test_split(df['comment'], df['toxic'], test_size=0.2, random_state=42)"]},{"cell_type":"code","execution_count":27,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1717069113528,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"Z2hdPUhrSyVR","outputId":"0d7664db-3df0-467b-cd99-13631d53b265"},"outputs":[{"data":{"text/plain":["160.0"]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["lengths = [len(review) for review in df['comment']]\n","\n","# Шаг 3: Вычислить 75-й квантиль длины отзывов\n","quantile_75 = np.percentile(lengths, 75)\n","quantile_75"]},{"cell_type":"code","execution_count":28,"metadata":{"executionInfo":{"elapsed":241,"status":"ok","timestamp":1717069120517,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"Z0Z0HuTHSpu9"},"outputs":[],"source":["MAX_LEN = 100"]},{"cell_type":"code","execution_count":29,"metadata":{"executionInfo":{"elapsed":6393,"status":"ok","timestamp":1717069128723,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"D1kEbsZsSO_4"},"outputs":[],"source":["tokenized_train = X_train.apply((lambda x: tokenizer.encode(x,\n"," add_special_tokens=True,\n"," truncation=True,\n"," padding='max_length',\n"," max_length=MAX_LEN)))"]},{"cell_type":"code","execution_count":30,"metadata":{"executionInfo":{"elapsed":1408,"status":"ok","timestamp":1717069139904,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"w9fDzcLTSo-0"},"outputs":[],"source":["tokenized_valid = X_test.apply((lambda x: tokenizer.encode(x,\n"," add_special_tokens=True,\n"," truncation=True,\n"," padding='max_length',\n"," max_length=MAX_LEN)))"]},{"cell_type":"code","execution_count":31,"metadata":{"executionInfo":{"elapsed":280,"status":"ok","timestamp":1717069158349,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"y0JTa8XgS8XT"},"outputs":[],"source":["attention_mask_train = np.where(np.array(list(tokenized_train.values)) != 0, 1, 0)"]},{"cell_type":"code","execution_count":32,"metadata":{"executionInfo":{"elapsed":456,"status":"ok","timestamp":1717069173165,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"W04NPnoqTBR8"},"outputs":[],"source":["attention_mask_valid = np.where(np.array(list(tokenized_valid.values)) != 0, 1, 0)"]},{"cell_type":"code","execution_count":33,"metadata":{"executionInfo":{"elapsed":263,"status":"ok","timestamp":1717069622940,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"lbzBRiSlTUTy"},"outputs":[],"source":["from torch.utils.data import Dataset"]},{"cell_type":"code","execution_count":34,"metadata":{"executionInfo":{"elapsed":261,"status":"ok","timestamp":1717069625066,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"37ZKdpt2TUTy"},"outputs":[],"source":["class BertInputs(torch.utils.data.Dataset):\n"," def __init__(self, tokenized_inputs, attention_masks, targets):\n"," self.tokenized_inputs = tokenized_inputs\n"," self.attention_masks = attention_masks\n"," self.targets = targets\n","\n"," def __len__(self):\n"," return self.tokenized_inputs.shape[0]\n","\n"," def __getitem__(self, idx):\n"," ids = self.tokenized_inputs[idx]\n"," ams = self.attention_masks[idx]\n"," target = self.targets[idx]\n","\n"," return ids, ams, target\n"]},{"cell_type":"code","execution_count":35,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1717069626758,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"qSxgGiCUTUTy"},"outputs":[],"source":["train_tokens = np.array(list(tokenized_train.values))"]},{"cell_type":"code","execution_count":36,"metadata":{"executionInfo":{"elapsed":268,"status":"ok","timestamp":1717069635938,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"RhxDZ1ouTUTz"},"outputs":[],"source":["valid_tokens = np.array(list(tokenized_valid.values))"]},{"cell_type":"code","execution_count":41,"metadata":{"executionInfo":{"elapsed":4,"status":"ok","timestamp":1717069778832,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"p5SzOedyVJum"},"outputs":[],"source":["target_train = y_train.to_numpy()\n","target_valid = y_test.to_numpy()"]},{"cell_type":"code","execution_count":39,"metadata":{"executionInfo":{"elapsed":344,"status":"ok","timestamp":1717069760952,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"MDvj3KOKTUTz"},"outputs":[],"source":["train_dataset = BertInputs(torch.from_numpy(train_tokens), attention_mask_train, torch.from_numpy(target_train))"]},{"cell_type":"code","execution_count":42,"metadata":{"executionInfo":{"elapsed":329,"status":"ok","timestamp":1717069780569,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"ocyD7tDRTUTz"},"outputs":[],"source":["valid_dataset = BertInputs(torch.from_numpy(valid_tokens), attention_mask_valid, torch.from_numpy(target_valid))"]},{"cell_type":"code","execution_count":43,"metadata":{"executionInfo":{"elapsed":308,"status":"ok","timestamp":1717069788867,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"8HCB_1AoTUTz"},"outputs":[],"source":["train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=False)\n","valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=32, shuffle=False)"]},{"cell_type":"code","execution_count":107,"metadata":{"executionInfo":{"elapsed":292,"status":"ok","timestamp":1717072670743,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"wKpry0axTUTz"},"outputs":[],"source":["class BERTClassifier(nn.Module):\n"," def __init__(self):\n"," super().__init__()\n"," self.bert = AutoModelForSequenceClassification.from_pretrained('cointegrated/rubert-tiny-toxicity')\n"," self.bert.classifier = nn.Linear(312, 312)\n"," for param in self.bert.parameters():\n"," param.requires_grad = False\n"," self.linear = nn.Sequential(\n"," nn.Linear(312, 128),\n"," nn.Sigmoid(),\n"," nn.Dropout(),\n"," nn.Linear(128, 1)\n"," )\n","\n"," def forward(self, x, attention_mask=None):\n"," bert_out = self.bert(x, attention_mask=attention_mask).logits\n"," out = self.linear(bert_out).squeeze(1)\n"," return out"]},{"cell_type":"code","execution_count":108,"metadata":{"executionInfo":{"elapsed":6,"status":"ok","timestamp":1717072670744,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"VsusHb9ZZ9FN"},"outputs":[],"source":["# !pip install torchmetrics"]},{"cell_type":"code","execution_count":109,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717072671174,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"ZWve7ng-Z6xP"},"outputs":[],"source":["from torchmetrics import Accuracy\n","from torchmetrics.classification import BinaryF1Score"]},{"cell_type":"code","execution_count":110,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1717072671456,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"CK9G-kRzZ6xQ"},"outputs":[],"source":["model = BERTClassifier()\n","device = 'cuda'\n","criterion = nn.BCEWithLogitsLoss()\n","optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)\n","metric = BinaryF1Score().to(device)\n","model.to(device);"]},{"cell_type":"code","execution_count":111,"metadata":{"executionInfo":{"elapsed":4,"status":"ok","timestamp":1717072671705,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"QqCSRkOKZsgI"},"outputs":[],"source":["def train_attention_lstm(epochs, model, train_loader, valid_loader, optimizer,\n"," criterion, metric, lstm_conf=None):\n"," epoch_train_losses = []\n"," epoch_valid_losses = []\n"," epoch_train_metric = []\n"," epoch_valid_metric = []\n"," device = 'cuda'\n"," for epoch in range(epochs):\n"," batch_losses = []\n"," batch_metric = []\n"," model.train()\n"," model.to(device)\n"," for inputs, attention_masks, labels in train_loader:\n"," inputs, attention_masks, labels = inputs.to(device), attention_masks.to(device), labels.to(device)\n","\n"," output = model(inputs, attention_mask=attention_masks)\n"," loss = criterion(output, labels)\n"," optimizer.zero_grad()\n"," loss.backward()\n"," optimizer.step()\n","\n"," batch_losses.append(loss.item())\n"," batch_metric.append(metric(output, labels).item())\n","\n"," epoch_train_losses.append(np.mean(batch_losses))\n"," epoch_train_metric.append(np.mean(batch_metric))\n","\n"," batch_losses = []\n"," batch_metric = []\n"," model.eval()\n"," for inputs, attention_masks, labels in valid_loader:\n"," inputs, attention_masks, labels = inputs.to(device), attention_masks.to(device), labels.to(device)\n"," with torch.no_grad():\n"," output = model(inputs, attention_mask=attention_masks)\n"," loss = criterion(output, labels)\n"," batch_losses.append(loss.item())\n"," batch_metric.append(metric(output, labels).item())\n"," epoch_valid_losses.append(np.mean(batch_losses))\n"," epoch_valid_metric.append(np.mean(batch_metric))\n","\n"," print(f'Epoch {epoch+1}')\n"," print(f'Train loss: {epoch_train_losses[-1]:.4f}, Val loss {epoch_valid_losses[-1]:.4f}')\n"," print(f'Train accuracy: {epoch_train_metric[-1]:.2f}, Val accuracy: {epoch_valid_metric[-1]:.2f}')\n"," print(25*'==')\n","\n"," return (epoch_train_losses, epoch_valid_losses, epoch_train_metric, epoch_valid_metric)"]},{"cell_type":"code","execution_count":112,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":86525,"status":"ok","timestamp":1717072758525,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"A1wQkEHTZxw4","outputId":"ca0403be-64c5-4864-a2f9-636b77991150"},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 1\n","Train loss: 0.5084, Val loss 0.4024\n","Train accuracy: 0.57, Val accuracy: 0.70\n","==================================================\n","Epoch 2\n","Train loss: 0.4391, Val loss 0.3867\n","Train accuracy: 0.66, Val accuracy: 0.72\n","==================================================\n","Epoch 3\n","Train loss: 0.4279, Val loss 0.3793\n","Train accuracy: 0.68, Val accuracy: 0.73\n","==================================================\n","Epoch 4\n","Train loss: 0.4193, Val loss 0.3731\n","Train accuracy: 0.69, Val accuracy: 0.73\n","==================================================\n","Epoch 5\n","Train loss: 0.4131, Val loss 0.3679\n","Train accuracy: 0.70, Val accuracy: 0.74\n","==================================================\n","Epoch 6\n","Train loss: 0.4070, Val loss 0.3634\n","Train accuracy: 0.70, Val accuracy: 0.74\n","==================================================\n","Epoch 7\n","Train loss: 0.4082, Val loss 0.3593\n","Train accuracy: 0.70, Val accuracy: 0.75\n","==================================================\n","Epoch 8\n","Train loss: 0.4026, Val loss 0.3563\n","Train accuracy: 0.71, Val accuracy: 0.75\n","==================================================\n","Epoch 9\n","Train loss: 0.4043, Val loss 0.3537\n","Train accuracy: 0.71, Val accuracy: 0.76\n","==================================================\n","Epoch 10\n","Train loss: 0.4014, Val loss 0.3518\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 11\n","Train loss: 0.3974, Val loss 0.3497\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 12\n","Train loss: 0.3948, Val loss 0.3478\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 13\n","Train loss: 0.3952, Val loss 0.3467\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 14\n","Train loss: 0.3946, Val loss 0.3454\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 15\n","Train loss: 0.3930, Val loss 0.3444\n","Train accuracy: 0.72, Val accuracy: 0.77\n","==================================================\n","Epoch 16\n","Train loss: 0.3902, Val loss 0.3433\n","Train accuracy: 0.72, Val accuracy: 0.76\n","==================================================\n","Epoch 17\n","Train loss: 0.3901, Val loss 0.3424\n","Train accuracy: 0.72, Val accuracy: 0.77\n","==================================================\n","Epoch 18\n","Train loss: 0.3899, Val loss 0.3415\n","Train accuracy: 0.72, Val accuracy: 0.77\n","==================================================\n","Epoch 19\n","Train loss: 0.3905, Val loss 0.3409\n","Train accuracy: 0.72, Val accuracy: 0.77\n","==================================================\n","Epoch 20\n","Train loss: 0.3904, Val loss 0.3403\n","Train accuracy: 0.72, Val accuracy: 0.77\n","==================================================\n"]},{"data":{"text/plain":["([0.5083780060451313,\n"," 0.4391106702167688,\n"," 0.42793449899041985,\n"," 0.4192526624897032,\n"," 0.4130774913023195,\n"," 0.40695059533327993,\n"," 0.40816243859394574,\n"," 0.4025705500237704,\n"," 0.40428590983994134,\n"," 0.4014262039793654,\n"," 0.3973992633044861,\n"," 0.39483769866059215,\n"," 0.3951748218383066,\n"," 0.39460813710124554,\n"," 0.3929524585412899,\n"," 0.3901995505507913,\n"," 0.39005828239460105,\n"," 0.3898510054791003,\n"," 0.39051921931187866,\n"," 0.3903515106272374],\n"," [0.402441298047971,\n"," 0.3867063879904477,\n"," 0.3793247446857884,\n"," 0.3730928883862242,\n"," 0.3679111393862797,\n"," 0.3633536281389945,\n"," 0.3593491589004045,\n"," 0.3562914771151528,\n"," 0.3536964474871052,\n"," 0.35182306410159725,\n"," 0.34968449011011793,\n"," 0.3477907560270232,\n"," 0.34674735903661463,\n"," 0.3454434093392382,\n"," 0.34439929916767315,\n"," 0.34329559643294033,\n"," 0.34237983056187166,\n"," 0.3415207479647173,\n"," 0.3408517934908176,\n"," 0.3403323164568777],\n"," [0.5658733633829286,\n"," 0.6621058487908662,\n"," 0.6838039802464752,\n"," 0.6886554599386173,\n"," 0.6963648699492299,\n"," 0.6996440213803109,\n"," 0.7021754302311471,\n"," 0.7119714741148777,\n"," 0.710395372268896,\n"," 0.7155060225741685,\n"," 0.7166487282332952,\n"," 0.7205212973986966,\n"," 0.7195848430457868,\n"," 0.7156672836010476,\n"," 0.7237848192866159,\n"," 0.7244235358905264,\n"," 0.7235892567773274,\n"," 0.7228518685640721,\n"," 0.7241709839521683,\n"," 0.7238757334587647],\n"," [0.7004337032417675,\n"," 0.71641305907742,\n"," 0.728126830124593,\n"," 0.7323439026271904,\n"," 0.739565012219188,\n"," 0.7440549931028387,\n"," 0.7477671592445164,\n"," 0.7535042890480587,\n"," 0.7572787361485618,\n"," 0.7609272281547169,\n"," 0.7609272281547169,\n"," 0.7626037581281347,\n"," 0.7633639616625649,\n"," 0.7642797123599838,\n"," 0.7651110563304399,\n"," 0.7646179228693575,\n"," 0.7660046583348579,\n"," 0.768307666529666,\n"," 0.770193106853045,\n"," 0.7677900676543896])"]},"execution_count":112,"metadata":{},"output_type":"execute_result"}],"source":["train_attention_lstm(20, model, train_loader, valid_loader, optimizer, criterion, metric)"]},{"cell_type":"code","execution_count":113,"metadata":{"executionInfo":{"elapsed":282,"status":"ok","timestamp":1717073592487,"user":{"displayName":"вера великоборец","userId":"17606763383908550373"},"user_tz":-180},"id":"oXNFCJ4wbhUl"},"outputs":[],"source":["# Сохранение весов модели\n","torch.save(model.state_dict(), 'model_weights.pth')\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"BWYh3Sppj70S"},"outputs":[],"source":[]}],"metadata":{"accelerator":"GPU","colab":{"authorship_tag":"ABX9TyPBcq+impWauwHnDO3K9VQh","gpuType":"T4","mount_file_id":"1usIDPB7YVnxZIo3V3ggC49u8SFxGFn3M","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}