{ "cells": [ { "cell_type": "markdown", "id": "a5cd6249", "metadata": {}, "source": [ "# 🧪 Analyse exploratoire du dataset Jigsaw (français)" ] },
{ "cell_type": "code", "execution_count": null, "id": "943b9ceb", "metadata": {}, "outputs": [], "source": [ "# Use the %pip magic (not !pip) so the packages are installed into the environment of the running kernel,\n", "# not into whichever pip happens to be first on the shell PATH.\n", "# Versions are pinned to the ones this notebook was last run with, for reproducibility.\n", "%pip install pandas==2.3.0 matplotlib==3.10.3 seaborn==0.13.2 wordcloud==1.9.4" ] },
{ "cell_type": "code", "execution_count": 14, "id": "0ff4a780", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "comment_text | \n", "toxic | \n", "severe_toxic | \n", "obscene | \n", "threat | \n", "insult | \n", "identity_hate | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "000113f07ec002fd | \n", "Hé mec, je n'essaye vraiment pas de modifier l... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "00025465d4725e87 | \n", "\"\\n\\nFélicitations de ma part également, utili... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
2 | \n", "0009801bd85e5806 | \n", "Le point Mitsurugi n'avait aucun sens - pourqu... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "0001b41b1c6bb37e | \n", "\"\\nPlus\\nJe ne peux pas faire de véritables su... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
4 | \n", "00040093b2687caa | \n", "alignement sur ce sujet et qui sont contraires... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "