{ "cells": [ { "cell_type": "markdown", "id": "d9fec777", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 5, "id": "ba0578d2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "books = pd.read_csv(\"books_with_categories.csv\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "547ef724", "metadata": {}, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "code", "execution_count": null, "id": "0184c98f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Device set to use cuda\n" ] }, { "data": { "text/plain": [ "[[{'label': 'joy', 'score': 0.9771687984466553},\n", " {'label': 'surprise', 'score': 0.008528684265911579},\n", " {'label': 'neutral', 'score': 0.005764602683484554},\n", " {'label': 'anger', 'score': 0.004419787786900997},\n", " {'label': 'sadness', 'score': 0.0020923942793160677},\n", " {'label': 'disgust', 'score': 0.0016119946958497167},\n", " {'label': 'fear', 'score': 0.0004138521908316761}]]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "classifier = pipeline(\"text-classification\", \n", " model=\"j-hartmann/emotion-english-distilroberta-base\",\n", " top_k = None,\n", " device = \"cuda\")\n", "classifier(\"I love this!\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "ab7b7c6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world has to offer. At its heart is a tale of the sacred bonds between fathers and sons, pitch-perfect in style and story, set to dazzle critics and readers alike.'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[\"description\"][0]" ] }, { "cell_type": "code", "execution_count": 7, "id": "80d70c9c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[{'label': 'fear', 'score': 0.6548407673835754},\n", " {'label': 'neutral', 'score': 0.16985219717025757},\n", " {'label': 'sadness', 'score': 0.11640916764736176},\n", " {'label': 'surprise', 'score': 0.020700708031654358},\n", " {'label': 'disgust', 'score': 0.019100721925497055},\n", " {'label': 'joy', 'score': 0.01516129169613123},\n", " {'label': 'anger', 'score': 0.0039351508021354675}]]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(books[\"description\"][0])" ] }, { "cell_type": "code", "execution_count": 8, "id": "d8c6e560", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[{'label': 'surprise', 'score': 0.7296026349067688},\n", " {'label': 'neutral', 'score': 0.14038565754890442},\n", " {'label': 'fear', 'score': 0.0681622102856636},\n", " {'label': 'joy', 'score': 0.047942474484443665},\n", " {'label': 'anger', 'score': 0.009156355634331703},\n", " {'label': 'disgust', 'score': 0.002628473797813058},\n", " {'label': 'sadness', 'score': 0.002122160280123353}],\n", " [{'label': 'neutral', 'score': 0.4493710994720459},\n", " {'label': 'disgust', 'score': 0.27359139919281006},\n", " {'label': 'joy', 'score': 0.10908272117376328},\n", " {'label': 'sadness', 'score': 0.09362725168466568},\n", " {'label': 'anger', 'score': 0.04047825187444687},\n", " {'label': 'surprise', 'score': 0.026970187202095985},\n", " {'label': 'fear', 'score': 0.006879056338220835}],\n", " [{'label': 'neutral', 'score': 0.6462168097496033},\n", " {'label': 'sadness', 'score': 0.24273255467414856},\n", " {'label': 'disgust', 'score': 0.04342259094119072},\n", " {'label': 'surprise', 'score': 0.028300529345870018},\n", " {'label': 'joy', 'score': 0.014211456291377544},\n", " {'label': 'fear', 'score': 0.01408409047871828},\n", " {'label': 'anger', 'score': 0.011031880043447018}],\n", " [{'label': 'fear', 'score': 0.9281679391860962},\n", " {'label': 'anger', 'score': 0.03219100087881088},\n", " {'label': 'neutral', 'score': 0.012808752246201038},\n", " {'label': 'sadness', 'score': 0.00875688437372446},\n", " {'label': 'surprise', 'score': 0.008597937412559986},\n", " {'label': 'disgust', 'score': 0.008431863971054554},\n", " {'label': 'joy', 'score': 0.0010455828160047531}],\n", " [{'label': 'sadness', 'score': 0.9671575427055359},\n", " {'label': 'neutral', 'score': 0.015104170888662338},\n", " {'label': 'disgust', 'score': 0.006480599287897348},\n", " {'label': 'fear', 'score': 0.0053939977660775185},\n", " {'label': 'surprise', 'score': 0.0022869440726935863},\n", " {'label': 'anger', 'score': 0.0018428928451612592},\n", " {'label': 'joy', 'score': 0.0017338799079880118}],\n", " [{'label': 'joy', 'score': 0.9327967166900635},\n", " {'label': 'disgust', 'score': 0.037718113511800766},\n", " {'label': 'neutral', 'score': 0.015892023220658302},\n", " {'label': 'sadness', 'score': 0.006444575730711222},\n", " {'label': 'anger', 'score': 0.005025049671530724},\n", " {'label': 'surprise', 'score': 0.0015812116907909513},\n", " {'label': 'fear', 'score': 0.0005423118127509952}],\n", " [{'label': 'joy', 'score': 0.6528710126876831},\n", " {'label': 'neutral', 'score': 0.2542743384838104},\n", " {'label': 'surprise', 'score': 0.06808304786682129},\n", " {'label': 'sadness', 'score': 0.00990898534655571},\n", " {'label': 'disgust', 'score': 0.006512206979095936},\n", " {'label': 'anger', 'score': 0.004821315407752991},\n", " {'label': 'fear', 'score': 0.003529016859829426}],\n", " [{'label': 'neutral', 'score': 0.5494765043258667},\n", " {'label': 'sadness', 'score': 0.1116902157664299},\n", " {'label': 'disgust', 'score': 0.10400670766830444},\n", " {'label': 'surprise', 'score': 0.07876554876565933},\n", " {'label': 'anger', 'score': 0.0641336739063263},\n", " {'label': 'fear', 'score': 0.05136284604668617},\n", " {'label': 'joy', 'score': 0.04056443274021149}]]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(books[\"description\"][0].split(\".\"))" ] }, { "cell_type": "code", "execution_count": 9, "id": "ecd96f5e", "metadata": {}, "outputs": [], "source": [ "sentences = books[\"description\"][0].split(\".\")\n", "predictions = classifier(sentences)" ] }, { "cell_type": "code", "execution_count": 10, "id": "395f13c4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sentences[0]" ] }, { "cell_type": "code", "execution_count": 11, "id": "0f136f0e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'surprise', 'score': 0.7296026349067688},\n", " {'label': 'neutral', 'score': 0.14038565754890442},\n", " {'label': 'fear', 'score': 0.0681622102856636},\n", " {'label': 'joy', 'score': 0.047942474484443665},\n", " {'label': 'anger', 'score': 0.009156355634331703},\n", " {'label': 'disgust', 'score': 0.002628473797813058},\n", " {'label': 'sadness', 'score': 0.002122160280123353}]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions[0]" ] }, { "cell_type": "code", "execution_count": 12, "id": "5468ffde", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sentences[3]" ] }, { "cell_type": "code", "execution_count": 13, "id": "4cec13a5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'fear', 'score': 0.9281679391860962},\n", " {'label': 'anger', 'score': 0.03219100087881088},\n", " {'label': 'neutral', 'score': 0.012808752246201038},\n", " {'label': 'sadness', 'score': 0.00875688437372446},\n", " {'label': 'surprise', 'score': 0.008597937412559986},\n", " {'label': 'disgust', 'score': 0.008431863971054554},\n", " {'label': 'joy', 'score': 0.0010455828160047531}]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions[3]" ] }, { "cell_type": "code", "execution_count": 14, "id": "08252ce7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[{'label': 'surprise', 'score': 0.7296026349067688},\n", " {'label': 'neutral', 'score': 0.14038565754890442},\n", " {'label': 'fear', 'score': 0.0681622102856636},\n", " {'label': 'joy', 'score': 0.047942474484443665},\n", " {'label': 'anger', 'score': 0.009156355634331703},\n", " {'label': 'disgust', 'score': 0.002628473797813058},\n", " {'label': 'sadness', 'score': 0.002122160280123353}],\n", " [{'label': 'neutral', 'score': 0.4493710994720459},\n", " {'label': 'disgust', 'score': 0.27359139919281006},\n", " {'label': 'joy', 'score': 0.10908272117376328},\n", " {'label': 'sadness', 'score': 0.09362725168466568},\n", " {'label': 'anger', 'score': 0.04047825187444687},\n", " {'label': 'surprise', 'score': 0.026970187202095985},\n", " {'label': 'fear', 'score': 0.006879056338220835}],\n", " [{'label': 'neutral', 'score': 0.6462168097496033},\n", " {'label': 'sadness', 'score': 0.24273255467414856},\n", " {'label': 'disgust', 'score': 0.04342259094119072},\n", " {'label': 'surprise', 'score': 0.028300529345870018},\n", " {'label': 'joy', 'score': 0.014211456291377544},\n", " {'label': 'fear', 'score': 0.01408409047871828},\n", " {'label': 'anger', 'score': 0.011031880043447018}],\n", " [{'label': 'fear', 'score': 0.9281679391860962},\n", " {'label': 'anger', 'score': 0.03219100087881088},\n", " {'label': 'neutral', 'score': 0.012808752246201038},\n", " {'label': 'sadness', 'score': 0.00875688437372446},\n", " {'label': 'surprise', 'score': 0.008597937412559986},\n", " {'label': 'disgust', 'score': 0.008431863971054554},\n", " {'label': 'joy', 'score': 0.0010455828160047531}],\n", " [{'label': 'sadness', 'score': 0.9671575427055359},\n", " {'label': 'neutral', 'score': 0.015104170888662338},\n", " {'label': 'disgust', 'score': 0.006480599287897348},\n", " {'label': 'fear', 'score': 0.0053939977660775185},\n", " {'label': 'surprise', 'score': 0.0022869440726935863},\n", " {'label': 'anger', 'score': 0.0018428928451612592},\n", " {'label': 'joy', 'score': 0.0017338799079880118}],\n", " [{'label': 'joy', 'score': 0.9327967166900635},\n", " {'label': 'disgust', 'score': 0.037718113511800766},\n", " {'label': 'neutral', 'score': 0.015892023220658302},\n", " {'label': 'sadness', 'score': 0.006444575730711222},\n", " {'label': 'anger', 'score': 0.005025049671530724},\n", " {'label': 'surprise', 'score': 0.0015812116907909513},\n", " {'label': 'fear', 'score': 0.0005423118127509952}],\n", " [{'label': 'joy', 'score': 0.6528710126876831},\n", " {'label': 'neutral', 'score': 0.2542743384838104},\n", " {'label': 'surprise', 'score': 0.06808304786682129},\n", " {'label': 'sadness', 'score': 0.00990898534655571},\n", " {'label': 'disgust', 'score': 0.006512206979095936},\n", " {'label': 'anger', 'score': 0.004821315407752991},\n", " {'label': 'fear', 'score': 0.003529016859829426}],\n", " [{'label': 'neutral', 'score': 0.5494765043258667},\n", " {'label': 'sadness', 'score': 0.1116902157664299},\n", " {'label': 'disgust', 'score': 0.10400670766830444},\n", " {'label': 'surprise', 'score': 0.07876554876565933},\n", " {'label': 'anger', 'score': 0.0641336739063263},\n", " {'label': 'fear', 'score': 0.05136284604668617},\n", " {'label': 'joy', 'score': 0.04056443274021149}]]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions" ] }, { "cell_type": "code", "execution_count": 15, "id": "e76fc604", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'anger', 'score': 0.009156355634331703},\n", " {'label': 'disgust', 'score': 0.002628473797813058},\n", " {'label': 'fear', 'score': 0.0681622102856636},\n", " {'label': 'joy', 'score': 0.047942474484443665},\n", " {'label': 'neutral', 'score': 0.14038565754890442},\n", " {'label': 'sadness', 'score': 0.002122160280123353},\n", " {'label': 'surprise', 'score': 0.7296026349067688}]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(predictions[0], key = lambda x:x[\"label\"])" ] }, { "cell_type": "code", "execution_count": 21, "id": "bb49276c", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "emotion_labels = [\"anger\", \"disgust\", \"fear\", \"joy\", \"sadness\", \"surprise\", \"neutral\"]\n", "isbn = []\n", "emotion_scores = {label: [] for label in emotion_labels}\n", "\n", "def calculate_max_emotion_score(predictions):\n", " per_emotion_scores = {label:[] for label in emotion_labels}\n", " for prediction in predictions:\n", " sorted_predictions = sorted(prediction, key = lambda x: x[\"label\"])\n", " for index, label in enumerate(emotion_labels):\n", " per_emotion_scores[label].append(sorted_predictions[index][\"score\"])\n", " return {label: np.max(scores) for label, scores in per_emotion_scores.items()}" ] }, { "cell_type": "code", "execution_count": null, "id": "4e3fc36a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n" ] } ], "source": [ "for i in range(10):\n", " isbn.append(books[\"isbn13\"][i])\n", " sentences = books[\"description\"][0].split(\".\")\n", " predictions = classifier(sentences)\n", " max_scores = calculate_max_emotion_score(predictions)\n", " for label in emotion_labels:\n", " emotion_scores[label].append(max_scores[label])" ] }, { "cell_type": "code", "execution_count": 20, "id": "a5ea90b1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'anger': [np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263),\n", " np.float64(0.0641336739063263)],\n", " 'disgust': [np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006),\n", " np.float64(0.27359139919281006)],\n", " 'fear': [np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962),\n", " np.float64(0.9281679391860962)],\n", " 'joy': [np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635),\n", " np.float64(0.9327967166900635)],\n", " 'sadness': [np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033),\n", " np.float64(0.6462168097496033)],\n", " 'surprise': [np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359),\n", " np.float64(0.9671575427055359)],\n", " 'neutral': [np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688),\n", " np.float64(0.7296026349067688)]}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emotion_scores" ] }, { "cell_type": "code", "execution_count": 22, "id": "5b2ca76f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 5197/5197 [01:58<00:00, 43.92it/s]\n" ] } ], "source": [ "\n", "from tqdm import tqdm\n", "\n", "emotion_labels = [\"anger\", \"disgust\", \"fear\", \"joy\", \"sadness\", \"surprise\", \"neutral\"]\n", "isbn = []\n", "emotion_scores = {label: [] for label in emotion_labels}\n", "\n", "for i in tqdm(range(len(books))):\n", " isbn.append(books[\"isbn13\"][i])\n", " sentences = books[\"description\"][i].split(\".\")\n", " predictions = classifier(sentences)\n", " max_scores = calculate_max_emotion_score(predictions)\n", " for label in emotion_labels:\n", " emotion_scores[label].append(max_scores[label])" ] }, { "cell_type": "code", "execution_count": 23, "id": "f69ecf48", "metadata": {}, "outputs": [], "source": [ "emotions_df = pd.DataFrame(emotion_scores)\n", "emotions_df[\"isbn13\"] = isbn" ] }, { "cell_type": "code", "execution_count": 24, "id": "f74c27e0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
angerdisgustfearjoysadnesssurpriseneutralisbn13
00.0641340.2735910.9281680.9327970.6462170.9671580.7296039780002005883
10.6126190.3482850.9425280.7044210.8879390.1116900.2525459780002261982
20.0641340.1040070.9723210.7672360.5494770.1116900.0787669780006178736
30.3514830.1507220.3607070.2518810.7326850.1116900.0787669780006280897
40.0814120.1844950.0950430.0405640.8843890.4758810.0787669780006280934
...........................
51920.1482090.0306430.9191650.2551690.8537210.9808770.0306569788172235222
51930.0641340.1143830.0513630.4002630.8831990.1116900.2277659788173031014
51940.0099970.0099290.3392180.9477790.3757550.0666850.0576259788179921623
51950.0641340.1040070.4592690.7594560.9511040.3681110.0787669788185300535
51960.0641340.1040070.0513630.9585490.9151930.1116900.0787669789027712059
\n", "

5197 rows × 8 columns

\n", "
" ], "text/plain": [ " anger disgust fear joy sadness surprise neutral \\\n", "0 0.064134 0.273591 0.928168 0.932797 0.646217 0.967158 0.729603 \n", "1 0.612619 0.348285 0.942528 0.704421 0.887939 0.111690 0.252545 \n", "2 0.064134 0.104007 0.972321 0.767236 0.549477 0.111690 0.078766 \n", "3 0.351483 0.150722 0.360707 0.251881 0.732685 0.111690 0.078766 \n", "4 0.081412 0.184495 0.095043 0.040564 0.884389 0.475881 0.078766 \n", "... ... ... ... ... ... ... ... \n", "5192 0.148209 0.030643 0.919165 0.255169 0.853721 0.980877 0.030656 \n", "5193 0.064134 0.114383 0.051363 0.400263 0.883199 0.111690 0.227765 \n", "5194 0.009997 0.009929 0.339218 0.947779 0.375755 0.066685 0.057625 \n", "5195 0.064134 0.104007 0.459269 0.759456 0.951104 0.368111 0.078766 \n", "5196 0.064134 0.104007 0.051363 0.958549 0.915193 0.111690 0.078766 \n", "\n", " isbn13 \n", "0 9780002005883 \n", "1 9780002261982 \n", "2 9780006178736 \n", "3 9780006280897 \n", "4 9780006280934 \n", "... ... \n", "5192 9788172235222 \n", "5193 9788173031014 \n", "5194 9788179921623 \n", "5195 9788185300535 \n", "5196 9789027712059 \n", "\n", "[5197 rows x 8 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emotions_df" ] }, { "cell_type": "code", "execution_count": 25, "id": "93f5ef26", "metadata": {}, "outputs": [], "source": [ "books = pd.merge(books, emotions_df, on = \"isbn13\")" ] }, { "cell_type": "code", "execution_count": 26, "id": "88e9a111", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pages...title_and_subtitletagged_descriptionsimple_categoriesangerdisgustfearjoysadnesssurpriseneutral
097800020058830002005883GileadMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0...Gilead9780002005883 A NOVEL THAT READERS and critics...Fiction0.0641340.2735910.9281680.9327970.6462170.9671580.729603
197800022619820002261987Spider's WebCharles Osborne;Agatha ChristieDetective and mystery storieshttp://books.google.com/books/content?id=gA5GP...A new 'Christie for Christmas' -- a full-lengt...2000.03.83241.0...Spider's Web: A Novel9780002261982 A new 'Christie for Christmas' -...Fiction0.6126190.3482850.9425280.7044210.8879390.1116900.252545
297800061787360006178731Rage of angelsSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.0...Rage of angels9780006178736 A memorable, mesmerizing heroine...Fiction0.0641340.1040070.9723210.7672360.5494770.1116900.078766
397800062808970006280897The Four LovesClive Staples LewisChristian lifehttp://books.google.com/books/content?id=XhQ5X...Lewis' work on the nature of love divides love...2002.04.15170.0...The Four Loves9780006280897 Lewis' work on the nature of lov...Nonfiction0.3514830.1507220.3607070.2518810.7326850.1116900.078766
497800062809340006280935The Problem of PainClive Staples LewisChristian lifehttp://books.google.com/books/content?id=Kk-uV...\"In The Problem of Pain, C.S. Lewis, one of th...2002.04.09176.0...The Problem of Pain9780006280934 \"In The Problem of Pain, C.S. Le...Nonfiction0.0814120.1844950.0950430.0405640.8843890.4758810.078766
..................................................................
519297881722352228172235224Mistaken IdentityNayantara SahgalIndic fiction (English)http://books.google.com/books/content?id=q-tKP...On A Train Journey Home To North India After L...2003.02.93324.0...Mistaken Identity9788172235222 On A Train Journey Home To North...Fiction0.1482090.0306430.9191650.2551690.8537210.9808770.030656
519397881730310148173031010Journey to the EastHermann HesseAdventure storieshttp://books.google.com/books/content?id=rq6JP...This book tells the tale of a man who goes on ...2002.03.70175.0...Journey to the East9788173031014 This book tells the tale of a ma...Nonfiction0.0641340.1143830.0513630.4002630.8831990.1116900.227765
51949788179921623817992162XThe Monk Who Sold His Ferrari: A Fable About F...Robin SharmaHealth & Fitnesshttp://books.google.com/books/content?id=c_7mf...Wisdom to Create a Life of Passion, Purpose, a...2003.03.82198.0...The Monk Who Sold His Ferrari: A Fable About F...9788179921623 Wisdom to Create a Life of Passi...Fiction0.0099970.0099290.3392180.9477790.3757550.0666850.057625
519597881853005358185300534I Am thatSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophyhttp://books.google.com/books/content?id=Fv_JP...This collection of the timeless teachings of o...1999.04.51531.0...I Am that: Talks with Sri Nisargadatta Maharaj9788185300535 This collection of the timeless ...Nonfiction0.0641340.1040070.4592690.7594560.9511040.3681110.078766
519697890277120599027712050The Berlin PhenomenologyGeorg Wilhelm Friedrich HegelHistoryhttp://books.google.com/books/content?id=Vy7Sk...Since the three volume edition ofHegel's Philo...1981.00.00210.0...The Berlin Phenomenology9789027712059 Since the three volume edition o...Nonfiction0.0641340.1040070.0513630.9585490.9151930.1116900.078766
\n", "

5197 rows × 22 columns

\n", "
" ], "text/plain": [ " isbn13 isbn10 \\\n", "0 9780002005883 0002005883 \n", "1 9780002261982 0002261987 \n", "2 9780006178736 0006178731 \n", "3 9780006280897 0006280897 \n", "4 9780006280934 0006280935 \n", "... ... ... \n", "5192 9788172235222 8172235224 \n", "5193 9788173031014 8173031010 \n", "5194 9788179921623 817992162X \n", "5195 9788185300535 8185300534 \n", "5196 9789027712059 9027712050 \n", "\n", " title \\\n", "0 Gilead \n", "1 Spider's Web \n", "2 Rage of angels \n", "3 The Four Loves \n", "4 The Problem of Pain \n", "... ... \n", "5192 Mistaken Identity \n", "5193 Journey to the East \n", "5194 The Monk Who Sold His Ferrari: A Fable About F... \n", "5195 I Am that \n", "5196 The Berlin Phenomenology \n", "\n", " authors \\\n", "0 Marilynne Robinson \n", "1 Charles Osborne;Agatha Christie \n", "2 Sidney Sheldon \n", "3 Clive Staples Lewis \n", "4 Clive Staples Lewis \n", "... ... \n", "5192 Nayantara Sahgal \n", "5193 Hermann Hesse \n", "5194 Robin Sharma \n", "5195 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", "5196 Georg Wilhelm Friedrich Hegel \n", "\n", " categories \\\n", "0 Fiction \n", "1 Detective and mystery stories \n", "2 Fiction \n", "3 Christian life \n", "4 Christian life \n", "... ... \n", "5192 Indic fiction (English) \n", "5193 Adventure stories \n", "5194 Health & Fitness \n", "5195 Philosophy \n", "5196 History \n", "\n", " thumbnail \\\n", "0 http://books.google.com/books/content?id=KQZCP... \n", "1 http://books.google.com/books/content?id=gA5GP... \n", "2 http://books.google.com/books/content?id=FKo2T... \n", "3 http://books.google.com/books/content?id=XhQ5X... \n", "4 http://books.google.com/books/content?id=Kk-uV... \n", "... ... \n", "5192 http://books.google.com/books/content?id=q-tKP... \n", "5193 http://books.google.com/books/content?id=rq6JP... \n", "5194 http://books.google.com/books/content?id=c_7mf... \n", "5195 http://books.google.com/books/content?id=Fv_JP... \n", "5196 http://books.google.com/books/content?id=Vy7Sk... \n", "\n", " description published_year \\\n", "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n", "2 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", "3 Lewis' work on the nature of love divides love... 2002.0 \n", "4 \"In The Problem of Pain, C.S. Lewis, one of th... 2002.0 \n", "... ... ... \n", "5192 On A Train Journey Home To North India After L... 2003.0 \n", "5193 This book tells the tale of a man who goes on ... 2002.0 \n", "5194 Wisdom to Create a Life of Passion, Purpose, a... 2003.0 \n", "5195 This collection of the timeless teachings of o... 1999.0 \n", "5196 Since the three volume edition ofHegel's Philo... 1981.0 \n", "\n", " average_rating num_pages ... \\\n", "0 3.85 247.0 ... \n", "1 3.83 241.0 ... \n", "2 3.93 512.0 ... \n", "3 4.15 170.0 ... \n", "4 4.09 176.0 ... \n", "... ... ... ... \n", "5192 2.93 324.0 ... \n", "5193 3.70 175.0 ... \n", "5194 3.82 198.0 ... \n", "5195 4.51 531.0 ... \n", "5196 0.00 210.0 ... \n", "\n", " title_and_subtitle \\\n", "0 Gilead \n", "1 Spider's Web: A Novel \n", "2 Rage of angels \n", "3 The Four Loves \n", "4 The Problem of Pain \n", "... ... \n", "5192 Mistaken Identity \n", "5193 Journey to the East \n", "5194 The Monk Who Sold His Ferrari: A Fable About F... \n", "5195 I Am that: Talks with Sri Nisargadatta Maharaj \n", "5196 The Berlin Phenomenology \n", "\n", " tagged_description simple_categories \\\n", "0 9780002005883 A NOVEL THAT READERS and critics... Fiction \n", "1 9780002261982 A new 'Christie for Christmas' -... Fiction \n", "2 9780006178736 A memorable, mesmerizing heroine... Fiction \n", "3 9780006280897 Lewis' work on the nature of lov... Nonfiction \n", "4 9780006280934 \"In The Problem of Pain, C.S. Le... Nonfiction \n", "... ... ... \n", "5192 9788172235222 On A Train Journey Home To North... Fiction \n", "5193 9788173031014 This book tells the tale of a ma... Nonfiction \n", "5194 9788179921623 Wisdom to Create a Life of Passi... Fiction \n", "5195 9788185300535 This collection of the timeless ... Nonfiction \n", "5196 9789027712059 Since the three volume edition o... Nonfiction \n", "\n", " anger disgust fear joy sadness surprise neutral \n", "0 0.064134 0.273591 0.928168 0.932797 0.646217 0.967158 0.729603 \n", "1 0.612619 0.348285 0.942528 0.704421 0.887939 0.111690 0.252545 \n", "2 0.064134 0.104007 0.972321 0.767236 0.549477 0.111690 0.078766 \n", "3 0.351483 0.150722 0.360707 0.251881 0.732685 0.111690 0.078766 \n", "4 0.081412 0.184495 0.095043 0.040564 0.884389 0.475881 0.078766 \n", "... ... ... ... ... ... ... ... \n", "5192 0.148209 0.030643 0.919165 0.255169 0.853721 0.980877 0.030656 \n", "5193 0.064134 0.114383 0.051363 0.400263 0.883199 0.111690 0.227765 \n", "5194 0.009997 0.009929 0.339218 0.947779 0.375755 0.066685 0.057625 \n", "5195 0.064134 0.104007 0.459269 0.759456 0.951104 0.368111 0.078766 \n", "5196 0.064134 0.104007 0.051363 0.958549 0.915193 0.111690 0.078766 \n", "\n", "[5197 rows x 22 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books" ] }, { "cell_type": "code", "execution_count": 27, "id": "6e7c5bd9", "metadata": {}, "outputs": [], "source": [ "books.to_csv(\"books_with_emotions.csv\", index = False)" ] } ], "metadata": { "kernelspec": { "display_name": "books_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }