{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "04f8450b", "metadata": {}, "outputs": [], "source": [ "\n", "import pandas as pd\n", "\n", "books = pd.read_csv(\"books_cleaned.csv\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "ab2829de", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
categoriescount
0Fiction2111
1Juvenile Fiction390
2Biography & Autobiography311
3History207
4Literary Criticism124
.........
474Aged women1
475Imperialism1
476Human-animal relationships1
477Amish1
478Human cloning1
\n", "

479 rows × 2 columns

\n", "
" ], "text/plain": [ " categories count\n", "0 Fiction 2111\n", "1 Juvenile Fiction 390\n", "2 Biography & Autobiography 311\n", "3 History 207\n", "4 Literary Criticism 124\n", ".. ... ...\n", "474 Aged women 1\n", "475 Imperialism 1\n", "476 Human-animal relationships 1\n", "477 Amish 1\n", "478 Human cloning 1\n", "\n", "[479 rows x 2 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[\"categories\"].value_counts().reset_index()" ] }, { "cell_type": "code", "execution_count": 3, "id": "30f187ba", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
categoriescount
0Fiction2111
1Juvenile Fiction390
2Biography & Autobiography311
3History207
4Literary Criticism124
5Religion117
6Philosophy117
7Comics & Graphic Novels116
8Drama86
9Juvenile Nonfiction57
10Science56
11Poetry51
\n", "
" ], "text/plain": [ " categories count\n", "0 Fiction 2111\n", "1 Juvenile Fiction 390\n", "2 Biography & Autobiography 311\n", "3 History 207\n", "4 Literary Criticism 124\n", "5 Religion 117\n", "6 Philosophy 117\n", "7 Comics & Graphic Novels 116\n", "8 Drama 86\n", "9 Juvenile Nonfiction 57\n", "10 Science 56\n", "11 Poetry 51" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[\"categories\"].value_counts().reset_index().query(\"count > 50\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "672caf81", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_countagg_of_booktitle_and_subtitletagged_description
309780006646006000664600XOcean Star ExpressMark Haddon;Peter SuttonJuvenile Fictionhttp://books.google.com/books/content?id=I2QZA...Joe and his parents are enjoying a summer holi...2002.03.5032.01.023.0Ocean Star Express9780006646006 Joe and his parents are enjoying...
7997800204426080020442602The voyage of the Dawn TreaderClive Staples LewisJuvenile Fictionhttp://books.google.com/books/content?id=fDD3C...The \"Dawn Treader\" is the first ship Narnia ha...1970.04.09216.02869.055.0The voyage of the Dawn Treader9780020442608 The \"Dawn Treader\" is the first ...
8597800305477440030547741Where the Red Fern GrowsWilson RawlsJuvenile Fictionhttp://books.google.com/books/content?id=IHpRw...A young boy living in the Ozarks achieves his ...2000.04.37288.095.025.0Where the Red Fern Grows: The Story of Two Dog...9780030547744 A young boy living in the Ozarks...
8697800600001410060000147Poppy's ReturnAviJuvenile Fictionhttp://books.google.com/books/content?id=XbcMJ...There's trouble at Gray House, the girlhood ho...2006.03.99256.01086.019.0Poppy's Return9780060000141 There's trouble at Gray House, t...
8797800600015370060001534Diary of a SpiderDoreen CroninJuvenile Fictionhttp://books.google.com/books/content?id=UWvZo...This is the diary ... of a spider. But don't b...2005.04.2540.07903.020.0Diary of a Spider9780060001537 This is the diary ... of a spide...
.............................................
484597815903858141590385810FablehavenBrandon MullJuvenile Fictionhttp://books.google.com/books/content?id=tbVIP...When Kendra and Seth go to stay at their grand...2006.04.09351.0111896.019.0Fablehaven9781590385814 When Kendra and Seth go to stay ...
493097815967925001596792507Sherlock Holmes and the Case of the Hound of t...Arthur Conan Doyle;Malvina G. VogelJuvenile Fictionhttp://books.google.com/books/content?id=EWgWP...Sherlock Holmes and Dr. Watson travel to the b...2005.04.51237.028.020.0Sherlock Holmes and the Case of the Hound of t...9781596792500 Sherlock Holmes and Dr. Watson t...
49429781599900056159990005XThe Drift HouseDale PeckJuvenile Fictionhttp://books.google.com/books/content?id=kbwPY...Sent to stay with their uncle in a ship-like h...2006.03.64437.0595.019.0The Drift House: The First Voyage9781599900056 Sent to stay with their uncle in...
501097818445805141844580512Attack of the JaguarM. A. HarveyJuvenile Fictionhttp://books.google.com/books/content?id=3HUdt...This training manual for operatives of Xtreme ...2004.03.40125.04.021.0Attack of the Jaguar9781844580514 This training manual for operati...
515797819324163741932416374The Very Persistent Gappers of FripGeorge Saunders;Lane SmithJuvenile Fictionhttp://books.google.com/books/content?id=xYJOP...A novel set in the seaside village of Frip fol...2006.04.1184.02317.019.0The Very Persistent Gappers of Frip9781932416374 A novel set in the seaside villa...
\n", "

390 rows × 14 columns

\n", "
" ], "text/plain": [ " isbn13 isbn10 \\\n", "30 9780006646006 000664600X \n", "79 9780020442608 0020442602 \n", "85 9780030547744 0030547741 \n", "86 9780060000141 0060000147 \n", "87 9780060001537 0060001534 \n", "... ... ... \n", "4845 9781590385814 1590385810 \n", "4930 9781596792500 1596792507 \n", "4942 9781599900056 159990005X \n", "5010 9781844580514 1844580512 \n", "5157 9781932416374 1932416374 \n", "\n", " title \\\n", "30 Ocean Star Express \n", "79 The voyage of the Dawn Treader \n", "85 Where the Red Fern Grows \n", "86 Poppy's Return \n", "87 Diary of a Spider \n", "... ... \n", "4845 Fablehaven \n", "4930 Sherlock Holmes and the Case of the Hound of t... \n", "4942 The Drift House \n", "5010 Attack of the Jaguar \n", "5157 The Very Persistent Gappers of Frip \n", "\n", " authors categories \\\n", "30 Mark Haddon;Peter Sutton Juvenile Fiction \n", "79 Clive Staples Lewis Juvenile Fiction \n", "85 Wilson Rawls Juvenile Fiction \n", "86 Avi Juvenile Fiction \n", "87 Doreen Cronin Juvenile Fiction \n", "... ... ... \n", "4845 Brandon Mull Juvenile Fiction \n", "4930 Arthur Conan Doyle;Malvina G. Vogel Juvenile Fiction \n", "4942 Dale Peck Juvenile Fiction \n", "5010 M. A. Harvey Juvenile Fiction \n", "5157 George Saunders;Lane Smith Juvenile Fiction \n", "\n", " thumbnail \\\n", "30 http://books.google.com/books/content?id=I2QZA... \n", "79 http://books.google.com/books/content?id=fDD3C... \n", "85 http://books.google.com/books/content?id=IHpRw... \n", "86 http://books.google.com/books/content?id=XbcMJ... \n", "87 http://books.google.com/books/content?id=UWvZo... \n", "... ... \n", "4845 http://books.google.com/books/content?id=tbVIP... \n", "4930 http://books.google.com/books/content?id=EWgWP... \n", "4942 http://books.google.com/books/content?id=kbwPY... \n", "5010 http://books.google.com/books/content?id=3HUdt... \n", "5157 http://books.google.com/books/content?id=xYJOP... 
\n", "\n", " description published_year \\\n", "30 Joe and his parents are enjoying a summer holi... 2002.0 \n", "79 The \"Dawn Treader\" is the first ship Narnia ha... 1970.0 \n", "85 A young boy living in the Ozarks achieves his ... 2000.0 \n", "86 There's trouble at Gray House, the girlhood ho... 2006.0 \n", "87 This is the diary ... of a spider. But don't b... 2005.0 \n", "... ... ... \n", "4845 When Kendra and Seth go to stay at their grand... 2006.0 \n", "4930 Sherlock Holmes and Dr. Watson travel to the b... 2005.0 \n", "4942 Sent to stay with their uncle in a ship-like h... 2006.0 \n", "5010 This training manual for operatives of Xtreme ... 2004.0 \n", "5157 A novel set in the seaside village of Frip fol... 2006.0 \n", "\n", " average_rating num_pages ratings_count agg_of_book \\\n", "30 3.50 32.0 1.0 23.0 \n", "79 4.09 216.0 2869.0 55.0 \n", "85 4.37 288.0 95.0 25.0 \n", "86 3.99 256.0 1086.0 19.0 \n", "87 4.25 40.0 7903.0 20.0 \n", "... ... ... ... ... \n", "4845 4.09 351.0 111896.0 19.0 \n", "4930 4.51 237.0 28.0 20.0 \n", "4942 3.64 437.0 595.0 19.0 \n", "5010 3.40 125.0 4.0 21.0 \n", "5157 4.11 84.0 2317.0 19.0 \n", "\n", " title_and_subtitle \\\n", "30 Ocean Star Express \n", "79 The voyage of the Dawn Treader \n", "85 Where the Red Fern Grows: The Story of Two Dog... \n", "86 Poppy's Return \n", "87 Diary of a Spider \n", "... ... \n", "4845 Fablehaven \n", "4930 Sherlock Holmes and the Case of the Hound of t... \n", "4942 The Drift House: The First Voyage \n", "5010 Attack of the Jaguar \n", "5157 The Very Persistent Gappers of Frip \n", "\n", " tagged_description \n", "30 9780006646006 Joe and his parents are enjoying... \n", "79 9780020442608 The \"Dawn Treader\" is the first ... \n", "85 9780030547744 A young boy living in the Ozarks... \n", "86 9780060000141 There's trouble at Gray House, t... \n", "87 9780060001537 This is the diary ... of a spide... \n", "... ... \n", "4845 9781590385814 When Kendra and Seth go to stay ... 
\n", "4930 9781596792500 Sherlock Holmes and Dr. Watson t... \n", "4942 9781599900056 Sent to stay with their uncle in... \n", "5010 9781844580514 This training manual for operati... \n", "5157 9781932416374 A novel set in the seaside villa... \n", "\n", "[390 rows x 14 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[books[\"categories\"].eq(\"Juvenile Fiction\")]" ] }, { "cell_type": "code", "execution_count": null, "id": "a725550a", "metadata": {}, "outputs": [], "source": [ "books[books[\"categories\"].eq(\"Juvenile Nonfiction\")]" ] }, { "cell_type": "code", "execution_count": 6, "id": "586c5da4", "metadata": {}, "outputs": [], "source": [ "# Collapse the 12 raw categories that have more than 50 books into four coarse groups.\n", "simple_category_groups = {\n", "    \"Fiction\": [\"Fiction\", \"Comics & Graphic Novels\", \"Drama\", \"Poetry\"],\n", "    \"Children's Fiction\": [\"Juvenile Fiction\"],\n", "    \"Nonfiction\": [\"Biography & Autobiography\", \"History\", \"Literary Criticism\",\n", "                   \"Philosophy\", \"Religion\", \"Science\"],\n", "    \"Children's Nonfiction\": [\"Juvenile Nonfiction\"],\n", "}\n", "\n", "# Invert the grouping into the raw -> simplified lookup consumed by Series.map.\n", "category_mapping = {\n", "    raw_category: simple_category\n", "    for simple_category, raw_categories in simple_category_groups.items()\n", "    for raw_category in raw_categories\n", "}\n", "\n", "# Raw categories that are absent from the lookup come out as NaN.\n", "books[\"simple_categories\"] = books[\"categories\"].map(category_mapping)" ] }, { "cell_type": "code", "execution_count": 7, "id": "62850edd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_countagg_of_booktitle_and_subtitletagged_descriptionsimple_categories
097800020058830002005883GileadMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0361.021.0Gilead9780002005883 A NOVEL THAT READERS and critics...Fiction
197800022619820002261987Spider's WebCharles Osborne;Agatha ChristieDetective and mystery storieshttp://books.google.com/books/content?id=gA5GP...A new 'Christie for Christmas' -- a full-lengt...2000.03.83241.05164.025.0Spider's Web: A Novel9780002261982 A new 'Christie for Christmas' -...NaN
297800061787360006178731Rage of angelsSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.029532.032.0Rage of angels9780006178736 A memorable, mesmerizing heroine...Fiction
397800062808970006280897The Four LovesClive Staples LewisChristian lifehttp://books.google.com/books/content?id=XhQ5X...Lewis' work on the nature of love divides love...2002.04.15170.033684.023.0The Four Loves9780006280897 Lewis' work on the nature of lov...NaN
497800062809340006280935The Problem of PainClive Staples LewisChristian lifehttp://books.google.com/books/content?id=Kk-uV...\"In The Problem of Pain, C.S. Lewis, one of th...2002.04.09176.037569.023.0The Problem of Pain9780006280934 \"In The Problem of Pain, C.S. Le...NaN
\n", "
" ], "text/plain": [ " isbn13 isbn10 title \\\n", "0 9780002005883 0002005883 Gilead \n", "1 9780002261982 0002261987 Spider's Web \n", "2 9780006178736 0006178731 Rage of angels \n", "3 9780006280897 0006280897 The Four Loves \n", "4 9780006280934 0006280935 The Problem of Pain \n", "\n", " authors categories \\\n", "0 Marilynne Robinson Fiction \n", "1 Charles Osborne;Agatha Christie Detective and mystery stories \n", "2 Sidney Sheldon Fiction \n", "3 Clive Staples Lewis Christian life \n", "4 Clive Staples Lewis Christian life \n", "\n", " thumbnail \\\n", "0 http://books.google.com/books/content?id=KQZCP... \n", "1 http://books.google.com/books/content?id=gA5GP... \n", "2 http://books.google.com/books/content?id=FKo2T... \n", "3 http://books.google.com/books/content?id=XhQ5X... \n", "4 http://books.google.com/books/content?id=Kk-uV... \n", "\n", " description published_year \\\n", "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n", "2 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", "3 Lewis' work on the nature of love divides love... 2002.0 \n", "4 \"In The Problem of Pain, C.S. Lewis, one of th... 2002.0 \n", "\n", " average_rating num_pages ratings_count agg_of_book \\\n", "0 3.85 247.0 361.0 21.0 \n", "1 3.83 241.0 5164.0 25.0 \n", "2 3.93 512.0 29532.0 32.0 \n", "3 4.15 170.0 33684.0 23.0 \n", "4 4.09 176.0 37569.0 23.0 \n", "\n", " title_and_subtitle tagged_description \\\n", "0 Gilead 9780002005883 A NOVEL THAT READERS and critics... \n", "1 Spider's Web: A Novel 9780002261982 A new 'Christie for Christmas' -... \n", "2 Rage of angels 9780006178736 A memorable, mesmerizing heroine... \n", "3 The Four Loves 9780006280897 Lewis' work on the nature of lov... \n", "4 The Problem of Pain 9780006280934 \"In The Problem of Pain, C.S. Le... 
\n", "\n", " simple_categories \n", "0 Fiction \n", "1 NaN \n", "2 Fiction \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.head()" ] }, { "cell_type": "code", "execution_count": 8, "id": "807213f2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_countagg_of_booktitle_and_subtitletagged_descriptionsimple_categories
097800020058830002005883GileadMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0361.021.0Gilead9780002005883 A NOVEL THAT READERS and critics...Fiction
297800061787360006178731Rage of angelsSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.029532.032.0Rage of angels9780006178736 A memorable, mesmerizing heroine...Fiction
897800064820790006482074Warhost of VastmarkJanny WurtsFictionhttp://books.google.com/books/content?id=uOL0f...Tricked once more by his wily half-brother, Ly...1995.04.03522.02966.030.0Warhost of Vastmark9780006482079 Tricked once more by his wily ha...Fiction
309780006646006000664600XOcean Star ExpressMark Haddon;Peter SuttonJuvenile Fictionhttp://books.google.com/books/content?id=I2QZA...Joe and his parents are enjoying a summer holi...2002.03.5032.01.023.0Ocean Star Express9780006646006 Joe and his parents are enjoying...Children's Fiction
4697800071210140007121016Taken at the FloodAgatha ChristieFictionhttp://books.google.com/books/content?id=3gWlx...A Few Weeks After Marrying An Attractive Young...2002.03.71352.08852.023.0Taken at the Flood9780007121014 A Few Weeks After Marrying An At...Fiction
................................................
517897819336482791933648279Night Has a Thousand EyesCornell WoolrichFictionhttp://books.google.com/books/content?id=3Gk6s...\"Cornell Woolrich's novels define the essence ...2007.03.77344.0680.018.0Night Has a Thousand Eyes9781933648279 \"Cornell Woolrich's novels defin...Fiction
518897847700289694770028962Coin Locker Babies村上龍Fictionhttp://books.google.com/books/content?id=87DJw...Rescued from the lockers in which they were le...2002.03.75393.05560.023.0Coin Locker Babies9784770028969 Rescued from the lockers in whic...Fiction
518997881222008508122200850Cry, the PeacockAnita DesaiFictionhttp://books.google.com/books/content?id=_QKwV...This book is the story of a young girl obsesse...1980.03.22218.0134.045.0Cry, the Peacock9788122200850 This book is the story of a youn...Fiction
519597881853005358185300534I Am thatSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophyhttp://books.google.com/books/content?id=Fv_JP...This collection of the timeless teachings of o...1999.04.51531.0104.026.0I Am that: Talks with Sri Nisargadatta Maharaj9788185300535 This collection of the timeless ...Nonfiction
519697890277120599027712050The Berlin PhenomenologyGeorg Wilhelm Friedrich HegelHistoryhttp://books.google.com/books/content?id=Vy7Sk...Since the three volume edition ofHegel's Philo...1981.00.00210.00.044.0The Berlin Phenomenology9789027712059 Since the three volume edition o...Nonfiction
\n", "

3743 rows × 15 columns

\n", "
" ], "text/plain": [ " isbn13 isbn10 title \\\n", "0 9780002005883 0002005883 Gilead \n", "2 9780006178736 0006178731 Rage of angels \n", "8 9780006482079 0006482074 Warhost of Vastmark \n", "30 9780006646006 000664600X Ocean Star Express \n", "46 9780007121014 0007121016 Taken at the Flood \n", "... ... ... ... \n", "5178 9781933648279 1933648279 Night Has a Thousand Eyes \n", "5188 9784770028969 4770028962 Coin Locker Babies \n", "5189 9788122200850 8122200850 Cry, the Peacock \n", "5195 9788185300535 8185300534 I Am that \n", "5196 9789027712059 9027712050 The Berlin Phenomenology \n", "\n", " authors categories \\\n", "0 Marilynne Robinson Fiction \n", "2 Sidney Sheldon Fiction \n", "8 Janny Wurts Fiction \n", "30 Mark Haddon;Peter Sutton Juvenile Fiction \n", "46 Agatha Christie Fiction \n", "... ... ... \n", "5178 Cornell Woolrich Fiction \n", "5188 村上龍 Fiction \n", "5189 Anita Desai Fiction \n", "5195 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit Philosophy \n", "5196 Georg Wilhelm Friedrich Hegel History \n", "\n", " thumbnail \\\n", "0 http://books.google.com/books/content?id=KQZCP... \n", "2 http://books.google.com/books/content?id=FKo2T... \n", "8 http://books.google.com/books/content?id=uOL0f... \n", "30 http://books.google.com/books/content?id=I2QZA... \n", "46 http://books.google.com/books/content?id=3gWlx... \n", "... ... \n", "5178 http://books.google.com/books/content?id=3Gk6s... \n", "5188 http://books.google.com/books/content?id=87DJw... \n", "5189 http://books.google.com/books/content?id=_QKwV... \n", "5195 http://books.google.com/books/content?id=Fv_JP... \n", "5196 http://books.google.com/books/content?id=Vy7Sk... \n", "\n", " description published_year \\\n", "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", "2 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", "8 Tricked once more by his wily half-brother, Ly... 1995.0 \n", "30 Joe and his parents are enjoying a summer holi... 
2002.0 \n", "46 A Few Weeks After Marrying An Attractive Young... 2002.0 \n", "... ... ... \n", "5178 \"Cornell Woolrich's novels define the essence ... 2007.0 \n", "5188 Rescued from the lockers in which they were le... 2002.0 \n", "5189 This book is the story of a young girl obsesse... 1980.0 \n", "5195 This collection of the timeless teachings of o... 1999.0 \n", "5196 Since the three volume edition ofHegel's Philo... 1981.0 \n", "\n", " average_rating num_pages ratings_count agg_of_book \\\n", "0 3.85 247.0 361.0 21.0 \n", "2 3.93 512.0 29532.0 32.0 \n", "8 4.03 522.0 2966.0 30.0 \n", "30 3.50 32.0 1.0 23.0 \n", "46 3.71 352.0 8852.0 23.0 \n", "... ... ... ... ... \n", "5178 3.77 344.0 680.0 18.0 \n", "5188 3.75 393.0 5560.0 23.0 \n", "5189 3.22 218.0 134.0 45.0 \n", "5195 4.51 531.0 104.0 26.0 \n", "5196 0.00 210.0 0.0 44.0 \n", "\n", " title_and_subtitle \\\n", "0 Gilead \n", "2 Rage of angels \n", "8 Warhost of Vastmark \n", "30 Ocean Star Express \n", "46 Taken at the Flood \n", "... ... \n", "5178 Night Has a Thousand Eyes \n", "5188 Coin Locker Babies \n", "5189 Cry, the Peacock \n", "5195 I Am that: Talks with Sri Nisargadatta Maharaj \n", "5196 The Berlin Phenomenology \n", "\n", " tagged_description simple_categories \n", "0 9780002005883 A NOVEL THAT READERS and critics... Fiction \n", "2 9780006178736 A memorable, mesmerizing heroine... Fiction \n", "8 9780006482079 Tricked once more by his wily ha... Fiction \n", "30 9780006646006 Joe and his parents are enjoying... Children's Fiction \n", "46 9780007121014 A Few Weeks After Marrying An At... Fiction \n", "... ... ... \n", "5178 9781933648279 \"Cornell Woolrich's novels defin... Fiction \n", "5188 9784770028969 Rescued from the lockers in whic... Fiction \n", "5189 9788122200850 This book is the story of a youn... Fiction \n", "5195 9788185300535 This collection of the timeless ... Nonfiction \n", "5196 9789027712059 Since the three volume edition o... 
Nonfiction \n", "\n", "[3743 rows x 15 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[books[\"simple_categories\"].notna()]" ] }, { "cell_type": "code", "execution_count": 10, "id": "87f7e85e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n", "Device set to use cuda\n" ] } ], "source": [ "import torch\n", "from transformers import pipeline\n", "\n", "# Candidate labels handed to the zero-shot classifier.\n", "fiction_categories = [\"Fiction\", \"Nonfiction\"]\n", "\n", "# Use the GPU when one is available; otherwise fall back to the CPU so the\n", "# notebook still runs (more slowly) on machines without CUDA.\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "pipe = pipeline(\"zero-shot-classification\",\n", "                model=\"facebook/bart-large-mnli\",\n", "                device=device)" ] }, { "cell_type": "code", "execution_count": 11, "id": "de137173", "metadata": {}, "outputs": [], "source": [ "# First description among the books already labelled \"Fiction\"; used as a smoke-test input.\n", "sequence = books.loc[books[\"simple_categories\"] == \"Fiction\", \"description\"].iloc[0]" ] }, { "cell_type": "code", "execution_count": 12, "id": "1cbd4fe8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'sequence': 'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist.
He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world has to offer. At its heart is a tale of the sacred bonds between fathers and sons, pitch-perfect in style and story, set to dazzle critics and readers alike.',\n", " 'labels': ['Fiction', 'Nonfiction'],\n", " 'scores': [0.8438260555267334, 0.1561739593744278]}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipe(sequence, fiction_categories)" ] }, { "cell_type": "code", "execution_count": 13, "id": "70cd6946", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Fiction'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "import numpy as np\n", "\n", "# Run the classifier once and reuse the result; calling pipe() twice repeats the inference.\n", "prediction = pipe(sequence, fiction_categories)\n", "max_index = np.argmax(prediction[\"scores\"])\n", "max_label = prediction[\"labels\"][max_index]\n", "max_label" ] }, { "cell_type": "code", "execution_count": 14, "id": "39793314", "metadata": {}, "outputs": [], "source": [ "def generate_predictions(sequence, categories):\n", "    \"\"\"Return the highest-scoring candidate label for one piece of text.\n", "\n", "    Parameters\n", "    ----------\n", "    sequence\n", "        Text to classify; in this notebook, a book description.\n", "    categories\n", "        Candidate labels passed to the module-level zero-shot ``pipe``.\n", "\n", "    Returns\n", "    -------\n", "    The entry of ``predictions[\"labels\"]`` at the position of the largest\n", "    value in ``predictions[\"scores\"]``.\n", "    \"\"\"\n", "    predictions = pipe(sequence, categories)\n", "    max_index = np.argmax(predictions[\"scores\"])\n", "    return predictions[\"labels\"][max_index]" ] }, { "cell_type": "markdown", "id": "234ccfd7", "metadata": {}, "source": [ "## Model Evaluation" ] }, { "cell_type": "code", "execution_count": 15, "id": "ccd1dac4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 2%|▏ | 7/300 [00:01<00:50, 5.82it/s]You seem to be using the pipelines sequentially on GPU.
In order to maximize efficiency please use a dataset\n", "100%|██████████| 300/300 [00:32<00:00, 9.35it/s]\n" ] } ], "source": [ "from tqdm import tqdm\n", "\n", "actual_cats = []\n", "predicted_cats = []\n", "\n", "# Filter the labelled Fiction descriptions once, outside the loop, instead of\n", "# re-filtering the whole frame on every iteration. head(300) keeps the same\n", "# first 300 rows and does not fail if fewer are available.\n", "fiction_descriptions = books.loc[books[\"simple_categories\"] == \"Fiction\", \"description\"].head(300)\n", "\n", "for sequence in tqdm(fiction_descriptions):\n", "    predicted_cats.append(generate_predictions(sequence, fiction_categories))\n", "    actual_cats.append(\"Fiction\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "599552d1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 300/300 [00:33<00:00, 8.86it/s]\n" ] } ], "source": [ "# Same evaluation for the first 300 labelled Nonfiction descriptions; results are\n", "# appended to the lists started in the previous cell.\n", "nonfiction_descriptions = books.loc[books[\"simple_categories\"] == \"Nonfiction\", \"description\"].head(300)\n", "\n", "for sequence in tqdm(nonfiction_descriptions):\n", "    predicted_cats.append(generate_predictions(sequence, fiction_categories))\n", "    actual_cats.append(\"Nonfiction\")" ] }, { "cell_type": "code", "execution_count": 17, "id": "42009f86", "metadata": {}, "outputs": [], "source": [ "predictions_df = pd.DataFrame({\"actual_categories\": actual_cats, \"predicted_categories\": predicted_cats})" ] }, { "cell_type": "code", "execution_count": 18, "id": "0c295afe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
actual_categoriespredicted_categories
0FictionFiction
1FictionFiction
2FictionFiction
3FictionNonfiction
4FictionFiction
.........
595NonfictionNonfiction
596NonfictionFiction
597NonfictionNonfiction
598NonfictionNonfiction
599NonfictionFiction
\n", "

600 rows × 2 columns

\n", "
" ], "text/plain": [ " actual_categories predicted_categories\n", "0 Fiction Fiction\n", "1 Fiction Fiction\n", "2 Fiction Fiction\n", "3 Fiction Nonfiction\n", "4 Fiction Fiction\n", ".. ... ...\n", "595 Nonfiction Nonfiction\n", "596 Nonfiction Fiction\n", "597 Nonfiction Nonfiction\n", "598 Nonfiction Nonfiction\n", "599 Nonfiction Fiction\n", "\n", "[600 rows x 2 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions_df" ] }, { "cell_type": "code", "execution_count": 19, "id": "a360f103", "metadata": {}, "outputs": [], "source": [ "predictions_df[\"correct_prediction\"] = (\n", " np.where(predictions_df[\"actual_categories\"] == predictions_df[\"predicted_categories\"], 1, 0)\n", ")" ] }, { "cell_type": "code", "execution_count": 20, "id": "e0c46466", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "np.float64(0.7783333333333333)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions_df[\"correct_prediction\"].sum() / len(predictions_df)" ] }, { "cell_type": "code", "execution_count": 21, "id": "4cac9b23", "metadata": {}, "outputs": [], "source": [ "# Books whose raw category was not covered by category_mapping.\n", "missing_cats = books.loc[books[\"simple_categories\"].isna(), [\"isbn13\", \"description\"]].reset_index(drop=True)" ] }, { "cell_type": "markdown", "id": "68598062", "metadata": {}, "source": [ "Use the model to predict a category for each book whose category is missing, based on its description." ] }, { "cell_type": "code", "execution_count": 22, "id": "bb8156ba", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1454/1454 [02:34<00:00, 9.39it/s]\n" ] } ], "source": [ "# Start from empty lists in this cell so that re-running it cannot append\n", "# duplicate predictions to results left over from an earlier run.\n", "isbns = []\n", "predicted_cats = []\n", "\n", "for isbn, sequence in tqdm(zip(missing_cats[\"isbn13\"], missing_cats[\"description\"]), total=len(missing_cats)):\n", "    predicted_cats.append(generate_predictions(sequence, fiction_categories))\n", "    isbns.append(isbn)" ] }, { "cell_type":
"code", "execution_count": 23, "id": "7fda2cf0", "metadata": {}, "outputs": [], "source": [ "missing_predicted_df = pd.DataFrame({\"isbn13\": isbns, \"predicted_categories\": predicted_cats})" ] }, { "cell_type": "code", "execution_count": 24, "id": "ad7a0c7c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13predicted_categories
09780002261982Fiction
19780006280897Nonfiction
29780006280934Nonfiction
39780006380832Nonfiction
49780006470229Fiction
.........
14499788125026600Nonfiction
14509788171565641Fiction
14519788172235222Fiction
14529788173031014Nonfiction
14539788179921623Fiction
\n", "

1454 rows × 2 columns

\n", "
" ], "text/plain": [ " isbn13 predicted_categories\n", "0 9780002261982 Fiction\n", "1 9780006280897 Nonfiction\n", "2 9780006280934 Nonfiction\n", "3 9780006380832 Nonfiction\n", "4 9780006470229 Fiction\n", "... ... ...\n", "1449 9788125026600 Nonfiction\n", "1450 9788171565641 Fiction\n", "1451 9788172235222 Fiction\n", "1452 9788173031014 Nonfiction\n", "1453 9788179921623 Fiction\n", "\n", "[1454 rows x 2 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "missing_predicted_df" ] }, { "cell_type": "code", "execution_count": 25, "id": "bdb7e240", "metadata": {}, "outputs": [], "source": [ "books = pd.merge(books, missing_predicted_df, on=\"isbn13\", how=\"left\")\n", "books[\"simple_categories\"] = np.where(books[\"simple_categories\"].isna(), books[\"predicted_categories\"], books[\"simple_categories\"])\n", "books = books.drop(columns = [\"predicted_categories\"])" ] }, { "cell_type": "code", "execution_count": 26, "id": "1f1ec77d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_countagg_of_booktitle_and_subtitletagged_descriptionsimple_categories
097800020058830002005883GileadMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0361.021.0Gilead9780002005883 A NOVEL THAT READERS and critics...Fiction
197800022619820002261987Spider's WebCharles Osborne;Agatha ChristieDetective and mystery storieshttp://books.google.com/books/content?id=gA5GP...A new 'Christie for Christmas' -- a full-lengt...2000.03.83241.05164.025.0Spider's Web: A Novel9780002261982 A new 'Christie for Christmas' -...Fiction
297800061787360006178731Rage of angelsSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.029532.032.0Rage of angels9780006178736 A memorable, mesmerizing heroine...Fiction
397800062808970006280897The Four LovesClive Staples LewisChristian lifehttp://books.google.com/books/content?id=XhQ5X...Lewis' work on the nature of love divides love...2002.04.15170.033684.023.0The Four Loves9780006280897 Lewis' work on the nature of lov...Nonfiction
497800062809340006280935The Problem of PainClive Staples LewisChristian lifehttp://books.google.com/books/content?id=Kk-uV...\"In The Problem of Pain, C.S. Lewis, one of th...2002.04.09176.037569.023.0The Problem of Pain9780006280934 \"In The Problem of Pain, C.S. Le...Nonfiction
................................................
519297881722352228172235224Mistaken IdentityNayantara SahgalIndic fiction (English)http://books.google.com/books/content?id=q-tKP...On A Train Journey Home To North India After L...2003.02.93324.00.022.0Mistaken Identity9788172235222 On A Train Journey Home To North...Fiction
519397881730310148173031010Journey to the EastHermann HesseAdventure storieshttp://books.google.com/books/content?id=rq6JP...This book tells the tale of a man who goes on ...2002.03.70175.024.023.0Journey to the East9788173031014 This book tells the tale of a ma...Nonfiction
51949788179921623817992162XThe Monk Who Sold His Ferrari: A Fable About F...Robin SharmaHealth & Fitnesshttp://books.google.com/books/content?id=c_7mf...Wisdom to Create a Life of Passion, Purpose, a...2003.03.82198.01568.022.0The Monk Who Sold His Ferrari: A Fable About F...9788179921623 Wisdom to Create a Life of Passi...Fiction
519597881853005358185300534I Am thatSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophyhttp://books.google.com/books/content?id=Fv_JP...This collection of the timeless teachings of o...1999.04.51531.0104.026.0I Am that: Talks with Sri Nisargadatta Maharaj9788185300535 This collection of the timeless ...Nonfiction
519697890277120599027712050The Berlin PhenomenologyGeorg Wilhelm Friedrich HegelHistoryhttp://books.google.com/books/content?id=Vy7Sk...Since the three volume edition ofHegel's Philo...1981.00.00210.00.044.0The Berlin Phenomenology9789027712059 Since the three volume edition o...Nonfiction
\n", "

5197 rows × 15 columns

\n", "
" ], "text/plain": [ " isbn13 isbn10 \\\n", "0 9780002005883 0002005883 \n", "1 9780002261982 0002261987 \n", "2 9780006178736 0006178731 \n", "3 9780006280897 0006280897 \n", "4 9780006280934 0006280935 \n", "... ... ... \n", "5192 9788172235222 8172235224 \n", "5193 9788173031014 8173031010 \n", "5194 9788179921623 817992162X \n", "5195 9788185300535 8185300534 \n", "5196 9789027712059 9027712050 \n", "\n", " title \\\n", "0 Gilead \n", "1 Spider's Web \n", "2 Rage of angels \n", "3 The Four Loves \n", "4 The Problem of Pain \n", "... ... \n", "5192 Mistaken Identity \n", "5193 Journey to the East \n", "5194 The Monk Who Sold His Ferrari: A Fable About F... \n", "5195 I Am that \n", "5196 The Berlin Phenomenology \n", "\n", " authors \\\n", "0 Marilynne Robinson \n", "1 Charles Osborne;Agatha Christie \n", "2 Sidney Sheldon \n", "3 Clive Staples Lewis \n", "4 Clive Staples Lewis \n", "... ... \n", "5192 Nayantara Sahgal \n", "5193 Hermann Hesse \n", "5194 Robin Sharma \n", "5195 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", "5196 Georg Wilhelm Friedrich Hegel \n", "\n", " categories \\\n", "0 Fiction \n", "1 Detective and mystery stories \n", "2 Fiction \n", "3 Christian life \n", "4 Christian life \n", "... ... \n", "5192 Indic fiction (English) \n", "5193 Adventure stories \n", "5194 Health & Fitness \n", "5195 Philosophy \n", "5196 History \n", "\n", " thumbnail \\\n", "0 http://books.google.com/books/content?id=KQZCP... \n", "1 http://books.google.com/books/content?id=gA5GP... \n", "2 http://books.google.com/books/content?id=FKo2T... \n", "3 http://books.google.com/books/content?id=XhQ5X... \n", "4 http://books.google.com/books/content?id=Kk-uV... \n", "... ... \n", "5192 http://books.google.com/books/content?id=q-tKP... \n", "5193 http://books.google.com/books/content?id=rq6JP... \n", "5194 http://books.google.com/books/content?id=c_7mf... \n", "5195 http://books.google.com/books/content?id=Fv_JP... 
\n", "5196 http://books.google.com/books/content?id=Vy7Sk... \n", "\n", " description published_year \\\n", "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n", "2 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", "3 Lewis' work on the nature of love divides love... 2002.0 \n", "4 \"In The Problem of Pain, C.S. Lewis, one of th... 2002.0 \n", "... ... ... \n", "5192 On A Train Journey Home To North India After L... 2003.0 \n", "5193 This book tells the tale of a man who goes on ... 2002.0 \n", "5194 Wisdom to Create a Life of Passion, Purpose, a... 2003.0 \n", "5195 This collection of the timeless teachings of o... 1999.0 \n", "5196 Since the three volume edition ofHegel's Philo... 1981.0 \n", "\n", " average_rating num_pages ratings_count agg_of_book \\\n", "0 3.85 247.0 361.0 21.0 \n", "1 3.83 241.0 5164.0 25.0 \n", "2 3.93 512.0 29532.0 32.0 \n", "3 4.15 170.0 33684.0 23.0 \n", "4 4.09 176.0 37569.0 23.0 \n", "... ... ... ... ... \n", "5192 2.93 324.0 0.0 22.0 \n", "5193 3.70 175.0 24.0 23.0 \n", "5194 3.82 198.0 1568.0 22.0 \n", "5195 4.51 531.0 104.0 26.0 \n", "5196 0.00 210.0 0.0 44.0 \n", "\n", " title_and_subtitle \\\n", "0 Gilead \n", "1 Spider's Web: A Novel \n", "2 Rage of angels \n", "3 The Four Loves \n", "4 The Problem of Pain \n", "... ... \n", "5192 Mistaken Identity \n", "5193 Journey to the East \n", "5194 The Monk Who Sold His Ferrari: A Fable About F... \n", "5195 I Am that: Talks with Sri Nisargadatta Maharaj \n", "5196 The Berlin Phenomenology \n", "\n", " tagged_description simple_categories \n", "0 9780002005883 A NOVEL THAT READERS and critics... Fiction \n", "1 9780002261982 A new 'Christie for Christmas' -... Fiction \n", "2 9780006178736 A memorable, mesmerizing heroine... Fiction \n", "3 9780006280897 Lewis' work on the nature of lov... Nonfiction \n", "4 9780006280934 \"In The Problem of Pain, C.S. Le... Nonfiction \n", "... ... ... 
\n", "5192 9788172235222 On A Train Journey Home To North... Fiction \n", "5193 9788173031014 This book tells the tale of a ma... Nonfiction \n", "5194 9788179921623 Wisdom to Create a Life of Passi... Fiction \n", "5195 9788185300535 This collection of the timeless ... Nonfiction \n", "5196 9789027712059 Since the three volume edition o... Nonfiction \n", "\n", "[5197 rows x 15 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books" ] }, { "cell_type": "code", "execution_count": 27, "id": "905d016c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isbn13isbn10titleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_countagg_of_booktitle_and_subtitletagged_descriptionsimple_categories
2497800065130870006513085GravityTess GerritsenScience fictionhttp://books.google.com/books/content?id=KI66c...Emma Watson a research physician has been trai...2004.04.04342.08024.021.0Gravity9780006513087 Emma Watson a research physician...Nonfiction
47597800994103550099410354TraitorMatthew Woodring StoverScience fictionhttp://books.google.com/books/content?id=VbICO...From the depths of catastrophe, a glimmer of h...2002.04.00320.06765.023.0Traitor9780099410355 From the depths of catastrophe, ...Fiction
47897800994223410099422344Yeats is Dead!Joseph O'ConnorComedyhttp://books.google.com/books/content?id=DrE3I...In aid of Amnesty International, this is a bri...2002.03.39298.034.023.0Yeats is Dead!: A Novel by Fifteen Irish Writers9780099422341 In aid of Amnesty International,...Fiction
49197800994467290099446723Blackwood FarmAnne RiceHorrorhttp://books.google.com/books/content?id=cIn8T...Lestat Is Back, Saviour And Demon, Presiding O...2003.03.86774.026145.022.0Blackwood Farm9780099446729 Lestat Is Back, Saviour And Demo...Fiction
109097802611024220261102427The SilmarillionJohn Ronald Reuel TolkienFantasyhttp://books.google.com/books/content?id=22ePu...Tolkien's Silmarillion is the core work of the...1999.03.91384.0253.026.0The Silmarillion9780261102422 Tolkien's Silmarillion is the co...Fiction
143597803408379550340837950Stranger in a Strange LandRobert A. HeinleinScience fictionhttp://books.google.com/books/content?id=ZQhiP...Epic, entertaining, Stranger in a Strange Land...2005.03.92672.0563.020.0Stranger in a Strange Land9780340837955 Epic, entertaining, Stranger in ...Fiction
143997803452512200345251229Visions from NowhereWilliam ArrowScience fictionNaNThe first novel in the series, \"Return to the ...1976.03.23183.010.049.0Visions from Nowhere9780345251220 The first novel in the series, \"...Fiction
284597805750755970575075597ReplayKen GrimwoodFantasyhttp://books.google.com/books/content?id=9vmNP...At forty-three Jeff Winston is tired of his lo...2005.04.16272.0412.020.0Replay9780575075597 At forty-three Jeff Winston is t...Fiction
286097805902547620590254766The lion, the witch and the wardrobeClive Staples LewisFantasyNaNFour English school children enter the magic l...1995.04.21189.0860.030.0The lion, the witch and the wardrobe9780590254762 Four English school children ent...Nonfiction
328897807394238510739423851Wizard's CastleDiana Wynne JonesFantasyhttp://books.google.com/books/content?id=hB7hA...Howl's moving castle - Eldest of three sisters...2002.04.44376.0439.023.0Wizard's Castle9780739423851 Howl's moving castle - Eldest of...Fiction
328997807394397080739439707Time QuartetMadeleine L'EngleScience fictionNaNBlending magic with quantum physics, Madeleine...2003.04.35646.0165.022.0Time Quartet9780739439708 Blending magic with quantum phys...Fiction
35639780760728505076072850X20,000 Leagues Under the SeaJules VerneScience fictionNaNRetells the adventures of a French professor a...2002.03.88394.0135778.023.020,000 Leagues Under the Sea9780760728505 Retells the adventures of a Fren...Fiction
448397814165020431416502041The Voyage of the Jerle Shannara TrilogyTerry BrooksFantasyhttp://books.google.com/books/content?id=jqVsA...When the body of a half-drowned elf is found f...2004.04.261260.084.021.0The Voyage of the Jerle Shannara Trilogy9781416502043 When the body of a half-drowned ...Fiction
497997818414940811841494089The Darkness that Comes BeforeR. Scott BakkerFantasyhttp://books.google.com/books/content?id=BG8qG...A score of centuries has passed since the Firs...2005.03.79638.0317.020.0The Darkness that Comes Before9781841494081 A score of centuries has passed ...Nonfiction
50409781857231359185723135XUse of WeaponsIain BanksScience fictionNaNThe man known as Cheradenine Zakalwe was one o...1992.04.19411.030779.033.0Use of Weapons9781857231359 The man known as Cheradenine Zak...Nonfiction
505797818579893421857989341The Gods ThemselvesIsaac AsimovScience fictionhttp://books.google.com/books/content?id=8ClIP...In the year 2100, mankind on Earth, settlers i...2000.04.09288.041304.025.0The Gods Themselves9781857989342 In the year 2100, mankind on Ear...Fiction
\n", "
" ], "text/plain": [ " isbn13 isbn10 title \\\n", "24 9780006513087 0006513085 Gravity \n", "475 9780099410355 0099410354 Traitor \n", "478 9780099422341 0099422344 Yeats is Dead! \n", "491 9780099446729 0099446723 Blackwood Farm \n", "1090 9780261102422 0261102427 The Silmarillion \n", "1435 9780340837955 0340837950 Stranger in a Strange Land \n", "1439 9780345251220 0345251229 Visions from Nowhere \n", "2845 9780575075597 0575075597 Replay \n", "2860 9780590254762 0590254766 The lion, the witch and the wardrobe \n", "3288 9780739423851 0739423851 Wizard's Castle \n", "3289 9780739439708 0739439707 Time Quartet \n", "3563 9780760728505 076072850X 20,000 Leagues Under the Sea \n", "4483 9781416502043 1416502041 The Voyage of the Jerle Shannara Trilogy \n", "4979 9781841494081 1841494089 The Darkness that Comes Before \n", "5040 9781857231359 185723135X Use of Weapons \n", "5057 9781857989342 1857989341 The Gods Themselves \n", "\n", " authors categories \\\n", "24 Tess Gerritsen Science fiction \n", "475 Matthew Woodring Stover Science fiction \n", "478 Joseph O'Connor Comedy \n", "491 Anne Rice Horror \n", "1090 John Ronald Reuel Tolkien Fantasy \n", "1435 Robert A. Heinlein Science fiction \n", "1439 William Arrow Science fiction \n", "2845 Ken Grimwood Fantasy \n", "2860 Clive Staples Lewis Fantasy \n", "3288 Diana Wynne Jones Fantasy \n", "3289 Madeleine L'Engle Science fiction \n", "3563 Jules Verne Science fiction \n", "4483 Terry Brooks Fantasy \n", "4979 R. Scott Bakker Fantasy \n", "5040 Iain Banks Science fiction \n", "5057 Isaac Asimov Science fiction \n", "\n", " thumbnail \\\n", "24 http://books.google.com/books/content?id=KI66c... \n", "475 http://books.google.com/books/content?id=VbICO... \n", "478 http://books.google.com/books/content?id=DrE3I... \n", "491 http://books.google.com/books/content?id=cIn8T... \n", "1090 http://books.google.com/books/content?id=22ePu... \n", "1435 http://books.google.com/books/content?id=ZQhiP... 
\n", "1439 NaN \n", "2845 http://books.google.com/books/content?id=9vmNP... \n", "2860 NaN \n", "3288 http://books.google.com/books/content?id=hB7hA... \n", "3289 NaN \n", "3563 NaN \n", "4483 http://books.google.com/books/content?id=jqVsA... \n", "4979 http://books.google.com/books/content?id=BG8qG... \n", "5040 NaN \n", "5057 http://books.google.com/books/content?id=8ClIP... \n", "\n", " description published_year \\\n", "24 Emma Watson a research physician has been trai... 2004.0 \n", "475 From the depths of catastrophe, a glimmer of h... 2002.0 \n", "478 In aid of Amnesty International, this is a bri... 2002.0 \n", "491 Lestat Is Back, Saviour And Demon, Presiding O... 2003.0 \n", "1090 Tolkien's Silmarillion is the core work of the... 1999.0 \n", "1435 Epic, entertaining, Stranger in a Strange Land... 2005.0 \n", "1439 The first novel in the series, \"Return to the ... 1976.0 \n", "2845 At forty-three Jeff Winston is tired of his lo... 2005.0 \n", "2860 Four English school children enter the magic l... 1995.0 \n", "3288 Howl's moving castle - Eldest of three sisters... 2002.0 \n", "3289 Blending magic with quantum physics, Madeleine... 2003.0 \n", "3563 Retells the adventures of a French professor a... 2002.0 \n", "4483 When the body of a half-drowned elf is found f... 2004.0 \n", "4979 A score of centuries has passed since the Firs... 2005.0 \n", "5040 The man known as Cheradenine Zakalwe was one o... 1992.0 \n", "5057 In the year 2100, mankind on Earth, settlers i... 
2000.0 \n", "\n", " average_rating num_pages ratings_count agg_of_book \\\n", "24 4.04 342.0 8024.0 21.0 \n", "475 4.00 320.0 6765.0 23.0 \n", "478 3.39 298.0 34.0 23.0 \n", "491 3.86 774.0 26145.0 22.0 \n", "1090 3.91 384.0 253.0 26.0 \n", "1435 3.92 672.0 563.0 20.0 \n", "1439 3.23 183.0 10.0 49.0 \n", "2845 4.16 272.0 412.0 20.0 \n", "2860 4.21 189.0 860.0 30.0 \n", "3288 4.44 376.0 439.0 23.0 \n", "3289 4.35 646.0 165.0 22.0 \n", "3563 3.88 394.0 135778.0 23.0 \n", "4483 4.26 1260.0 84.0 21.0 \n", "4979 3.79 638.0 317.0 20.0 \n", "5040 4.19 411.0 30779.0 33.0 \n", "5057 4.09 288.0 41304.0 25.0 \n", "\n", " title_and_subtitle \\\n", "24 Gravity \n", "475 Traitor \n", "478 Yeats is Dead!: A Novel by Fifteen Irish Writers \n", "491 Blackwood Farm \n", "1090 The Silmarillion \n", "1435 Stranger in a Strange Land \n", "1439 Visions from Nowhere \n", "2845 Replay \n", "2860 The lion, the witch and the wardrobe \n", "3288 Wizard's Castle \n", "3289 Time Quartet \n", "3563 20,000 Leagues Under the Sea \n", "4483 The Voyage of the Jerle Shannara Trilogy \n", "4979 The Darkness that Comes Before \n", "5040 Use of Weapons \n", "5057 The Gods Themselves \n", "\n", " tagged_description simple_categories \n", "24 9780006513087 Emma Watson a research physician... Nonfiction \n", "475 9780099410355 From the depths of catastrophe, ... Fiction \n", "478 9780099422341 In aid of Amnesty International,... Fiction \n", "491 9780099446729 Lestat Is Back, Saviour And Demo... Fiction \n", "1090 9780261102422 Tolkien's Silmarillion is the co... Fiction \n", "1435 9780340837955 Epic, entertaining, Stranger in ... Fiction \n", "1439 9780345251220 The first novel in the series, \"... Fiction \n", "2845 9780575075597 At forty-three Jeff Winston is t... Fiction \n", "2860 9780590254762 Four English school children ent... Nonfiction \n", "3288 9780739423851 Howl's moving castle - Eldest of... Fiction \n", "3289 9780739439708 Blending magic with quantum phys... 
Fiction \n", "3563 9780760728505 Retells the adventures of a Fren... Fiction \n", "4483 9781416502043 When the body of a half-drowned ... Fiction \n", "4979 9781841494081 A score of centuries has passed ... Nonfiction \n", "5040 9781857231359 The man known as Cheradenine Zak... Nonfiction \n", "5057 9781857989342 In the year 2100, mankind on Ear... Fiction " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books[books[\"categories\"].str.lower().isin([\n", " \"romance\",\n", " \"science fiction\",\n", " \"scifi\",\n", " \"fantasy\",\n", " \"horror\",\n", " \"mystery\",\n", " \"thriller\",\n", " \"comedy\",\n", " \"crime\",\n", " \"historical\"\n", "])]" ] }, { "cell_type": "code", "execution_count": 28, "id": "df5056dd", "metadata": {}, "outputs": [], "source": [ "books.to_csv(\"books_with_categories.csv\", index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "books_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }