Skip to content

Commit

Permalink
small formatting changes
Browse files Browse the repository at this point in the history
  • Loading branch information
nickeubank committed Jan 26, 2024
1 parent 070d17c commit 4fd95f0
Show file tree
Hide file tree
Showing 12 changed files with 264 additions and 144 deletions.
6 changes: 4 additions & 2 deletions notebooks/class_3/week_4/10_combining_concatenating.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@
"source": [
"import pandas as pd\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"df1 = pd.DataFrame(\n",
" {\n",
" \"animals\": [\"dog\", \"cat\", \"bird\", \"fish\"],\n",
Expand Down Expand Up @@ -1107,7 +1109,7 @@
}
],
"source": [
"b1 = pd.DataFrame(data={\"C1\": ['E','F']})\n",
"b1 = pd.DataFrame(data={\"C1\": [\"E\", \"F\"]})\n",
"b1"
]
},
Expand Down Expand Up @@ -1749,7 +1751,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.11.7"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@
}
],
"source": [
"import pandas as pd\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"a = pd.DataFrame(data={\"C1\": [\"A\", \"B\", \"C\"], \"C2\": [\"x\", \"y\", \"z\"]})\n",
"a"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@
}
],
"source": [
"import pandas as pd\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"population = pd.read_csv(\"data/population.csv\")\n",
"population"
]
Expand Down
17 changes: 10 additions & 7 deletions notebooks/class_3/week_4/15.3_validating_a_merge.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@
],
"source": [
"import pandas as pd\n",
"df1 = pd.DataFrame({'key':['key1', 'key2'], 'df1_var':[1, 2]})\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"df1 = pd.DataFrame({\"key\": [\"key1\", \"key2\"], \"df1_var\": [1, 2]})\n",
"df1"
]
},
Expand Down Expand Up @@ -158,7 +161,7 @@
}
],
"source": [
"df2 = pd.DataFrame({'key':['key1', 'Key2'], 'df2_var':['a', 'b']})\n",
"df2 = pd.DataFrame({\"key\": [\"key1\", \"Key2\"], \"df2_var\": [\"a\", \"b\"]})\n",
"df2"
]
},
Expand All @@ -175,7 +178,7 @@
"metadata": {},
"outputs": [],
"source": [
"new_data = pd.merge(df1, df2, on='key', how='outer')"
"new_data = pd.merge(df1, df2, on=\"key\", how=\"outer\")"
]
},
{
Expand Down Expand Up @@ -323,8 +326,8 @@
],
"source": [
"df2_correct = df2.copy()\n",
"df2_correct.loc[df2.key == \"Key2\", \"key\"] = 'key2'\n",
"pd.merge(df1, df2_correct, on='key', how='outer')"
"df2_correct.loc[df2.key == \"Key2\", \"key\"] = \"key2\"\n",
"pd.merge(df1, df2_correct, on=\"key\", how=\"outer\")"
]
},
{
Expand Down Expand Up @@ -357,7 +360,7 @@
}
],
"source": [
"new_data = pd.merge(df1, df2, on='key', how='outer', indicator=True)\n",
"new_data = pd.merge(df1, df2, on=\"key\", how=\"outer\", indicator=True)\n",
"new_data._merge.value_counts()"
]
},
Expand Down Expand Up @@ -476,7 +479,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.11.7"
},
"vscode": {
"interpreter": {
Expand Down
4 changes: 3 additions & 1 deletion notebooks/class_3/week_4/15_combining_merging.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@
"source": [
"import pandas as pd\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"dfa = pd.DataFrame(\n",
" {\n",
" \"animals\": [\"dog\", \"cat\", \"bird\", \"fish\"],\n",
Expand Down Expand Up @@ -783,7 +785,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.7"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
136 changes: 103 additions & 33 deletions notebooks/class_3/week_4/16S_Exercise_merging_solutions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,10 @@
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"pd.set_option('display.max_rows', 20)\n",
"\n",
"pd.set_option(\"mode.copy_on_write\", True)\n",
"\n",
"pd.set_option(\"display.max_rows\", 20)\n",
"\n",
"file2009 = \"data/ca_arrests_2009.csv\"\n",
"df_arrests_2009 = pd.read_csv(file2009)\n",
Expand Down Expand Up @@ -530,9 +533,11 @@
],
"source": [
"# Filter the data so we're only using data from the correct year and the correct State\n",
"df_pop_filtered_2009 = df_pop.loc[df_pop[\"YEAR\"]==\"2005-2009\"]\n",
"df_pop_filtered_2009 = df_pop_filtered_2009.loc[df_pop_filtered_2009[\"STATE\"]==\"California\"]\n",
"df_pop_filtered_2009.loc[df_pop_filtered_2009[\"COUNTY\"]==\"Alameda County\"]"
"df_pop_filtered_2009 = df_pop.loc[df_pop[\"YEAR\"] == \"2005-2009\"]\n",
"df_pop_filtered_2009 = df_pop_filtered_2009.loc[\n",
" df_pop_filtered_2009[\"STATE\"] == \"California\"\n",
"]\n",
"df_pop_filtered_2009.loc[df_pop_filtered_2009[\"COUNTY\"] == \"Alameda County\"]"
]
},
{
Expand Down Expand Up @@ -827,7 +832,9 @@
}
],
"source": [
"df_2009 = pd.merge(df_arrests_2009, df_pop_filtered_2009, how='left', on=\"COUNTY\", validate=\"1:1\")\n",
"df_2009 = pd.merge(\n",
" df_arrests_2009, df_pop_filtered_2009, how=\"left\", on=\"COUNTY\", validate=\"1:1\"\n",
")\n",
"df_2009"
]
},
Expand All @@ -852,7 +859,14 @@
}
],
"source": [
"df_2009 = pd.merge(df_arrests_2009, df_pop_filtered_2009, how='outer', on=\"COUNTY\", validate=\"1:1\", indicator=True)\n",
"df_2009 = pd.merge(\n",
" df_arrests_2009,\n",
" df_pop_filtered_2009,\n",
" how=\"outer\",\n",
" on=\"COUNTY\",\n",
" validate=\"1:1\",\n",
" indicator=True,\n",
")\n",
"df_2009._merge.value_counts()"
]
},
Expand Down Expand Up @@ -1162,7 +1176,10 @@
}
],
"source": [
"df_pop_fixed_2009 = df_pop_filtered_2009.replace(to_replace=[\"DelNorte County\",\"Injo County\"], value=[\"Del Norte County\", \"Inyo County\"])\n",
"df_pop_fixed_2009 = df_pop_filtered_2009.replace(\n",
" to_replace=[\"DelNorte County\", \"Injo County\"],\n",
" value=[\"Del Norte County\", \"Inyo County\"],\n",
")\n",
"df_pop_fixed_2009"
]
},
Expand All @@ -1187,7 +1204,14 @@
}
],
"source": [
"df_2009 = pd.merge(df_arrests_2009, df_pop_fixed_2009, how='outer', on=\"COUNTY\", validate=\"1:1\", indicator=True)\n",
"df_2009 = pd.merge(\n",
" df_arrests_2009,\n",
" df_pop_fixed_2009,\n",
" how=\"outer\",\n",
" on=\"COUNTY\",\n",
" validate=\"1:1\",\n",
" indicator=True,\n",
")\n",
"df_2009._merge.value_counts()"
]
},
Expand Down Expand Up @@ -1505,8 +1529,10 @@
"metadata": {},
"outputs": [],
"source": [
"df_2009['violent_arrest_rate_2009'] = df_2009['VIOLENT'] / df_2009['total_population']\n",
"df_2009['f_drugoff_arrest_rate_2009'] = df_2009['F_DRUGOFF'] / df_2009['total_population']"
"df_2009[\"violent_arrest_rate_2009\"] = df_2009[\"VIOLENT\"] / df_2009[\"total_population\"]\n",
"df_2009[\"f_drugoff_arrest_rate_2009\"] = (\n",
" df_2009[\"F_DRUGOFF\"] / df_2009[\"total_population\"]\n",
")"
]
},
{
Expand Down Expand Up @@ -1803,9 +1829,11 @@
}
],
"source": [
"df_pop_filtered_2018 = df_pop.loc[df_pop[\"YEAR\"]==\"2013-2017\"]\n",
"df_pop_filtered_2018 = df_pop_filtered_2018.loc[df_pop_filtered_2018[\"STATE\"]==\"California\"]\n",
"df_pop_filtered_2018.loc[df_pop_filtered_2018[\"COUNTY\"]==\"Alameda County\"]"
"df_pop_filtered_2018 = df_pop.loc[df_pop[\"YEAR\"] == \"2013-2017\"]\n",
"df_pop_filtered_2018 = df_pop_filtered_2018.loc[\n",
" df_pop_filtered_2018[\"STATE\"] == \"California\"\n",
"]\n",
"df_pop_filtered_2018.loc[df_pop_filtered_2018[\"COUNTY\"] == \"Alameda County\"]"
]
},
{
Expand Down Expand Up @@ -1958,7 +1986,10 @@
}
],
"source": [
"df_pop_fixed_2018 = df_pop_filtered_2018.replace(to_replace=[\"DelNorte County\",\"Injo County\"], value=[\"Del Norte County\", \"Inyo County\"])\n",
"df_pop_fixed_2018 = df_pop_filtered_2018.replace(\n",
" to_replace=[\"DelNorte County\", \"Injo County\"],\n",
" value=[\"Del Norte County\", \"Inyo County\"],\n",
")\n",
"df_pop_fixed_2018"
]
},
Expand All @@ -1983,7 +2014,14 @@
}
],
"source": [
"df_2018 = pd.merge(df_arrests_2018, df_pop_fixed_2018, how='outer', on=\"COUNTY\", validate=\"1:1\", indicator=True)\n",
"df_2018 = pd.merge(\n",
" df_arrests_2018,\n",
" df_pop_fixed_2018,\n",
" how=\"outer\",\n",
" on=\"COUNTY\",\n",
" validate=\"1:1\",\n",
" indicator=True,\n",
")\n",
"df_2018._merge.value_counts()"
]
},
Expand All @@ -2000,8 +2038,10 @@
"metadata": {},
"outputs": [],
"source": [
"df_2018['violent_arrest_rate_2018'] = df_2018['VIOLENT'] / df_2018['total_population']\n",
"df_2018['f_drugoff_arrest_rate_2018'] = df_2018['F_DRUGOFF'] / df_2018['total_population']"
"df_2018[\"violent_arrest_rate_2018\"] = df_2018[\"VIOLENT\"] / df_2018[\"total_population\"]\n",
"df_2018[\"f_drugoff_arrest_rate_2018\"] = (\n",
" df_2018[\"F_DRUGOFF\"] / df_2018[\"total_population\"]\n",
")"
]
},
{
Expand Down Expand Up @@ -2054,9 +2094,11 @@
}
],
"source": [
"df_2009 = df_2009.drop(columns='_merge')\n",
"df_2018 = df_2018.drop(columns='_merge')\n",
"df_all = pd.merge(df_2009, df_2018, how='outer', on=\"COUNTY\", validate=\"1:1\", indicator=True)\n",
"df_2009 = df_2009.drop(columns=\"_merge\")\n",
"df_2018 = df_2018.drop(columns=\"_merge\")\n",
"df_all = pd.merge(\n",
" df_2009, df_2018, how=\"outer\", on=\"COUNTY\", validate=\"1:1\", indicator=True\n",
")\n",
"df_all._merge.value_counts()"
]
},
Expand Down Expand Up @@ -2620,7 +2662,15 @@
}
],
"source": [
"df = df_all[['COUNTY','violent_arrest_rate_2009','violent_arrest_rate_2018','f_drugoff_arrest_rate_2009','f_drugoff_arrest_rate_2018']]\n",
"df = df_all[\n",
" [\n",
" \"COUNTY\",\n",
" \"violent_arrest_rate_2009\",\n",
" \"violent_arrest_rate_2018\",\n",
" \"f_drugoff_arrest_rate_2009\",\n",
" \"f_drugoff_arrest_rate_2018\",\n",
" ]\n",
"]\n",
"df"
]
},
Expand Down Expand Up @@ -2662,8 +2712,12 @@
}
],
"source": [
"percent_change = (df['f_drugoff_arrest_rate_2018'] - df['f_drugoff_arrest_rate_2009']) / df['f_drugoff_arrest_rate_2009'] * 100\n",
"percent_change.sort_values()\n"
"percent_change = (\n",
" (df[\"f_drugoff_arrest_rate_2018\"] - df[\"f_drugoff_arrest_rate_2009\"])\n",
" / df[\"f_drugoff_arrest_rate_2009\"]\n",
" * 100\n",
")\n",
"percent_change.sort_values()"
]
},
{
Expand Down Expand Up @@ -2728,7 +2782,11 @@
}
],
"source": [
"percent_change = (df['violent_arrest_rate_2018'] - df['violent_arrest_rate_2009']) / df['violent_arrest_rate_2009'] * 100\n",
"percent_change = (\n",
" (df[\"violent_arrest_rate_2018\"] - df[\"violent_arrest_rate_2009\"])\n",
" / df[\"violent_arrest_rate_2009\"]\n",
" * 100\n",
")\n",
"percent_change.sort_values()"
]
},
Expand Down Expand Up @@ -2785,9 +2843,9 @@
"metadata": {},
"outputs": [],
"source": [
"average = df['f_drugoff_arrest_rate_2009'].mean()\n",
"high_drug_arrests_2009 = df.loc[df['f_drugoff_arrest_rate_2009']>average]\n",
"low_drug_arrests_2009 = df.loc[df['f_drugoff_arrest_rate_2009']<=average]"
"average = df[\"f_drugoff_arrest_rate_2009\"].mean()\n",
"high_drug_arrests_2009 = df.loc[df[\"f_drugoff_arrest_rate_2009\"] > average]\n",
"low_drug_arrests_2009 = df.loc[df[\"f_drugoff_arrest_rate_2009\"] <= average]"
]
},
{
Expand Down Expand Up @@ -2815,8 +2873,14 @@
}
],
"source": [
"change_high = high_drug_arrests_2009['violent_arrest_rate_2018'] - high_drug_arrests_2009['violent_arrest_rate_2009']\n",
"change_low = low_drug_arrests_2009['violent_arrest_rate_2018'] - low_drug_arrests_2009['violent_arrest_rate_2009']\n",
"change_high = (\n",
" high_drug_arrests_2009[\"violent_arrest_rate_2018\"]\n",
" - high_drug_arrests_2009[\"violent_arrest_rate_2009\"]\n",
")\n",
"change_low = (\n",
" low_drug_arrests_2009[\"violent_arrest_rate_2018\"]\n",
" - low_drug_arrests_2009[\"violent_arrest_rate_2009\"]\n",
")\n",
"print(change_high.mean())\n",
"print(change_low.mean())"
]
Expand All @@ -2838,8 +2902,8 @@
"metadata": {},
"outputs": [],
"source": [
"def percent_change(x_new,x_base):\n",
" return (x_new - x_base) / x_base * 100 "
"def percent_change(x_new, x_base):\n",
" return (x_new - x_base) / x_base * 100"
]
},
{
Expand All @@ -2857,8 +2921,14 @@
}
],
"source": [
"pchange_high = percent_change(high_drug_arrests_2009['violent_arrest_rate_2018'],high_drug_arrests_2009['violent_arrest_rate_2009'])\n",
"pchange_low = percent_change(low_drug_arrests_2009['violent_arrest_rate_2018'],low_drug_arrests_2009['violent_arrest_rate_2009'])\n",
"pchange_high = percent_change(\n",
" high_drug_arrests_2009[\"violent_arrest_rate_2018\"],\n",
" high_drug_arrests_2009[\"violent_arrest_rate_2009\"],\n",
")\n",
"pchange_low = percent_change(\n",
" low_drug_arrests_2009[\"violent_arrest_rate_2018\"],\n",
" low_drug_arrests_2009[\"violent_arrest_rate_2009\"],\n",
")\n",
"print(pchange_high.mean())\n",
"print(pchange_low.mean())"
]
Expand Down
Loading

0 comments on commit 4fd95f0

Please sign in to comment.