From b90a3b90ceac58cb698bf4b31c9f768fcd18a40c Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Sat, 31 Aug 2024 16:24:51 +0100 Subject: [PATCH] add causal DAGs --- .../GLM-simpsons-paradox.ipynb | 609 +++++++++++------- .../GLM-simpsons-paradox.myst.md | 46 +- 2 files changed, 416 insertions(+), 239 deletions(-) diff --git a/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb b/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb index 842305efa..c6bded7a2 100644 --- a/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb +++ b/examples/generalized_linear_models/GLM-simpsons-paradox.ipynb @@ -41,6 +41,7 @@ "outputs": [], "source": [ "import arviz as az\n", + "import graphviz as gr\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", @@ -312,6 +313,73 @@ "First we examine the simplest model - plain linear regression which pools all the data and has no knowledge of the group/multi-level structure of the data." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From a causal perspective, this approach embodies the belief that $x$ causes $y$ and that this relationship is constant across all groups, or groups are simply not considered. This can be shown in the causal DAG below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "y\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "x->y\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g = gr.Digraph()\n", + "g.node(name=\"x\", label=\"x\")\n", + "g.node(name=\"y\", label=\"y\")\n", + "g.edge(tail_name=\"x\", head_name=\"y\")\n", + "g" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -337,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -352,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -373,11 +441,11 @@ "\n", "obs_id (100)\n", "\n", - "\n", + "\n", "\n", - "β0\n", + "β1\n", "\n", - "β0\n", + "β1\n", "~\n", "Normal\n", "\n", @@ -389,28 +457,14 @@ "~\n", "Deterministic\n", "\n", - "\n", - "\n", - "β0->μ\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "β1\n", - "\n", - "β1\n", - "~\n", - "Normal\n", - "\n", "\n", - "\n", + "\n", "β1->μ\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "sigma\n", "\n", "sigma\n", @@ -426,11 +480,25 @@ "Normal\n", "\n", "\n", - "\n", + "\n", "sigma->y\n", "\n", "\n", "\n", + "\n", + "\n", + "β0\n", + "\n", + "β0\n", + "~\n", + "Normal\n", + "\n", + "\n", + "\n", + "β0->μ\n", + "\n", + "\n", + "\n", "\n", "\n", "x\n", @@ -440,13 +508,13 @@ "Data\n", "\n", "\n", - "\n", + "\n", "x->μ\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "μ->y\n", "\n", "\n", @@ -455,10 +523,10 @@ "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -476,7 +544,7 @@ }, { 
"cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -492,7 +560,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "657418f0b4d64973be647066f88a2c52", + "model_id": "3207e809123a4eeaa8ecfc4993610a5d", "version_major": 2, "version_minor": 0 }, @@ -541,7 +609,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -573,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -586,7 +654,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cf4bcf07a14d43bb8afbb25cdf314687", + "model_id": "f53dc75a68f44839ba098b0191846b49", "version_major": 2, "version_minor": 0 }, @@ -633,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "tags": [ "hide-input" @@ -743,14 +811,88 @@ "source": [ "## Model 2: Independent slopes and intercepts model\n", "\n", - "We will use the same data in this analysis, but this time we will use our knowledge that data come from groups. More specifically we will essentially fit independent regressions to data within each group. This could also be described as an unpooled model." + "We will use the same data in this analysis, but this time we will use our knowledge that data come from groups. From a causal perspective we are exploring the notion that both $x$ and $y$ are influenced by group membership. 
This can be shown in the causal DAG below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x\n", + "\n", + "x\n", + "\n", + "\n", + "\n", + "y\n", + "\n", + "y\n", + "\n", + "\n", + "\n", + "x->y\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "g\n", + "\n", + "group\n", + "\n", + "\n", + "\n", + "g->y\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g = gr.Digraph()\n", + "g.node(name=\"x\", label=\"x\")\n", + "g.node(name=\"g\", label=\"group\")\n", + "g.node(name=\"y\", label=\"y\")\n", + "g.edge(tail_name=\"x\", head_name=\"y\")\n", + "g.edge(tail_name=\"g\", head_name=\"x\")  # group influences x as well as y\n", + "g.edge(tail_name=\"g\", head_name=\"y\")\n", + "g" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We could describe this model mathematically as:\n", + "More specifically we will essentially fit independent regressions to data within each group. This could also be described as an unpooled model. 
We could describe this model mathematically as:\n", "\n", "$$\n", "\\begin{aligned}\n", @@ -766,7 +908,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -795,7 +937,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -821,39 +963,39 @@ "\n", "obs_id (100)\n", "\n", - "\n", + "\n", "\n", - "β0\n", + "β1\n", "\n", - "β0\n", + "β1\n", "~\n", "Normal\n", "\n", "\n", - "\n", + "\n", "μ\n", "\n", "μ\n", "~\n", "Deterministic\n", "\n", - "\n", - "\n", - "β0->μ\n", + "\n", + "\n", + "β1->μ\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "β1\n", + "β0\n", "\n", - "β1\n", + "β0\n", "~\n", "Normal\n", "\n", - "\n", - "\n", - "β1->μ\n", + "\n", + "\n", + "β0->μ\n", "\n", "\n", "\n", @@ -874,7 +1016,7 @@ "Normal\n", "\n", "\n", - "\n", + "\n", "sigma->y\n", "\n", "\n", @@ -888,19 +1030,13 @@ "Data\n", "\n", "\n", - "\n", + "\n", "x->μ\n", "\n", "\n", "\n", - "\n", - "\n", - "μ->y\n", - "\n", - "\n", - "\n", "\n", - "\n", + "\n", "g\n", "\n", "g\n", @@ -908,19 +1044,25 @@ "Data\n", "\n", "\n", - "\n", + "\n", "g->μ\n", "\n", "\n", "\n", + "\n", + "\n", + "μ->y\n", + "\n", + "\n", + "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 14, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -931,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -947,7 +1089,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c37a826bd55943b782c8523bbe560d99", + "model_id": "6c798fe9fbab4eb0822a0b5a490cd28e", "version_major": 2, "version_minor": 0 }, @@ -1020,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1033,7 +1175,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d4879f1b34a64374aab16f7a7aef2f90", + "model_id": 
"40a72152fd71417ea31adaed67c16d94", "version_major": 2, "version_minor": 0 }, @@ -1085,7 +1227,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": { "tags": [ "hide-input" @@ -1202,9 +1344,8 @@ "metadata": {}, "source": [ "## Model 3: Hierarchical regression\n", - "We can go beyond Model 2 and incorporate even more knowledge about the structure of our data. Rather than treating each group as entirely independent, we can use our knowledge that these groups are drawn from a population-level distribution. These are sometimes called hyper-parameters. \n", "\n", - "In one sense this move from Model 2 to Model 3 can be seen as adding parameters, and therefore increasing model complexity. However, in another sense, adding this knowledge about the nested structure of the data actually provides a constraint over parameter space." + "Model 3 assumes the same causal DAG as model 2 (see above). However, we can go further and incorporate more knowledge about the structure of our data. Rather than treating each group as entirely independent, we can use our knowledge that these groups are drawn from a population-level distribution. " ] }, { @@ -1232,23 +1373,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - ":::{admonition} **Independence assumptions**\n", - ":class: note\n", - "\n", - "The hierarchical model we are considering contains a simplification in that the population level slope and intercept are assumed to be independent. It is possible to relax this assumption and model any correlation between these parameters by using a multivariate normal distribution.\n", - ":::" + "This model could also be called a partial pooling model. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This model could also be called a partial pooling model. 
" + ":::{admonition} **Notes**\n", + ":class: note\n", + "\n", + "The hierarchical model we are considering contains a simplification in that the population level slope and intercept are assumed to be independent. It is possible to relax this assumption and model any correlation between these parameters by using a multivariate normal distribution.\n", + "\n", + "In one sense this move from Model 2 to Model 3 can be seen as adding parameters, and therefore increasing model complexity. However, in another sense, adding this knowledge about the nested structure of the data actually provides a constraint over parameter space.\n", + ":::" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -1292,7 +1435,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1304,216 +1447,216 @@ "\n", "\n", - "\n", + "\n", "\n", - "\n", + "\n", "\n", "clustergroup (5)\n", - "\n", - "group (5)\n", + "\n", + "group (5)\n", "\n", "\n", "clusterobs_id (100)\n", - "\n", - "obs_id (100)\n", - "\n", - "\n", - "\n", - "pop_intercept\n", - "\n", - "pop_intercept\n", - "~\n", - "Normal\n", - "\n", - "\n", - "\n", - "intercept_mu\n", - "\n", - "intercept_mu\n", - "~\n", - "Normal\n", - "\n", - "\n", - "\n", - "intercept_mu->pop_intercept\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "β0\n", - "\n", - "β0\n", - "~\n", - "Normal\n", - "\n", - "\n", - "\n", - "intercept_mu->β0\n", - "\n", - "\n", + "\n", + "obs_id (100)\n", "\n", "\n", - "\n", + "\n", "slope_sigma\n", - "\n", - "slope_sigma\n", - "~\n", - "Gamma\n", + "\n", + "slope_sigma\n", + "~\n", + "Gamma\n", "\n", "\n", - "\n", + "\n", "pop_slope\n", - "\n", - "pop_slope\n", - "~\n", - "Normal\n", + "\n", + "pop_slope\n", + "~\n", + "Normal\n", "\n", "\n", - "\n", + "\n", "slope_sigma->pop_slope\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "β1\n", - "\n", - "β1\n", - "~\n", - "Normal\n", + "\n", 
+ "β1\n", + "~\n", + "Normal\n", "\n", "\n", - "\n", + "\n", "slope_sigma->β1\n", - "\n", - "\n", + "\n", + "\n", "\n", - "\n", - "\n", - "intercept_sigma\n", - "\n", - "intercept_sigma\n", - "~\n", - "Gamma\n", + "\n", + "\n", + "intercept_mu\n", + "\n", + "intercept_mu\n", + "~\n", + "Normal\n", "\n", - "\n", + "\n", + "\n", + "pop_intercept\n", + "\n", + "pop_intercept\n", + "~\n", + "Normal\n", + "\n", + "\n", "\n", - "intercept_sigma->pop_intercept\n", - "\n", - "\n", + "intercept_mu->pop_intercept\n", + "\n", + "\n", "\n", - "\n", + "\n", + "\n", + "β0\n", + "\n", + "β0\n", + "~\n", + "Normal\n", + "\n", + "\n", "\n", - "intercept_sigma->β0\n", - "\n", - "\n", + "intercept_mu->β0\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "slope_mu\n", - "\n", - "slope_mu\n", - "~\n", - "Normal\n", + "\n", + "slope_mu\n", + "~\n", + "Normal\n", "\n", "\n", - "\n", + "\n", "slope_mu->pop_slope\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "slope_mu->β1\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "sigma\n", - "\n", - "sigma\n", - "~\n", - "Gamma\n", + "\n", + "sigma\n", + "~\n", + "Gamma\n", "\n", "\n", "\n", "y\n", - "\n", - "y\n", - "~\n", - "Normal\n", + "\n", + "y\n", + "~\n", + "Normal\n", "\n", "\n", - "\n", + "\n", "sigma->y\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "intercept_sigma\n", + "\n", + "intercept_sigma\n", + "~\n", + "Gamma\n", + "\n", + "\n", + "\n", + "intercept_sigma->pop_intercept\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "intercept_sigma->β0\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "μ\n", - "\n", - "μ\n", - "~\n", - "Deterministic\n", - "\n", - "\n", - "\n", - "β0->μ\n", - "\n", - "\n", + "\n", + "μ\n", + "~\n", + "Deterministic\n", "\n", "\n", - "\n", + "\n", "β1->μ\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "β0->μ\n", + "\n", + "\n", "\n", "\n", "\n", "x\n", - "\n", - "x\n", - "~\n", - "Data\n", + "\n", + "x\n", + "~\n", + "Data\n", "\n", "\n", - "\n", + "\n", 
"x->μ\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "μ->y\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "g\n", - "\n", - "g\n", - "~\n", - "Data\n", + "\n", + "g\n", + "~\n", + "Data\n", "\n", "\n", - "\n", + "\n", "g->μ\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "μ->y\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1522,9 +1665,16 @@ "pm.model_to_graphviz(hierarchical)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The nodes `pop_intercept` and `pop_slope` represent the population-level intercept and slope parameters. While the 5 $\\beta_0$ and $\\beta_1$ nodes represent intercepts and slopes for each of the 5 observed groups (respectively), the `pop_intercept` and `pop_slope` represent what we can infer about the population-level intercept and slope. Equivalently, we could say they represent our beliefs about an as yet unobserved group." + ] + }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1540,7 +1690,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fb0b3a4873cb4b679bc6ed93c03e7572", + "model_id": "57059dafe7534f099d7a12c5536c764c", "version_major": 2, "version_minor": 0 }, @@ -1578,7 +1728,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Sampling 4 chains for 4_000 tune and 1_000 draw iterations (16_000 + 4_000 draws total) took 35 seconds.\n", + "Sampling 4 chains for 4_000 tune and 1_000 draw iterations (16_000 + 4_000 draws total) took 36 seconds.\n", "There were 1 divergences after tuning. 
Increase `target_accept` or reparameterize.\n" ] } @@ -1601,7 +1751,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1633,7 +1783,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1646,7 +1796,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d993f1f20bf54cdb8c76c097ab62f3de", + "model_id": "10d952f0a31745ae814f7fbac6acf1b4", "version_major": 2, "version_minor": 0 }, @@ -1698,7 +1848,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "metadata": { "tags": [ "hide-input" @@ -1813,7 +1963,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "metadata": { "tags": [ "hide-input" @@ -1877,7 +2027,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1893,12 +2043,13 @@ "pytensor: 2.25.4\n", "xarray : 2024.7.0\n", "\n", + "pandas : 2.2.2\n", "xarray : 2024.7.0\n", "pymc : 5.16.2\n", - "matplotlib: 3.9.2\n", - "pandas : 2.2.2\n", - "arviz : 0.19.0\n", "numpy : 1.26.4\n", + "arviz : 0.19.0\n", + "matplotlib: 3.9.2\n", + "graphviz : 0.20.3\n", "\n", "Watermark: 2.4.3\n", "\n" diff --git a/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md b/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md index faebc5ca6..e81d1d400 100644 --- a/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md +++ b/examples/generalized_linear_models/GLM-simpsons-paradox.myst.md @@ -35,6 +35,7 @@ This notebook covers: ```{code-cell} ipython3 import arviz as az +import graphviz as gr import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -109,6 +110,18 @@ First we examine the simplest model - plain linear regression which pools all th +++ +From a causal perspective, this approach embodies the belief that $x$ causes $y$ and that this relationship is constant across all 
groups, or groups are simply not considered. This can be shown in the causal DAG below. + +```{code-cell} ipython3 +:tags: [hide-input] + +g = gr.Digraph() +g.node(name="x", label="x") +g.node(name="y", label="y") +g.edge(tail_name="x", head_name="y") +g +``` + We could describe this model mathematically as: $$ @@ -240,11 +253,22 @@ One of the clear things about this analysis is that we have credible evidence th ## Model 2: Independent slopes and intercepts model -We will use the same data in this analysis, but this time we will use our knowledge that data come from groups. More specifically we will essentially fit independent regressions to data within each group. This could also be described as an unpooled model. +We will use the same data in this analysis, but this time we will use our knowledge that data come from groups. From a causal perspective we are exploring the notion that both $x$ and $y$ are influenced by group membership. This can be shown in the causal DAG below. -+++ +```{code-cell} ipython3 +:tags: [hide-input] -We could describe this model mathematically as: +g = gr.Digraph() +g.node(name="x", label="x") +g.node(name="g", label="group") +g.node(name="y", label="y") +g.edge(tail_name="x", head_name="y") +g.edge(tail_name="g", head_name="x")  # group influences x as well as y +g.edge(tail_name="g", head_name="y") +g +``` + +More specifically we will essentially fit independent regressions to data within each group. This could also be described as an unpooled model. We could describe this model mathematically as: $$ \begin{aligned} @@ -393,9 +416,8 @@ In contrast to plain regression model (Model 1), when we model on the group leve +++ ## Model 3: Hierarchical regression -We can go beyond Model 2 and incorporate even more knowledge about the structure of our data. Rather than treating each group as entirely independent, we can use our knowledge that these groups are drawn from a population-level distribution. These are sometimes called hyper-parameters. 
-In one sense this move from Model 2 to Model 3 can be seen as adding parameters, and therefore increasing model complexity. However, in another sense, adding this knowledge about the nested structure of the data actually provides a constraint over parameter space. +Model 3 assumes the same causal DAG as model 2 (see above). However, we can go further and incorporate more knowledge about the structure of our data. Rather than treating each group as entirely independent, we can use our knowledge that these groups are drawn from a population-level distribution. +++ @@ -417,15 +439,17 @@ where $\beta_0$ and $\beta_1$ are the population-level parameters, and $\gamma_0 +++ -:::{admonition} **Independence assumptions** +This model could also be called a partial pooling model. + ++++ + +:::{admonition} **Notes** :class: note The hierarchical model we are considering contains a simplification in that the population level slope and intercept are assumed to be independent. It is possible to relax this assumption and model any correlation between these parameters by using a multivariate normal distribution. -::: -+++ - -This model could also be called a partial pooling model. +In one sense this move from Model 2 to Model 3 can be seen as adding parameters, and therefore increasing model complexity. However, in another sense, adding this knowledge about the nested structure of the data actually provides a constraint over parameter space. +::: ```{code-cell} ipython3 non_centered = False @@ -465,6 +489,8 @@ Plotting the DAG now makes it clear that the group-level intercept and slope par pm.model_to_graphviz(hierarchical) ``` +The nodes `pop_intercept` and `pop_slope` represent the population-level intercept and slope parameters. While the 5 $\beta_0$ and $\beta_1$ nodes represent intercepts and slopes for each of the 5 observed groups (respectively), the `pop_intercept` and `pop_slope` represent what we can infer about the population-level intercept and slope. 
Equivalently, we could say they represent our beliefs about an as yet unobserved group. + ```{code-cell} ipython3 with hierarchical: idata = pm.sample(tune=4000, target_accept=0.99, random_seed=rng)