diff --git a/content/2024-04-10.md b/content/2024-04-10.md new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/content/notes/canvas/ai.canvas b/content/notes/canvas/ai.canvas index 3dfb47f515d6..eb81b443f92e 100644 --- a/content/notes/canvas/ai.canvas +++ b/content/notes/canvas/ai.canvas @@ -2,42 +2,46 @@ "nodes":[ {"id":"fd9a61898087812e","type":"group","x":-2330,"y":284,"width":4399,"height":1542,"color":"5","label":"unsupervised"}, {"id":"c4ad0d9e52b412f5","type":"group","x":-1160,"y":2280,"width":2535,"height":1823,"color":"1","label":"supervised"}, - {"id":"875b039fe7cde699","type":"text","text":"# hierarchical ","x":-383,"y":800,"width":327,"height":83}, - {"id":"822eaf89e425da3e","type":"text","text":"## pros","x":80,"y":640,"width":250,"height":60}, - {"id":"833f43133e0c40b6","type":"text","text":"## cons","x":155,"y":812,"width":250,"height":60}, - {"id":"622f5d9344b98bc5","type":"text","text":"greedy search means no undoing of merges early on","x":575,"y":740,"width":250,"height":60}, - {"id":"9920d493e334331c","type":"text","text":"## dendrogram","x":776,"y":304,"width":250,"height":60}, - {"id":"762b6ac40b450630","type":"text","text":"no need to specify number of clusters","x":418,"y":364,"width":250,"height":60}, - {"id":"80a2dccede9dd085","type":"text","text":"graphical tree output, which is simple and fast","x":530,"y":520,"width":250,"height":60}, - {"id":"cf978e2a914e3054","type":"text","text":"in each iterations, looks for the two clusters with the **smallest minimum** pairwise distance between elements, merges these next","x":1719,"y":1000,"width":250,"height":137}, - {"id":"88632bfed85e619e","type":"text","text":"### single linkage","x":1349,"y":1169,"width":250,"height":60}, - {"id":"cd0972a0a06b3a50","type":"text","text":"in each iteration, looks for the two clusters with the **smallest maximum** pairwise distance within elements, and merges these next","x":1799,"y":1229,"width":250,"height":131}, - 
{"id":"f9b2aa4a8b5b4577","type":"text","text":"### complete linkage","x":1379,"y":1272,"width":276,"height":60}, - {"id":"2994eb905bcc3266","type":"text","text":"### average linkage","x":1349,"y":1384,"width":250,"height":60}, - {"id":"76c3a89ae05fe481","type":"text","text":"merge clusters that have the **smallest average** distance between all elements in one cluster and another","x":1719,"y":1469,"width":250,"height":106}, - {"id":"f5753c84f85ece4f","type":"text","text":"## methods (linkage)","x":813,"y":1255,"width":426,"height":94}, - {"id":"4129b725ee9bb0d0","type":"text","text":"flexibility in cluster definitions","x":813,"y":640,"width":250,"height":60}, - {"id":"59aa3a48f2d03002","type":"file","file":"notes/images/Pasted image 20240408130758.png","x":300,"y":1480,"width":400,"height":194}, - {"id":"099d8949af3f3dc5","type":"text","text":"can be computationally expensive as it is typically a $O(n^2\\log{n})$","x":575,"y":842,"width":252,"height":86}, - {"id":"baf4c81b90c332df","type":"text","text":"sensitive to noise and outliers","x":575,"y":980,"width":250,"height":60}, - {"id":"ea10c9a41c336e3c","type":"text","text":"# CLUSTERING","x":-699,"y":622,"width":250,"height":60,"color":"5"}, - {"id":"cdec0b4a22c85488","type":"text","text":"builds a hierarchy of clusters","x":-270,"y":1120,"width":250,"height":60}, - {"id":"5cbf08f8bb059f64","type":"text","text":"## agglomerative approach","x":-619,"y":1280,"width":355,"height":50}, - {"id":"861d6c10504d880f","type":"text","text":"treats each data point as a single cluster then merge into larger and larger","x":-492,"y":1520,"width":250,"height":80}, - {"id":"246f95f7fddaa9ea","type":"text","text":"until all points have been merged into a single cluster","x":-424,"y":1746,"width":250,"height":60}, - {"id":"f8e1e8964c4be9f1","type":"text","text":"each observation belongs to the cluster with the nearest mean","x":-1237,"y":1443,"width":250,"height":86}, - {"id":"efaec93ecdfa6374","type":"text","text":"simple and 
fast ($O(nki)$) where:\n- $n$: data points\n- $k$: clusters\n- $i$: iterations","x":-1957,"y":713,"width":250,"height":157}, - {"id":"cead019233b02c3f","type":"text","text":"elbow method","x":-2310,"y":902,"width":250,"height":60}, - {"id":"ee3799ed5815e8ad","type":"text","text":"choosing $k$","x":-1867,"y":1037,"width":250,"height":60}, - {"id":"88eb8b72b5a748e9","type":"text","text":"sensitivity to initial seeds","x":-1947,"y":1163,"width":250,"height":60}, - {"id":"17aa3e01ae818305","type":"text","text":"not suited to non-globular clusters or clusters of varying size and density","x":-1947,"y":1280,"width":240,"height":85}, - {"id":"22fa05ce66d4a9fe","type":"text","text":"## pros","x":-1427,"y":835,"width":250,"height":60}, - {"id":"ce86b0da13057431","type":"text","text":"# k-means","x":-1043,"y":1037,"width":250,"height":60}, - {"id":"1a391999a5ed632b","type":"text","text":"## disadvantages","x":-1487,"y":1067,"width":250,"height":60}, - {"id":"63a829206f8f8d82","type":"text","text":"partition $n$ observations into $k$ clusters","x":-1256,"y":1250,"width":250,"height":60}, - {"id":"d1c8c831fe0c0ea6","type":"text","text":"easily adaptable (works well with globular shapes)","x":-1832,"y":593,"width":250,"height":60}, - {"id":"67195c7d8c68c0c2","type":"text","text":"# CLASSIFICATION","x":-1140,"y":2763,"width":295,"height":59,"color":"5"}, - {"id":"a3c91c06640843cc","type":"text","text":"# KNN","x":-725,"y":2962,"width":120,"height":60}, + {"id":"9f5be4d498695d61","type":"text","text":"3. update weights and bias","x":-1110,"y":-321,"width":250,"height":60}, + {"id":"cef00bfe578d20ad","type":"text","text":"updates each weight and bias based on the error","x":-1093,"y":1,"width":250,"height":60}, + {"id":"84f74a9867685975","type":"text","text":"$e = y_d - y$","x":-1601,"y":-29,"width":250,"height":60,"color":"4"}, + {"id":"13b688351a740bb8","type":"text","text":"2. 
compute error","x":-1511,"y":-299,"width":250,"height":60}, + {"id":"3b0a2fcd287d2d85","type":"text","text":"compares output to desired output ($y_d$) (from training set)","x":-1405,"y":-132,"width":250,"height":60}, + {"id":"242f9af773cb1dc0","type":"text","text":"where $\\eta$ is the **learning rate**","x":226,"y":-271,"width":250,"height":60}, + {"id":"c7e4a50e2273c61e","type":"text","text":"where $\\Delta{w_i}$ and $\\Delta{\\theta}$ are the changes made to each weight and bias","x":-378,"y":-311,"width":250,"height":70}, + {"id":"fc2cd9028d5d42a5","type":"text","text":"$\\Delta{w_i} = \\eta \\times{e} \\times{x_i}$\n$\\Delta{\\theta} = \\eta \\times e$","x":226,"y":-431,"width":250,"height":60,"color":"4"}, + {"id":"4c8ad58060f2d1b7","type":"text","text":"a small positive number that controls how much the weights are adjusted during each step","x":526,"y":-480,"width":250,"height":99}, + {"id":"e7c905af89617042","type":"text","text":"$w_i(p+1) = w_i(p) + \\Delta{w_i}$\n$\\theta(p+1) = \\theta(p) + \\Delta{\\theta}$","x":-760,"y":-329,"width":250,"height":60,"color":"4"}, + {"id":"78d4d215bbe1236b","type":"file","file":"notes/images/Pasted image 20240408153951.png","x":-1160,"y":-1219,"width":400,"height":218}, + {"id":"3582b4985625f007","type":"text","text":"### basic components","x":-645,"y":-925,"width":250,"height":60}, + {"id":"7ce9806620880c92","type":"text","text":"## perceptron","x":-1085,"y":-835,"width":250,"height":60}, + {"id":"cfd08075d108edb9","type":"text","text":"#### steps","x":-1160,"y":-647,"width":250,"height":60}, + {"id":"4e5eba4c4f5edeff","type":"text","text":"update weights and biases based on the errors it makes in predictions","x":-645,"y":-579,"width":250,"height":71}, + {"id":"e923835897ae92f7","type":"text","text":"# MEASURING DISTANCE","x":1576,"y":-682,"width":350,"height":65,"color":"5"}, + {"id":"7782ed3d4010c241","type":"text","text":"### manhattan distance\n\n$d(x,y) = |x_1-y_1| + |x_2-y_2| + ... 
+ |x_n-y_n|$","x":1751,"y":-497,"width":344,"height":116}, + {"id":"a35a35874ea36daa","type":"text","text":"sum of absolute differences between the coordinates of two points","x":1798,"y":-179,"width":250,"height":78}, + {"id":"10884a905fb581cb","type":"text","text":"### euclidean distance\n\n$d(x, y) = \\sqrt{(x_1-y_1)^2 + (x_2-y_2)^2 + ... + (x_n-y_n)^2}$\n\nwhere x is one row, and y the other!","x":1290,"y":-371,"width":400,"height":140}, + {"id":"7fcd261e349a67d4","type":"text","text":"square root of the sum of the squared differences","x":1240,"y":-99,"width":250,"height":60}, + {"id":"96af754a74d170b8","type":"text","text":"$y = activation(\\sum_i{(w_i\\times{x_i}) + \\theta})$","x":-2005,"y":-617,"width":250,"height":60,"color":"4"}, + {"id":"780affbfafe1cfa9","type":"text","text":"output $y$ by summing the weighted inputs and applying an activation function","x":-1851,"y":-499,"width":250,"height":93}, + {"id":"0930ad866b5c9900","type":"text","text":"1. calculate output","x":-1655,"y":-677,"width":250,"height":60}, + {"id":"606a05777fabb8a5","type":"file","file":"notes/images/Screenshot 2024-04-08 at 16.18.57.png","x":-1805,"y":-1480,"width":400,"height":370}, + {"id":"b2d0960fd33224f9","type":"text","text":"#### weights $(w_i)$","x":-270,"y":-1110,"width":250,"height":60}, + {"id":"e197c3fb3a8bea1d","type":"text","text":"#### bias ($\\theta$)","x":-268,"y":-924,"width":250,"height":60}, + {"id":"179ddfc7d295fdcd","type":"text","text":"determine the influence of each input on the output","x":105,"y":-1110,"width":250,"height":60}, + {"id":"89eefa13cc072794","type":"text","text":"constant added to the input to help model make better decisions","x":105,"y":-935,"width":250,"height":71}, + {"id":"7ab68a8670189fd7","type":"text","text":"adjusts the weights based on the errors made during training","x":-189,"y":-1339,"width":250,"height":92}, + {"id":"87a2a92db56c18de","type":"text","text":"shifting decision boundary","x":555,"y":-935,"width":250,"height":60}, + 
{"id":"c0bb580201449e3d","type":"text","text":"measure closeness","x":433,"y":3423,"width":250,"height":60}, + {"id":"411a945a39c53238","type":"text","text":"easy to interpret","x":551,"y":3686,"width":250,"height":60}, + {"id":"f8d9a3d157ac5e10","type":"text","text":"justification directly based on the nearest neighbours","x":591,"y":3863,"width":250,"height":60}, + {"id":"5801a0d632f3544c","type":"text","text":"no complex model assumptions","x":951,"y":3686,"width":250,"height":60}, + {"id":"46b54aa3d2bebaff","type":"text","text":"makes no assumptions about the underlying data distribution","x":434,"y":3998,"width":249,"height":85}, + {"id":"5fc8f93a55d70392","type":"text","text":"excels in **interpretability**","x":580,"y":2300,"width":250,"height":60}, + {"id":"91d948da6d4cc690","type":"text","text":"clear visualisation of decisions","x":705,"y":2420,"width":250,"height":60}, + {"id":"4cc901f8fa52ff75","type":"text","text":"easier to explain model's decision","x":580,"y":2603,"width":250,"height":60}, + {"id":"92bc52f4dc1820c3","type":"text","text":"easier to visualise and interpret","x":580,"y":2723,"width":250,"height":60}, + {"id":"7eb5f39c40386599","type":"text","text":"# NN","x":-1405,"y":-865,"width":118,"height":60,"color":"5"}, {"id":"2728082dfc7bd196","type":"text","text":"## cons","x":-365,"y":2903,"width":250,"height":60}, {"id":"4b0b9b5be329f9ef","type":"text","text":"curse of dimensionality","x":146,"y":2903,"width":250,"height":60}, {"id":"0c4e329f3be25ffc","type":"text","text":"# Decision Trees","x":-204,"y":2443,"width":309,"height":60}, @@ -48,20 +52,17 @@ {"id":"443c9f7999260612","type":"text","text":"number of nearest neighbours","x":58,"y":3423,"width":250,"height":60}, {"id":"aa9259f264557472","type":"text","text":"transparent model","x":76,"y":3716,"width":250,"height":60}, {"id":"f7f13703979792e5","type":"text","text":"flexibility in data distribution","x":21,"y":3963,"width":250,"height":60}, - 
{"id":"c0bb580201449e3d","type":"text","text":"measure closeness","x":433,"y":3423,"width":250,"height":60}, - {"id":"411a945a39c53238","type":"text","text":"easy to interpret","x":551,"y":3686,"width":250,"height":60}, - {"id":"f8d9a3d157ac5e10","type":"text","text":"justification directly based on the nearest neighbours","x":591,"y":3863,"width":250,"height":60}, - {"id":"5801a0d632f3544c","type":"text","text":"no complex model assumptions","x":951,"y":3686,"width":250,"height":60}, - {"id":"46b54aa3d2bebaff","type":"text","text":"makes no assumptions about the underlying data distribution","x":434,"y":3998,"width":249,"height":85}, - {"id":"5fc8f93a55d70392","type":"text","text":"excels in **iterpretability**","x":580,"y":2300,"width":250,"height":60}, - {"id":"91d948da6d4cc690","type":"text","text":"clear visualisation of decisions","x":705,"y":2420,"width":250,"height":60}, + {"id":"88632bfed85e619e","type":"text","text":"### single linkage","x":1349,"y":1169,"width":250,"height":60}, + {"id":"cd0972a0a06b3a50","type":"text","text":"in each iteration, looks for the two clusters with the **smallest maximum** pairwise distance between elements, and merges these next","x":1799,"y":1229,"width":250,"height":131}, + {"id":"f9b2aa4a8b5b4577","type":"text","text":"### complete linkage","x":1379,"y":1272,"width":276,"height":60}, + {"id":"2994eb905bcc3266","type":"text","text":"### average linkage","x":1349,"y":1384,"width":250,"height":60}, + {"id":"f8e1e8964c4be9f1","type":"text","text":"each observation belongs to the cluster with the nearest mean","x":-1237,"y":1443,"width":250,"height":86}, + {"id":"88eb8b72b5a748e9","type":"text","text":"sensitivity to initial seeds","x":-1947,"y":1163,"width":250,"height":60}, + {"id":"17aa3e01ae818305","type":"text","text":"not suited to non-globular clusters or clusters of varying size and density","x":-1947,"y":1280,"width":240,"height":85}, + {"id":"67195c7d8c68c0c2","type":"text","text":"# 
CLASSIFICATION","x":-1140,"y":2763,"width":295,"height":59,"color":"5"}, + {"id":"a3c91c06640843cc","type":"text","text":"# KNN","x":-725,"y":2962,"width":120,"height":60}, {"id":"134edda0da0cbe64","type":"text","text":"shows which features contribute to the outcome at each node","x":1105,"y":2400,"width":250,"height":80}, - {"id":"4cc901f8fa52ff75","type":"text","text":"easier to explain model's decision","x":580,"y":2603,"width":250,"height":60}, - {"id":"92bc52f4dc1820c3","type":"text","text":"easier to visualise and interpret","x":580,"y":2723,"width":250,"height":60}, {"id":"b916eebbcd0d9e3c","type":"text","text":"# CONFUSION MATRIX","x":-3208,"y":2074,"width":311,"height":60,"color":"5"}, - {"id":"ea9cd5aa4066605b","type":"text","text":"$(TP + FN)$","x":-2966,"y":2613,"width":134,"height":60}, - {"id":"0e4368a7ffd23326","type":"text","text":"total positives","x":-3023,"y":2779,"width":250,"height":60}, - {"id":"f315b860cea1f5ce","type":"text","text":"## sensitivity","x":-3022,"y":2436,"width":250,"height":60}, {"id":"c32bf796cb0580ff","type":"text","text":"Of all the actual positives, how many did we correctly identify as positive?","x":-2717,"y":2307,"width":250,"height":75}, {"id":"df88c1465c3bf0d1","type":"text","text":"diagnosing diseases","x":-2617,"y":2583,"width":250,"height":60}, {"id":"7934d3ac95b3451d","type":"text","text":"## specificity","x":-2172,"y":2229,"width":250,"height":60}, @@ -70,37 +71,37 @@ {"id":"8636b8953fc4cc65","type":"text","text":"$(TP + TN)\\over{(TP+TN+FP+FN)}$","x":-3384,"y":2353,"width":176,"height":65}, {"id":"9f66908456d31cdb","type":"text","text":"Of all the actual negatives, how many did we correctly identify as negative?","x":-1737,"y":2113,"width":250,"height":83}, {"id":"891bcd6f9cbb001f","type":"text","text":"indicating a disease when there is none","x":-1692,"y":2406,"width":250,"height":60}, - {"id":"9f5be4d498695d61","type":"text","text":"3. 
update weights and bias","x":-1110,"y":-321,"width":250,"height":60}, - {"id":"cef00bfe578d20ad","type":"text","text":"updates each weight and bias based on the error","x":-1093,"y":1,"width":250,"height":60}, - {"id":"96af754a74d170b8","type":"text","text":"$y = activation(\\sum_i{(w_i\\times{x_i}) + \\theta})$","x":-2005,"y":-617,"width":250,"height":60,"color":"4"}, - {"id":"780affbfafe1cfa9","type":"text","text":"output $y$ by summing the weighted inputs and applying an activation function","x":-1851,"y":-499,"width":250,"height":93}, - {"id":"0930ad866b5c9900","type":"text","text":"1. calculate output","x":-1655,"y":-677,"width":250,"height":60}, - {"id":"84f74a9867685975","type":"text","text":"$e = y_d - y$","x":-1601,"y":-29,"width":250,"height":60,"color":"4"}, - {"id":"13b688351a740bb8","type":"text","text":"2. compute error","x":-1511,"y":-299,"width":250,"height":60}, - {"id":"3b0a2fcd287d2d85","type":"text","text":"compares output to desired output ($y_d$) (from training set)","x":-1405,"y":-132,"width":250,"height":60}, - {"id":"78d4d215bbe1236b","type":"file","file":"notes/images/Pasted image 20240408153951.png","x":-1160,"y":-1219,"width":400,"height":218}, - {"id":"3582b4985625f007","type":"text","text":"### basic components","x":-645,"y":-925,"width":250,"height":60}, - {"id":"7eb5f39c40386599","type":"text","text":"# NN","x":-1405,"y":-865,"width":118,"height":60,"color":"5"}, - {"id":"7ce9806620880c92","type":"text","text":"## perceptron","x":-1085,"y":-835,"width":250,"height":60}, - {"id":"cfd08075d108edb9","type":"text","text":"#### steps","x":-1160,"y":-647,"width":250,"height":60}, - {"id":"4e5eba4c4f5edeff","type":"text","text":"update weights and biases based on the errors it makes in predictions","x":-645,"y":-579,"width":250,"height":71}, - {"id":"606a05777fabb8a5","type":"file","file":"notes/images/Screenshot 2024-04-08 at 16.18.57.png","x":-1805,"y":-1480,"width":400,"height":370}, - {"id":"b2d0960fd33224f9","type":"text","text":"#### 
weights $(w_i)$","x":-270,"y":-1110,"width":250,"height":60}, - {"id":"e197c3fb3a8bea1d","type":"text","text":"#### bias ($\\theta$)","x":-268,"y":-924,"width":250,"height":60}, - {"id":"179ddfc7d295fdcd","type":"text","text":"determine the influence of each input on the output","x":105,"y":-1110,"width":250,"height":60}, - {"id":"89eefa13cc072794","type":"text","text":"constant added to the input to help model make better decisions","x":105,"y":-935,"width":250,"height":71}, - {"id":"7ab68a8670189fd7","type":"text","text":"adjusts the weights based on the errors made during training","x":-189,"y":-1339,"width":250,"height":92}, - {"id":"87a2a92db56c18de","type":"text","text":"shifting decision boundary","x":555,"y":-935,"width":250,"height":60}, - {"id":"242f9af773cb1dc0","type":"text","text":"where $\\eta$ is the **learning rate**","x":226,"y":-271,"width":250,"height":60}, - {"id":"c7e4a50e2273c61e","type":"text","text":"where $\\Delta{w_i}$ and $\\Delta{\\theta}$ are the changes made to each weight and bias","x":-378,"y":-311,"width":250,"height":70}, - {"id":"fc2cd9028d5d42a5","type":"text","text":"$\\Delta{w_i} = \\eta \\times{e} \\times{x_i}$\n$\\Delta{\\theta} = \\eta \\times e$","x":226,"y":-431,"width":250,"height":60,"color":"4"}, - {"id":"4c8ad58060f2d1b7","type":"text","text":"a small positive number that controls how much the weights are adjusted during each step","x":526,"y":-480,"width":250,"height":99}, - {"id":"e7c905af89617042","type":"text","text":"$w_i(p+1) = w_i(p) + \\Delta{w_i}$\n$\\theta(p+1) = \\theta(p) + \\Delta{\\theta}$","x":-760,"y":-329,"width":250,"height":60,"color":"4"}, - {"id":"e923835897ae92f7","type":"text","text":"# MEASURING DISTANCE","x":1576,"y":-682,"width":350,"height":65,"color":"5"}, - {"id":"7782ed3d4010c241","type":"text","text":"### manhattan distance\n\n$d(x,y) = |x_1-y_1| + |x_2-y_2| + ... 
+ |x_n-y_n|$","x":1751,"y":-497,"width":344,"height":116}, - {"id":"a35a35874ea36daa","type":"text","text":"sum of absolute differences between the coordinates of two points","x":1798,"y":-179,"width":250,"height":78}, - {"id":"10884a905fb581cb","type":"text","text":"### euclidean distance\n\n$d(x, y) = \\sqrt{(x_1-y_1)^2 + (x_2-y_2)^2 + ... + (x_n-y_n)^2}$\n\nwhere x is one row, and y the other!","x":1290,"y":-371,"width":400,"height":140}, - {"id":"7fcd261e349a67d4","type":"text","text":"square root of the sum of the squared differences","x":1240,"y":-99,"width":250,"height":60} + {"id":"875b039fe7cde699","type":"text","text":"# hierarchical ","x":-383,"y":800,"width":327,"height":83}, + {"id":"822eaf89e425da3e","type":"text","text":"## pros","x":80,"y":640,"width":250,"height":60}, + {"id":"cdec0b4a22c85488","type":"text","text":"builds a hierarchy of clusters","x":-270,"y":1120,"width":250,"height":60}, + {"id":"22fa05ce66d4a9fe","type":"text","text":"## pros","x":-1427,"y":835,"width":250,"height":60}, + {"id":"ce86b0da13057431","type":"text","text":"# k-means","x":-1043,"y":1037,"width":250,"height":60}, + {"id":"1a391999a5ed632b","type":"text","text":"## disadvantages","x":-1487,"y":1067,"width":250,"height":60}, + {"id":"63a829206f8f8d82","type":"text","text":"partition $n$ observations into $k$ clusters","x":-1256,"y":1250,"width":250,"height":60}, + {"id":"ea9cd5aa4066605b","type":"text","text":"$(TP + FN)$","x":-2966,"y":2613,"width":134,"height":60}, + {"id":"0e4368a7ffd23326","type":"text","text":"total positives","x":-3023,"y":2779,"width":250,"height":60}, + {"id":"f315b860cea1f5ce","type":"text","text":"## sensitivity","x":-3022,"y":2436,"width":250,"height":60}, + {"id":"833f43133e0c40b6","type":"text","text":"## cons","x":155,"y":812,"width":250,"height":60}, + {"id":"762b6ac40b450630","type":"text","text":"no need to specify number of clusters","x":418,"y":364,"width":250,"height":60}, + {"id":"80a2dccede9dd085","type":"text","text":"graphical 
tree output, which is simple and fast","x":530,"y":520,"width":250,"height":60}, + {"id":"76c3a89ae05fe481","type":"text","text":"merge clusters that have the **smallest average** distance between all elements in one cluster and another","x":1719,"y":1469,"width":250,"height":106}, + {"id":"f5753c84f85ece4f","type":"text","text":"## methods (linkage)","x":813,"y":1255,"width":426,"height":94}, + {"id":"59aa3a48f2d03002","type":"file","file":"notes/images/Pasted image 20240408130758.png","x":300,"y":1480,"width":400,"height":194}, + {"id":"ea10c9a41c336e3c","type":"text","text":"# CLUSTERING","x":-699,"y":622,"width":250,"height":60,"color":"5"}, + {"id":"5cbf08f8bb059f64","type":"text","text":"## agglomerative approach","x":-619,"y":1280,"width":355,"height":50}, + {"id":"861d6c10504d880f","type":"text","text":"treats each data point as a single cluster then merges them into larger and larger clusters","x":-492,"y":1520,"width":250,"height":80}, + {"id":"246f95f7fddaa9ea","type":"text","text":"until all points have been merged into a single cluster","x":-424,"y":1746,"width":250,"height":60}, + {"id":"622f5d9344b98bc5","type":"text","text":"greedy search means no undoing of merges early on","x":575,"y":740,"width":250,"height":60}, + {"id":"9920d493e334331c","type":"text","text":"## dendrogram","x":776,"y":304,"width":250,"height":60}, + {"id":"cf978e2a914e3054","type":"text","text":"in each iteration, looks for the two clusters with the **smallest minimum** pairwise distance between elements, and merges these next","x":1719,"y":1000,"width":250,"height":137}, + {"id":"4129b725ee9bb0d0","type":"text","text":"flexibility in cluster definitions","x":813,"y":640,"width":250,"height":60}, + {"id":"099d8949af3f3dc5","type":"text","text":"can be computationally expensive as it is typically a $O(n^2\\log{n})$","x":575,"y":842,"width":252,"height":86}, + {"id":"baf4c81b90c332df","type":"text","text":"sensitive to noise and outliers","x":575,"y":980,"width":250,"height":60}, + 
{"id":"efaec93ecdfa6374","type":"text","text":"simple and fast ($O(nki)$) where:\n- $n$: data points\n- $k$: clusters\n- $i$: iterations","x":-1957,"y":713,"width":250,"height":157}, + {"id":"cead019233b02c3f","type":"text","text":"elbow method","x":-2310,"y":902,"width":250,"height":60}, + {"id":"ee3799ed5815e8ad","type":"text","text":"choosing $k$","x":-1867,"y":1037,"width":250,"height":60}, + {"id":"d1c8c831fe0c0ea6","type":"text","text":"easily adaptable (works well with globular shapes)","x":-1832,"y":593,"width":250,"height":60}, + {"id":"008f83659dec5c6c","x":-1879,"y":-909,"width":250,"height":60,"type":"text","text":"## black box"} ], "edges":[ {"id":"9ac3ca9154cae903","fromNode":"10884a905fb581cb","fromSide":"bottom","toNode":"7fcd261e349a67d4","toSide":"top"}, @@ -197,6 +198,7 @@ {"id":"81dc5a5bdd83fb5a","fromNode":"242f9af773cb1dc0","fromSide":"right","toNode":"4c8ad58060f2d1b7","toSide":"bottom"}, {"id":"eca3acbf6e2ce120","fromNode":"7eb5f39c40386599","fromSide":"top","toNode":"606a05777fabb8a5","toSide":"bottom"}, {"id":"3a7f0fab35f28798","fromNode":"e923835897ae92f7","fromSide":"bottom","toNode":"10884a905fb581cb","toSide":"top"}, - {"id":"a73479532df69641","fromNode":"e923835897ae92f7","fromSide":"bottom","toNode":"7782ed3d4010c241","toSide":"top"} + {"id":"a73479532df69641","fromNode":"e923835897ae92f7","fromSide":"bottom","toNode":"7782ed3d4010c241","toSide":"top"}, + {"id":"257d2a84c19bfb94","fromNode":"7eb5f39c40386599","fromSide":"left","toNode":"008f83659dec5c6c","toSide":"right"} ] } \ No newline at end of file