From dab9a085ae53bddfba42a53da9dc754ece589486 Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Sun, 1 Oct 2023 22:47:05 -0500 Subject: [PATCH] format --- assignments/hwk01/HW1.md | 202 ++++++++++++++++++++++++++++++++-- assignments/hwk01/hw1solve.py | 29 +++-- 2 files changed, 208 insertions(+), 23 deletions(-) diff --git a/assignments/hwk01/HW1.md b/assignments/hwk01/HW1.md index ce9df61..078a7a4 100644 --- a/assignments/hwk01/HW1.md +++ b/assignments/hwk01/HW1.md @@ -1,25 +1,203 @@ --- geometry: margin=2cm output: pdf_document +title: Assignment 1 +subtitle: CSCI 5521 +date: \today + +author: | + | Michael Zhang + | zhan4854@umn.edu $\cdot$ ID: 5289259 --- \renewcommand{\c}[1]{\textcolor{gray}{#1}} -1. **(20 points)** - \c{Derive the VC dimension of the following classifiers.} +1. **(20 points)** \c{Derive the VC dimension of the following classifiers.} -2. + a. \c{What is the VC dimension, $d_c$, of a threshold $c$ in $\mathbb{R}$? The classification function + is specified by $f (x) = +1$ if $x > c$ and $f (x) = -1$ if $x \le c$. Prove your answer.} -3. **(20 points)** - \c{Let $P (x|C)$ denote a Bernoulli density function for a class $C \in {C_1, C_2}$ + - VC dimension is \boxed{1} + - A single point can be shattered: a threshold below the point labels it + while a threshold at or above the point labels it - , so both labelings are achievable + - For ex: Choose the point $\{3\}$ . 3=+ is achieved by a threshold at 2, and 3=- by a threshold at 4 + - Cannot shatter 2 points since the larger point can never be labeled - while the smaller point is labeled + + - Choose any points $\{a, b\}$ in increasing order. The labeling a=+, b=- cannot be achieved with any threshold, since $a > c$ implies $b > c$ + - The trivial case of the 2 points equaling each other also doesn't work since the case where those 2 are labeled differently cannot be distinguished + + b. \c{What is the VC dimension, $d_I$ , of intervals in $\mathbb{R}$? The classification function + specified by an interval $[a,b]$ labels any example positive iff it lies inside the interval + $[a,b]$. 
Prove your answer.} + + - VC dimension is \boxed{2} + - Given the interval, pick one point in the interval and one outside + - For ex: Choose points $\{2, 4\}$ + - 2=+, 4=+ => interval (1, 5) + - 2=+, 4=- => interval (1, 3) + - 2=-, 4=+ => interval (3, 5) + - 2=-, 4=- => interval (6, 8) + - Cannot shatter 3 points with the (positive, negative, positive) pattern, since the inside of the interval must be interpreted as positive. + - Same as above, choose any points $\{a, b, c\}$ in increasing order. The labeling a=+, b=-, c=+ cannot be achieved with any interval since the positives are separated by a negative in between + +2. **(20 points)** \c{Find the Maximum Likelihood Estimation (MLE) for the following pdf. + In each case, consider a random sample of size $n$. Show your calculation} + + a. \c{$f(x|\theta) = \frac{1}{\theta} e^{-\frac{x}{\theta}} , x>0 , \theta>0$} + + - To find MLE, first find the log likelihood function: + $$\begin{split} + \mathfrak{L} (\theta|x) &=\log( \prod\limits_t \frac{1}{\theta} e^{-\frac{x^t}{\theta}} ) \\ + &=\sum\limits_t \left( \log(\frac{1}{\theta}) + \log(e^{-\frac{x^t}{\theta}}) \right) \\ + &=\sum\limits_t \left( \log(\frac{1}{\theta}) -\frac{x^t}{\theta} \right) + \end{split}$$ + - Then take the partial with respect to $\theta$ + $$\begin{split} + \frac{\partial\mathfrak{L}}{\partial\theta} &= \sum\limits_t \frac{\partial}{\partial\theta} \left( \log(\frac{1}{\theta}) -\frac{x^t}{\theta} \right) \\ + &=\sum\limits_t \left( -\frac{1}{\theta} + \frac{x^t}{\theta^2} \right) + \end{split}$$ + - Now set it to 0 to find a local maximum + $$\begin{split} + 0&=\sum\limits_t \left( -\frac{1}{\theta} + \frac{x^t}{\theta^2} \right) \\ + \sum\limits_t \frac{1}{\theta} &= \sum\limits_t \frac{x^t}{\theta^2} \\ + \sum\limits_t 1 &= \sum\limits_t \frac{x^t}{\theta} \\ + \sum\limits_t 1 &= \frac{1}{\theta} \sum\limits_t x^t \\ + n &= \frac{1}{\theta} \sum\limits_t x^t \\ + \theta &= \boxed{\frac{\sum\limits_t x^t}{n}} + \end{split}$$ + + b. 
\c{$f(x|\theta) = 2\theta x^{2\theta - 1} , 0 < x < 1 , \theta > 0$} + + [... text missing from this copy: the rest of 2(b) through the start of 3(a) ...] + + - For $x=0$ , pick $C_1$ if $P(C_1|x=0)>P(C_2|x=0)$ else $C_2$ + - For $x=1$ , pick $C_1$ if $P(C_1|x=1)>P(C_2|x=1)$ else $C_2$ - - $p(x = 0 | C_i)$ is given to us as $p_1$ + b. \c{Consider D-dimensional independent Bernoulli densities} + + $$ + \c{ + P (x|C) = P (x_1, x_2, \cdots , x_D|C) = \prod\limits_j P (x_j |C) + } + $$ + + \c{specified by $p_{ij} \equiv p(x_j = 0|C_i)$ for $i = 1, 2$ and $j = 1, 2, \cdots , D$. Derive the classification rules for classifying a sample $\mathbf{x}$ into $C_1$ and $C_2$. It is sufficient to give your rule as a function of $\mathbf{x}$.} + + - The posteriors $P(C_i|x)$ can be found by expanding the Bayes' theorem equation: + - $P(C_i|x)=\frac{ p(\mathbf{x}|C_i) P(C_i) }{ \sum\limits_k^{\{1,2\}} p(\mathbf{x}|C_k) P(C_k) }$ + - Since $p_{ij}=p(x_j=0|C_i)$ , we can expand this into a general case for $p(\mathbf{x}|C_i)$ by using the multivariate form of the Bernoulli: $p(\mathbf{x}|C_i)= \prod\limits_{j=1}^{D} p_{ij}^{(1-x_j)} (1-p_{ij})^{x_j}$ + - To determine the classification rules, pick the $C_i$ with the maximum posterior + - We use the discriminant function found in the slides $g_i(\mathbf{x}) = p(\mathbf{x} |C_i)P(C_i)$ to select the posterior + - If $g_1(\mathbf{x}) > g_2(\mathbf{x})$ , then choose $C_1$ else choose $C_2$ + + c. \c{Follow the definition in 3(b) and assume $D = 2, p_{11} = 0.6, p_{12} = 0.1, p_{21} = 0.6$, and $p_{22} = 0.9$. For two different priors ($P (C_1) = 0.2$ or 0.8 and $P (C_2) = 1 - P (C_1)$), calculate the posterior probabilities $P (C_1|x)$ and $P (C_2|x)$. 
(Hint: Calculate the probabilities for all possible samples $(x_1, x_2) \in \{(0, 0), (0, 1), (1, 0), (1, 1)\}$).} + + - I wrote the following Python program to compute these values: + + ```py + def calc_posterior(p_c1: float, D: int, p_ij: dict[tuple[int, int], float]): + priors = { + 1: p_c1, + 2: 1 - p_c1, + } + + def p_x_given_Ci(xs: list[int], i: int): + s = 1.0 + for j in range(len(xs)): + s *= pow(p_ij[i, j], 1.0 - xs[j]) * pow(1.0 - p_ij[i, j], xs[j]) + return s + + posteriors = {} + for i in [1, 2]: + for xs in product([0, 1], repeat=D): + numer = p_x_given_Ci(xs, i) * priors[i] + + def each_denom(k): return p_x_given_Ci(xs, k) * priors[k] + denom = sum(map(each_denom, priors.keys())) + posteriors[*xs, i] = numer / denom + + print("Priors:", priors) + for xs in product([0, 1], repeat=D): + print(f"{xs = }") + for i in [1, 2]: + prob = posteriors[*xs, i] + print(f" * C{i}: {prob:0.3f}") + print() + + + def prob_3c(): + D = 2 + p_ij = {} + p_ij[1, 0] = 0.6 + p_ij[1, 1] = 0.1 + p_ij[2, 0] = 0.6 + p_ij[2, 1] = 0.9 + + calc_posterior(0.2, D, p_ij) + calc_posterior(0.8, D, p_ij) + ``` + + - The values that it output are: + + ``` + Priors: {1: 0.2, 2: 0.8} + xs = (0, 0) + * C1: 0.027 + * C2: 0.973 + + xs = (0, 1) + * C1: 0.692 + * C2: 0.308 + + xs = (1, 0) + * C1: 0.027 + * C2: 0.973 + + xs = (1, 1) + * C1: 0.692 + * C2: 0.308 + + Priors: {1: 0.8, 2: 0.19999999999999996} + xs = (0, 0) + * C1: 0.308 + * C2: 0.692 + + xs = (0, 1) + * C1: 0.973 + * C2: 0.027 + + xs = (1, 0) + * C1: 0.308 + * C2: 0.692 + + xs = (1, 1) + * C1: 0.973 + * C2: 0.027 + ``` diff --git a/assignments/hwk01/hw1solve.py b/assignments/hwk01/hw1solve.py index 5d1a5a0..ad4fe07 100644 --- a/assignments/hwk01/hw1solve.py +++ b/assignments/hwk01/hw1solve.py @@ -1,4 +1,5 @@ from itertools import product +from sympy import symbols, log, diff, exp, Product, Pow def calc_posterior(p_c1: float, D: int, p_ij: dict[tuple[int, int], float]): @@ -8,19 +9,11 @@ def calc_posterior(p_c1: float, D: int, p_ij: 
dict[tuple[int, int], float]): } def p_x_given_Ci(xs: list[int], i: int): - s = 0 + s = 1.0 for j in range(len(xs)): - s += pow(p_ij[i, j], 1.0 - xs[j]) * pow(1.0 - p_ij[i, j], xs[j]) + s *= pow(p_ij[i, j], 1.0 - xs[j]) * pow(1.0 - p_ij[i, j], xs[j]) return s - # print("===") - # for i in priors.keys(): - # for xs in product([0, 1], repeat=2): - # xs = list(xs) - # print("xs", xs) - # print(i, xs, p_x_given_Ci(xs, i)) - # print("===") - posteriors = {} for i in [1, 2]: for xs in product([0, 1], repeat=D): @@ -51,5 +44,19 @@ def prob_3c(): calc_posterior(0.8, D, p_ij) -# prob_2a() +def prob_2a(): + p, x, theta, k, n = symbols("p x theta k n") + + def get_mle(expr): + likelihood_func = Product(expr, (k, 1, n)) + log_likelihood_func = log(likelihood_func) + + print(diff(log_likelihood_func, x).simplify()) + + # print(diff(expr, x)) + print(get_mle(Pow(p, x) * Pow(1 - p, 1 - x))) + print(get_mle((1 / theta) * exp(-x / theta))) + + +prob_2a() prob_3c()