Commit c72cf74
EDIT: edit for pep8
ctgk committed Sep 27, 2024
1 parent b61a1fe commit c72cf74
Showing 102 changed files with 635 additions and 404 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -15,11 +15,11 @@ repos:
       name: Check file encoding
       entry: bash -c 'for file in "$@"; do file --mime-encoding $file | grep -q "ascii\|binary"; if [ $? != 0 ]; then echo $file; exit 1; fi; done' --
       types: [text]
-    # - id: flake8
-    #   name: Check Python format
-    #   entry: flake8 --count --show-source --statistics
-    #   language: system
-    #   types: [python]
+    - id: flake8
+      name: Check Python format
+      entry: flake8 --count --show-source --statistics
+      language: system
+      types: [python]
     - id: unittest
       name: Run Python unittests
       language: system
20 changes: 10 additions & 10 deletions notebooks/ch07_Sparse_Kernel_Machines.ipynb

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions prml/bayesnet/discrete.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.bayesnet.probability_function import ProbabilityFunction
 from prml.bayesnet.random_variable import RandomVariable
 
@@ -35,7 +36,7 @@ def __init__(self, n_class:int):
         self.is_observed = False
 
     def __repr__(self):
-        string = f"DiscreteVariable("
+        string = "DiscreteVariable("
         if self.is_observed:
             string += f"observed={self.proba})"
         else:
@@ -201,7 +202,8 @@ def send_message(self, proprange, exclude=None):
             if random_variable is not exclude:
                 self.send_message_to(random_variable, proprange)
 
-        if proprange == 0: return
+        if proprange == 0:
+            return
 
         for random_variable in self.condition:
             if random_variable is not exclude:
2 changes: 1 addition & 1 deletion prml/clustering/__init__.py
@@ -1,4 +1,4 @@
-from .k_means import KMeans
+from prml.clustering.k_means import KMeans
 
 
 __all__ = [
20 changes: 10 additions & 10 deletions prml/clustering/k_means.py
@@ -7,13 +7,13 @@ class KMeans(object):
     def __init__(self, n_clusters):
         self.n_clusters = n_clusters
 
-    def fit(self, X, iter_max=100):
+    def fit(self, x, iter_max=100):
         """
         perform k-means algorithm
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
         iter_max : int
             maximum number of iterations
@@ -23,31 +23,31 @@ def fit(self, X, iter_max=100):
         centers : (n_clusters, n_features) ndarray
             center of each cluster
         """
-        I = np.eye(self.n_clusters)
-        centers = X[np.random.choice(len(X), self.n_clusters, replace=False)]
+        eye = np.eye(self.n_clusters)
+        centers = x[np.random.choice(len(x), self.n_clusters, replace=False)]
         for _ in range(iter_max):
             prev_centers = np.copy(centers)
-            D = cdist(X, centers)
+            D = cdist(x, centers)
             cluster_index = np.argmin(D, axis=1)
-            cluster_index = I[cluster_index]
-            centers = np.sum(X[:, None, :] * cluster_index[:, :, None], axis=0) / np.sum(cluster_index, axis=0)[:, None]
+            cluster_index = eye[cluster_index]
+            centers = np.sum(x[:, None, :] * cluster_index[:, :, None], axis=0) / np.sum(cluster_index, axis=0)[:, None]
             if np.allclose(prev_centers, centers):
                 break
         self.centers = centers
 
-    def predict(self, X):
+    def predict(self, x):
         """
         calculate closest cluster center index
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
         Returns
        -------
         index : (sample_size,) ndarray
             indicates which cluster they belong
         """
-        D = cdist(X, self.centers)
+        D = cdist(x, self.centers)
         return np.argmin(D, axis=1)
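
For context, a minimal usage sketch of the renamed k-means API; the toy data below are hypothetical, not part of the commit:

    import numpy as np

    from prml.clustering import KMeans

    # two hypothetical, well-separated 2-D blobs
    x = np.concatenate([
        np.random.normal(loc=0., scale=1., size=(50, 2)),
        np.random.normal(loc=5., scale=1., size=(50, 2)),
    ])
    model = KMeans(n_clusters=2)
    model.fit(x, iter_max=100)   # estimates model.centers, shape (2, 2)
    labels = model.predict(x)    # index of the closest center for each sample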
1 change: 1 addition & 0 deletions prml/dimreduction/autoencoder.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml import nn
 
 
3 changes: 2 additions & 1 deletion prml/dimreduction/bayesian_pca.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.dimreduction.pca import PCA
 
 
@@ -26,7 +27,7 @@ def fit(self, X, iter_max=100, initial="random"):
         """
         initial_list = ["random", "eigen"]
         self.mean = np.mean(X, axis=0)
-        self.I = np.eye(self.n_components)
+        self.eye = np.eye(self.n_components)
         if initial not in initial_list:
             print("available initializations are {}".format(initial_list))
         if initial == "random":
80 changes: 40 additions & 40 deletions prml/dimreduction/pca.py
@@ -15,14 +15,14 @@ def __init__(self, n_components):
         assert isinstance(n_components, int)
         self.n_components = n_components
 
-    def fit(self, X, method="eigen", iter_max=100):
-        """
-        maximum likelihood estimate of pca parameters
+    def fit(self, x, method="eigen", iter_max=100):
+        r"""Maximum likelihood estimate of pca parameters.
         x ~ \int_z N(x|Wz+mu,sigma^2)N(z|0,I)dz
         Parameters
         ----------
-        X : (sample_size, n_features) ndarray
+        x : (sample_size, n_features) ndarray
             input data
         method : str
             method to estimate the parameters
@@ -46,111 +46,111 @@ def fit(self, X, method="eigen", iter_max=100):
         method_list = ["eigen", "em"]
         if method not in method_list:
print("availabel methods are {}".format(method_list))
-        self.mean = np.mean(X, axis=0)
-        getattr(self, method)(X - self.mean, iter_max)
+        self.mean = np.mean(x, axis=0)
+        getattr(self, method)(x - self.mean, iter_max)
 
-    def eigen(self, X, *arg):
-        sample_size, n_features = X.shape
+    def eigen(self, x, *arg):
+        sample_size, n_features = x.shape
         if sample_size >= n_features:
-            cov = np.cov(X, rowvar=False)
+            cov = np.cov(x, rowvar=False)
             values, vectors = np.linalg.eigh(cov)
             index = n_features - self.n_components
         else:
-            cov = np.cov(X)
+            cov = np.cov(x)
             values, vectors = np.linalg.eigh(cov)
-            vectors = (X.T @ vectors) / np.sqrt(sample_size * values)
+            vectors = (x.T @ vectors) / np.sqrt(sample_size * values)
             index = sample_size - self.n_components
-        self.I = np.eye(self.n_components)
+        self.eye = np.eye(self.n_components)
         if index == 0:
             self.var = 0
         else:
             self.var = np.mean(values[:index])
 
-        self.W = vectors[:, index:].dot(np.sqrt(np.diag(values[index:]) - self.var * self.I))
-        self.__M = self.W.T @ self.W + self.var * self.I
+        self.W = vectors[:, index:].dot(np.sqrt(np.diag(values[index:]) - self.var * self.eye))
+        self.__M = self.W.T @ self.W + self.var * self.eye
         self.C = self.W @ self.W.T + self.var * np.eye(n_features)
         if index == 0:
             self.Cinv = np.linalg.inv(self.C)
         else:
             self.Cinv = np.eye(n_features) / self.var - self.W @ np.linalg.inv(self.__M) @ self.W.T / self.var

def em(self, X, iter_max):
self.I = np.eye(self.n_components)
self.W = np.eye(np.size(X, 1), self.n_components)
def em(self, x, iter_max):
self.eye = np.eye(self.n_components)
self.W = np.eye(np.size(x, 1), self.n_components)
self.var = 1.
for i in range(iter_max):
W = np.copy(self.W)
stats = self._expectation(X)
self._maximization(X, *stats)
stats = self._expectation(x)
self._maximization(x, *stats)
if np.allclose(W, self.W):
break
self.C = self.W @ self.W.T + self.var * np.eye(np.size(X, 1))
self.C = self.W @ self.W.T + self.var * np.eye(np.size(x, 1))
self.Cinv = np.linalg.inv(self.C)

def _expectation(self, X):
self.__M = self.W.T @ self.W + self.var * self.I
def _expectation(self, x):
self.__M = self.W.T @ self.W + self.var * self.eye
Minv = np.linalg.inv(self.__M)
Ez = X @ self.W @ Minv
Ez = x @ self.W @ Minv
Ezz = self.var * Minv + Ez[:, :, None] * Ez[:, None, :]
return Ez, Ezz

def _maximization(self, X, Ez, Ezz):
self.W = X.T @ Ez @ np.linalg.inv(np.sum(Ezz, axis=0))
def _maximization(self, x, Ez, Ezz):
self.W = x.T @ Ez @ np.linalg.inv(np.sum(Ezz, axis=0))
self.var = np.mean(
np.mean(X ** 2, axis=1)
- 2 * np.mean(Ez @ self.W.T * X, axis=1)
+ np.trace((Ezz @ self.W.T @ self.W).T) / np.size(X, 1))
np.mean(x ** 2, axis=1)
- 2 * np.mean(Ez @ self.W.T * x, axis=1)
+ np.trace((Ezz @ self.W.T @ self.W).T) / np.size(x, 1))

def transform(self, X):
def transform(self, x):
"""
project input data into latent space
p(Z|X) = N(Z|(X-mu)WMinv, sigma^-2M)
p(Z|x) = N(Z|(x-mu)WMinv, sigma^-2M)
Parameters
----------
X : (sample_size, n_features) ndarray
x : (sample_size, n_features) ndarray
input data
Returns
-------
Z : (sample_size, n_components) ndarray
projected input data
"""
return np.linalg.solve(self.__M, ((X - self.mean) @ self.W).T).T
return np.linalg.solve(self.__M, ((x - self.mean) @ self.W).T).T

def fit_transform(self, X, method="eigen"):
def fit_transform(self, x, method="eigen"):
"""
perform pca and whiten the input data
Parameters
----------
X : (sample_size, n_features) ndarray
x : (sample_size, n_features) ndarray
input data
Returns
-------
Z : (sample_size, n_components) ndarray
projected input data
"""
self.fit(X, method)
return self.transform(X)
self.fit(x, method)
return self.transform(x)

def proba(self, X):
def proba(self, x):
"""
the marginal distribution of the observed variable
Parameters
----------
X : (sample_size, n_features) ndarray
x : (sample_size, n_features) ndarray
input data
Returns
-------
p : (sample_size,) ndarray
value of the marginal distribution
"""
d = X - self.mean
d = x - self.mean
return (
np.exp(-0.5 * np.sum(d @ self.Cinv * d, axis=-1))
/ np.sqrt(np.linalg.det(self.C))
/ np.power(2 * np.pi, 0.5 * np.size(X, 1)))
/ np.power(2 * np.pi, 0.5 * np.size(x, 1)))
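
For context, a minimal usage sketch of the probabilistic PCA class touched above; the random data are hypothetical, and fit accepts method="eigen" or method="em" as the diff shows:

    import numpy as np

    from prml.dimreduction.pca import PCA

    # hypothetical data: 100 samples in 5 dimensions
    x = np.random.randn(100, 5)
    model = PCA(n_components=2)
    z = model.fit_transform(x, method="eigen")  # (100, 2) projection onto the latent space
    p = model.proba(x)                          # marginal density N(x | mean, W W^T + var * I)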
5 changes: 2 additions & 3 deletions prml/kernel/__init__.py
@@ -1,8 +1,7 @@
-from prml.kernel.polynomial import PolynomialKernel
-from prml.kernel.rbf import RBF
-
 from prml.kernel.gaussian_process_classifier import GaussianProcessClassifier
 from prml.kernel.gaussian_process_regressor import GaussianProcessRegressor
+from prml.kernel.polynomial import PolynomialKernel
+from prml.kernel.rbf import RBF
 from prml.kernel.relevance_vector_classifier import RelevanceVectorClassifier
 from prml.kernel.relevance_vector_regressor import RelevanceVectorRegressor
 from prml.kernel.support_vector_classifier import SupportVectorClassifier
18 changes: 9 additions & 9 deletions prml/kernel/gaussian_process_classifier.py
@@ -20,18 +20,18 @@ def __init__(self, kernel, noise_level=1e-4):
     def _sigmoid(self, a):
         return np.tanh(a * 0.5) * 0.5 + 0.5
 
-    def fit(self, X, t):
-        if X.ndim == 1:
-            X = X[:, None]
-        self.X = X
+    def fit(self, x, t):
+        if x.ndim == 1:
+            x = x[:, None]
+        self.x = x
         self.t = t
-        Gram = self.kernel(X, X)
+        Gram = self.kernel(x, x)
         self.covariance = Gram + np.eye(len(Gram)) * self.noise_level
         self.precision = np.linalg.inv(self.covariance)
 
-    def predict(self, X):
-        if X.ndim == 1:
-            X = X[:, None]
-        K = self.kernel(X, self.X)
+    def predict(self, x):
+        if x.ndim == 1:
+            x = x[:, None]
+        K = self.kernel(x, self.x)
         a_mean = K @ self.precision @ self.t
         return self._sigmoid(a_mean)
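
For context, a minimal usage sketch of the lower-cased classifier API. The RBF kernel parameters below are an assumption, not taken from this commit; fit and predict reshape 1-D inputs to column vectors as the diff shows:

    import numpy as np

    from prml.kernel import RBF, GaussianProcessClassifier

    # hypothetical two-class data on the real line
    x_train = np.array([-1., -0.5, 0.5, 1.])
    t_train = np.array([0., 0., 1., 1.])
    model = GaussianProcessClassifier(kernel=RBF(np.array([1., 1.])), noise_level=1e-4)
    model.fit(x_train, t_train)                      # 1-D input is reshaped to (4, 1)
    proba = model.predict(np.linspace(-2., 2., 50))  # sigmoid of the predictive mean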
10 changes: 5 additions & 5 deletions prml/kernel/gaussian_process_regressor.py
@@ -49,18 +49,18 @@ def fit(self, X, t, iter_max=0, learning_rate=0.1):
         log_likelihood_list = [-np.inf]
         self.X = X
         self.t = t
-        I = np.eye(len(X))
-        Gram = self.kernel(X, X)
-        self.covariance = Gram + I / self.beta
+        eye = np.eye(len(X))
+        gram = self.kernel(X, X)
+        self.covariance = gram + eye / self.beta
         self.precision = np.linalg.inv(self.covariance)
         for i in range(iter_max):
             gradients = self.kernel.derivatives(X, X)
             updates = np.array(
                 [-np.trace(self.precision.dot(grad)) + t.dot(self.precision.dot(grad).dot(self.precision).dot(t)) for grad in gradients])
             for j in range(iter_max):
                 self.kernel.update_parameters(learning_rate * updates)
-                Gram = self.kernel(X, X)
-                self.covariance = Gram + I / self.beta
+                gram = self.kernel(X, X)
+                self.covariance = gram + eye / self.beta
                 self.precision = np.linalg.inv(self.covariance)
                 log_like = self.log_likelihood()
                 if log_like > log_likelihood_list[-1]:
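
Likewise, a hedged usage sketch of the regressor's hyperparameter fitting shown above. The RBF parameter array and the beta keyword are assumptions; only fit's signature (X, t, iter_max=0, learning_rate=0.1) and the import path come from this commit:

    import numpy as np

    from prml.kernel import RBF, GaussianProcessRegressor

    # hypothetical noisy sinusoid
    x_train = np.linspace(0., 1., 10)[:, None]
    t_train = np.sin(2 * np.pi * x_train.ravel()) + np.random.normal(scale=0.1, size=10)
    model = GaussianProcessRegressor(kernel=RBF(np.array([1., 15.])), beta=100.)
    # iter_max > 0 turns on the gradient updates of the kernel parameters
    model.fit(x_train, t_train, iter_max=100, learning_rate=0.1)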
1 change: 1 addition & 0 deletions prml/kernel/polynomial.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.kernel.kernel import Kernel
 
 
1 change: 1 addition & 0 deletions prml/kernel/rbf.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from prml.kernel.kernel import Kernel
 
 