import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler

class PCA:
    """Principal component analysis via power iteration with deflation.

    The leading eigenpairs of the sample covariance matrix are extracted one
    at a time: power iteration finds the dominant eigenpair, then that
    component is subtracted from the matrix (deflation) before the next pass.

    Attributes set by ``fit``:
        mean: per-feature mean of the training data.
        cov_mat: sample covariance matrix (divisor ``n_samples - 1``).
        rho: the extracted eigenvalues, in extraction order.
        eigen_vecs: the matching eigenvectors stored as columns, with shape
            ``(n_features, n_components)``.
    """

    def __init__(self, n_components):
        """Store the number of principal components to extract."""
        self.n_components = n_components

    def fit(self, X):
        """Estimate the leading eigenpairs of the covariance matrix of X.

        Args:
            X: array-like of shape ``(n_samples, n_features)``.

        Returns:
            Tuple ``(rho, eigen_vecs)``: the eigenvalues and the eigenvectors
            (as columns) that were extracted.

        Raises:
            ValueError: if X is not 2-D, has fewer than two samples, contains
                non-finite values, or if ``n_components`` exceeds the number
                of features.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError('X must be a 2-D array of shape (n_samples, n_features)')
        n_elements, n_features = X.shape
        if n_elements < 2:
            # The covariance divisor below is n_elements - 1.
            raise ValueError('at least two samples are required')
        if self.n_components > n_features:
            raise ValueError('n_components must not exceed the number of features')

        mu = np.average(X, axis=0)
        X0 = X - mu
        self.mean = mu  # kept so transform() can center new data the same way
        self.cov_mat = X0.T @ X0 / (n_elements - 1)
        mat = self.cov_mat
        rho = np.empty(self.n_components)
        eigen_vecs = np.empty((mat.shape[0], self.n_components))
        for nth in range(self.n_components):
            # Extract the dominant eigenvalue and eigenvector of the current matrix.
            rho[nth], v = self.__max_eigen(mat)
            eigen_vecs[:, nth] = v
            # Deflation: remove the v component from the matrix.
            mat = mat - rho[nth] * np.outer(v, v)
        self.rho = rho
        self.eigen_vecs = eigen_vecs
        return rho, eigen_vecs

    def transform(self, X):
        """Project X onto the fitted components.

        X is centered with the mean learned in ``fit`` before projection, so
        the scores of the training data have zero mean.
        """
        return (np.asarray(X, dtype=float) - self.mean) @ self.eigen_vecs

    def get_explained_variance(self):
        """Return the eigenvalues extracted by ``fit``."""
        return self.rho

    def get_explained_variance_ratio(self):
        """Return each eigenvalue divided by the trace of the covariance matrix.

        When the trace is zero (the data had no variance) zeros are returned
        instead of dividing by zero.
        """
        trace = np.trace(self.cov_mat)
        if trace == 0:
            return np.zeros_like(self.rho)
        return self.rho / trace

    def __max_eigen(self, mat, epsilon=1e-10, max_iter=100000):
        """Find the dominant eigenpair of a symmetric matrix by power iteration.

        Args:
            mat: square symmetric matrix.
            epsilon: stop once the Euclidean distance between successive
                normalized iterates falls below this value.
            max_iter: upper bound on the number of iterations; when it is
                reached a RuntimeWarning is issued and the current estimate
                is returned.

        Returns:
            Tuple ``(rho, v)``: the eigenvalue estimate (the norm of
            ``mat @ v`` from the last step) and the unit eigenvector estimate.
            For an all-zero matrix ``(0.0, zero vector)`` is returned.

        Raises:
            ValueError: if the matrix contains NaN or infinite values.
        """
        import warnings

        # Start from the row with the largest norm.
        nth_row = np.argmax(np.square(mat).sum(axis=1))
        print(f'nth_row={nth_row}')
        v = mat[nth_row, :]  # initial vector
        norm = np.sqrt(v @ v)
        if not np.isfinite(norm):
            raise ValueError('matrix contains NaN or infinite values')
        if norm == 0:
            # The largest row is zero, so the whole matrix is zero:
            # there is nothing left to extract.
            return 0.0, np.zeros(mat.shape[0])
        v = v / norm

        rho = 0.0
        for _ in range(max_iter):
            v_new = mat @ v
            rho = np.sqrt(v_new @ v_new)
            if rho == 0:
                # mat @ v vanished, so v is an eigenvector with eigenvalue zero.
                return 0.0, v
            v_new = v_new / rho
            dv = v_new - v
            diff = np.sqrt(dv @ dv)
            v = v_new
            if diff < epsilon:
                break
        else:
            warnings.warn(
                f'power iteration did not converge within {max_iter} iterations',
                RuntimeWarning,
            )
        return rho, v

# Demo: apply the PCA class to the scikit-learn wine dataset.
data = load_wine()
X = data.data  # samples described by 13-dimensional wine feature vectors
print(f'X.shape={X.shape}')

# Standardize the features before the decomposition.
sc = StandardScaler()
X_std = sc.fit(X).transform(X)

# Extract three principal components and project the standardized data onto them.
pca = PCA(n_components=3)
pca.fit(X_std)
X_reduced = pca.transform(X_std)
print(X_reduced.shape)
print(f'固有値={pca.get_explained_variance()}')  # eigenvalues
print(f'因子寄与率={pca.get_explained_variance_ratio()}')  # explained-variance ratio


