-
Notifications
You must be signed in to change notification settings - Fork 4
/
PCA.py
70 lines (56 loc) · 1.74 KB
/
PCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 27 20:05:37 2019
@author: Loujaina
"""
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def PCA(D, alpha):
    """Principal component analysis via the covariance matrix.

    Parameters
    ----------
    D : array-like, shape (n_samples, n_features)
        Data matrix, one sample per row.
    alpha : sequence of float
        Explained-variance thresholds in (0, 1]; one reduced
        dimensionality is computed per threshold.  (Generalized: the
        original hard-coded exactly 4 thresholds.)

    Returns
    -------
    reducedD : ndarray of float, shape (len(alpha),)
        For each alpha[j], the number of leading principal components
        needed for the retained variance fraction (rounded to 2
        decimal places, as in the original) to reach alpha[j].
    vecs : ndarray, shape (n_features, n_features)
        Eigenvectors of the covariance matrix as columns, sorted by
        descending eigenvalue.
    """
    # Kept as module globals for backward compatibility with code that
    # inspects them after the call (the original exposed them this way).
    global mean, cov, vals, vecs, reduced
    # Column-wise mean; broadcasting centres every row without an
    # explicit transpose.
    mean = np.mean(D, axis=0)
    Z = D - mean
    # rowvar=False -> each column is a variable; bias=True -> divide by N.
    cov = np.cov(Z, rowvar=False, bias=True)
    # eigh: covariance is symmetric, eigenvalues returned ascending.
    vals, vecs = np.linalg.eigh(cov)
    # Re-order eigenpairs by descending eigenvalue.
    order = vals.argsort()[::-1]
    vals = vals[order]
    vecs = vecs[:, order]
    # Explained-variance criterion: smallest prefix of eigenvalues
    # whose (rounded) fraction of the total reaches each threshold.
    sumVals = np.sum(vals)
    reducedD = np.empty(len(alpha))
    for j in range(len(alpha)):
        fractionVals = 0
        i = -1  # index of the last eigenvalue accumulated so far
        while (round(fractionVals / float(sumVals), 2) < alpha[j]
               and i < vals.size - 1):
            i += 1
            fractionVals += vals[i]
        # Components used are indices 0..i, i.e. i + 1 of them.
        # (Fixes an off-by-one: the original stored i, reporting 0
        # when a single component already sufficed.)
        reducedD[j] = i + 1
    return reducedD, vecs
#Testing using iris dataset
def main():
    """Smoke-test PCA on the iris data: project onto the first two
    principal components and scatter-plot the result.

    Fixes the original, which (a) passed a scalar alpha (crashes at
    ``alpha[j]``), (b) treated PCA's (reducedD, vecs) tuple return as
    an array, and (c) projected onto all eigenvectors instead of two.
    """
    iris = datasets.load_iris()
    x = iris.data[:150, :3]
    # Four thresholds, matching what PCA was originally written to take.
    reducedD, vecs = PCA(x, [0.8, 0.9, 0.95, 0.99])
    # Project onto the two leading eigenvectors (columns of vecs) so the
    # scatter plot is 2-D.  NOTE(review): projects the uncentred data,
    # as the original code did — only shifts the plot by a constant.
    A = x.dot(vecs[:, :2])
    plt.scatter(A[:, 0], A[:, 1])
    plt.show()


if __name__ == "__main__":
    main()