搜档网
当前位置:搜档网 › 基于R语言的PCA实例详解

基于R语言的PCA实例详解

基于R语言的PCA实例详解
基于R语言的PCA实例详解

基于R语言的主成分分析(PCA)实例详解

本质上来讲,主成分分析(PCA)主要是找到一个线性转换矩阵P,作用在矩阵X(X的列向量是一条记录,行向量是一个feature)上,使其转换(或称之为投影,投影可以使用矩阵形式表示)到一个新的空间中,得到矩阵Y。

R语言中内置两种PCA的实现分别为prcomp和princomp。前者采用SVD实现,后者采用上面的实对称矩阵对角化方式实现,两种的接口类似,只是前者的参数稍微多一些,具体如下:

>prcomp(x,...)

##S3method for class'formula'

prcomp(formula,data=NULL,subset,na.action,...)

##Default S3method:

prcomp(x,retx=TRUE,center=TRUE,scale.=FALSE,

tol=NULL,...)

##S3method for class'prcomp'

predict(object,newdata,...)

输出参数>names(pr)

[1]"sdev""rotation""center""scale""x""call"

>princomp(x,...)

##S3method for class'formula'

princomp(formula,data=NULL,subset,na.action,...)

##Default S3method:

princomp(x,cor=FALSE,scores=TRUE,covmat=NULL,

subset=rep_len(TRUE,nrow(as.matrix(x))),...)

##S3method for class'princomp'

predict(object,newdata,...)

输出参数>names(pr)

[1]"sdev""loadings""center""scale""n.obs""scores""call"

例子采用prcomp,旨在分析PCA分析的正过程和逆过程。

例子中数据都采用:

>conomy<-data.frame(

x1=c(149.3,161.2,171.5,175.5,180.8,190.7,

202.1,212.4,226.1,231.9,239.0),

x2=c(4.2,4.1,3.1,3.1,1.1,2.2,2.1,5.6,5.0,5.1,0.7),

x3=c(108.1,114.8,123.2,126.9,132.1,137.7,

146.0,154.1,162.3,164.3,167.6),

y=c(15.9,16.4,19.0,19.1,18.8,20.4,22.7,

26.5,28.1,27.6,26.3))

>conomy

x1x2x3y

1149.34.2108.115.9

2161.24.1114.816.4

3171.53.1123.219.0

4175.53.1126.919.1

5180.81.1132.118.8

6190.72.2137.720.4

7202.12.1146.022.7

8212.45.6154.126.5

9226.15.0162.328.1

10231.95.1164.327.6

11239.00.7167.626.3

实例1:PCA分析(不做标准化处理:不平移&不尺度化)

>pr<-prcomp(~x1+x2+x3,data=conomy,scale=F,center=F)#PCA分析(不做标准化处理:不平移&不尺度化)

>pr$x

PC1PC2PC3

1-184.3643-1.88162440.120442835

2-197.9363-0.9151848 1.293486904

3-211.1872-0.27573770.006869002

4-216.5936-0.5359147-0.619728451

5-223.90350.7587619-2.624453929

6-235.22570.5153778-1.098457784

7-249.32420.7229582-1.324352077

8-262.4610-2.4679990-0.232945692

9-278.3630-1.11693800.535960605

10-284.2423-0.2907220 2.085641330

11-291.8747 4.3590400 1.198693966

>cbind(conomy$x1,conomy$x2,conomy$x3)%*%pr$rotation#PCA正过程

PC1PC2PC3

[1,]-184.3643-1.88162440.120442835

[2,]-197.9363-0.9151848 1.293486904

[3,]-211.1872-0.27573770.006869002

[4,]-216.5936-0.5359147-0.619728451

[5,]-223.90350.7587619-2.624453929

[6,]-235.22570.5153778-1.098457784

[7,]-249.32420.7229582-1.324352077

[8,]-262.4610-2.4679990-0.232945692

[9,]-278.3630-1.11693800.535960605

[10,]-284.2423-0.2907220 2.085641330

[11,]-291.8747 4.3590400 1.198693966

>pr$x%*%solve(pr$rotation)#PCA逆过程

x1x2x3

1149.34.2108.1

2161.24.1114.8

3171.53.1123.2

4175.53.1126.9

5180.81.1132.1

6190.72.2137.7

7202.12.1146.0

8212.45.6154.1

9226.15.0162.3

10231.95.1164.3

11239.00.7167.6

实例2:PCA分析(部分标准化:只平移)

>conomy<-data.frame(

x1=c(149.3,161.2,171.5,175.5,180.8,190.7,

202.1,212.4,226.1,231.9,239.0),

x2=c(4.2,4.1,3.1,3.1,1.1,2.2,2.1,5.6,5.0,5.1,0.7),

x3=c(108.1,114.8,123.2,126.9,132.1,137.7,

146.0,154.1,162.3,164.3,167.6),

y=c(15.9,16.4,19.0,19.1,18.8,20.4,22.7,

26.5,28.1,27.6,26.3))

>pr<-prcomp(~x1+x2+x3,data=conomy,scale=F,center=T)#PCA分析(部分标准化:只平移)

>pr$x

PC1PC2PC3

1-55.2431860.8526477-0.6319214

2-41.6411980.4642629-1.7916913

3-28.396312-0.2823642-0.5011266

4-23.004179-0.11314370.2645521

5-15.693755-1.7829447 1.9673323

6-4.361502-0.94829860.7576149

79.734530-0.9774781 1.1588999

822.815447 2.6055542 1.1976004

938.749791 1.77566960.3621647

1044.662808 1.4979238-1.2531020

1152.377555-3.0918290-1.5303231

>cbind(conomy$x1-pr$center[1],conomy$x2-pr$center[2],conomy$x3-pr$center[3])%*%pr$rotation#PCA正过程PC1PC2PC3

[1,]-55.2431860.8526477-0.6319214

[2,]-41.6411980.4642629-1.7916913

[3,]-28.396312-0.2823642-0.5011266

[4,]-23.004179-0.11314370.2645521

[5,]-15.693755-1.7829447 1.9673323

[6,]-4.361502-0.94829860.7576149

[7,]9.734530-0.9774781 1.1588999

[8,]22.815447 2.6055542 1.1976004

[9,]38.749791 1.77566960.3621647

[10,]44.662808 1.4979238-1.2531020

[11,]52.377555-3.0918290-1.5303231

>temp<-pr$x%*%solve(pr$rotation)#PCA逆过程>cbind(temp[,1]+pr$center[1],temp[,2]+pr$center[2],temp[,3]+pr$center[3])

[,1][,2][,3]

1149.3 4.2108.1

2161.2 4.1114.8

3171.5 3.1123.2

4175.5 3.1126.9

5180.8 1.1132.1

6190.7 2.2137.7

7202.1 2.1146.0

8212.4 5.6154.1

9226.1 5.0162.3

10231.9 5.1164.3

11239.00.7167.6

实例3:PCA分析(标准化处理:平移&尺度化)

>conomy<-data.frame(

x1=c(149.3,161.2,171.5,175.5,180.8,190.7,

202.1,212.4,226.1,231.9,239.0),

x2=c(4.2,4.1,3.1,3.1,1.1,2.2,2.1,5.6,5.0,5.1,0.7),

x3=c(108.1,114.8,123.2,126.9,132.1,137.7,

146.0,154.1,162.3,164.3,167.6),

y=c(15.9,16.4,19.0,19.1,18.8,20.4,22.7,

26.5,28.1,27.6,26.3))

>pr<-prcomp(~x1+x2+x3,data=conomy,scale=T,center=T)#PCA分析(标准化处理:平移&尺度化)

>pr$x

PC1PC2PC3

1-2.12588720.63865815-0.02072230

2-1.61892730.55553922-0.07111317

3-1.1151675-0.07297970-0.02173008

4-0.8942966-0.082369980.01081318

5-0.6442081-1.306685230.07258248

6-0.1903514-0.659147450.02655252

70.3596219-0.743674470.04278124

80.9718018 1.354058770.06286252

9 1.55931590.964045580.02357446

10 1.7669951 1.01521706-0.04498818

11 1.9311034-1.66266195-0.08061267

>cbind((conomy$x1-pr$center[1])/pr$scale[1],(conomy$x2-pr$center[2])/pr$scale[2], (conomy$x3-pr$center[3])/pr$scale[3])%*%pr$rotation#PCA正过程PC1PC2PC3

[1,]-2.12588720.63865815-0.02072230

[2,]-1.61892730.55553922-0.07111317

[3,]-1.1151675-0.07297970-0.02173008

[4,]-0.8942966-0.082369980.01081318

[5,]-0.6442081-1.306685230.07258248

[6,]-0.1903514-0.659147450.02655252

[7,]0.3596219-0.743674470.04278124

[8,]0.9718018 1.354058770.06286252

[9,] 1.55931590.964045580.02357446

[10,] 1.7669951 1.01521706-0.04498818

[11,] 1.9311034-1.66266195-0.08061267

>temp<-pr$x%*%solve(pr$rotation)#PCA逆过程

>cbind(temp[,1]*pr$scale[1]+pr$center[1],temp[,2]*pr$scale[2]+pr$center[2],temp[,3]*pr$scale[3]+pr$center[3]) [,1][,2][,3]

1149.3 4.2108.1

2161.2 4.1114.8

3171.5 3.1123.2

4175.5 3.1126.9

5180.8 1.1132.1

6190.7 2.2137.7

7202.1 2.1146.0

8212.4 5.6154.1

9226.1 5.0162.3

10231.9 5.1164.3

11239.00.7167.6

相关主题