作业25答案

用到的包(如提示包不存在可以用install.packages("包名")的方式安装):

require('readr')
require('ggplot2')
require('dplyr')
require('tidyr')
require('caret')
require('corrplot')
require('Hmisc')
require('parallel')
require('doParallel')
require('ggthemes')
require('e1071')

练习1 数据导入

  1. 将数据集的csv文件导入
# Load the voice data set from "voice.csv" (path is relative to the current
# working directory); the first row of the file supplies the column names.
voice_Original <- read.csv(file = "voice.csv", header = TRUE)

练习2 查看数据

  1. 用Hmisc包中的describe函数对数据集进行概述
# Print a per-column summary of the data set. The call is namespace-qualified
# so it is explicit that Hmisc's describe() is the one being used.
Hmisc::describe(voice_Original)

voice_Original 

 21  Variables      3168  Observations
-------------------------------------------------------------------
meanfreq 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1  0.1809  0.1260  0.1411  0.1637 
    .50     .75     .90     .95 
 0.1848  0.1991  0.2177  0.2291 

lowest : 0.03936 0.04825 0.05965 0.05978 0.06218
highest: 0.24353 0.24436 0.24704 0.24964 0.25112 
-------------------------------------------------------------------
sd 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1 0.05713 0.03162 0.03396 0.04195 
    .50     .75     .90     .95 
0.05916 0.06702 0.07966 0.08549 

lowest : 0.01836 0.02178 0.02400 0.02427 0.02456
highest: 0.11126 0.11126 0.11265 0.11451 0.11527 
-------------------------------------------------------------------
median 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3077       1  0.1856  0.1164  0.1340  0.1696 
    .50     .75     .90     .95 
 0.1900  0.2106  0.2274  0.2358 

lowest : 0.01097 0.01359 0.01579 0.02699 0.02936
highest: 0.25663 0.25698 0.25742 0.26054 0.26122 
-------------------------------------------------------------------
Q25 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3103       1  0.1405 0.04358 0.07509 0.11109 
    .50     .75     .90     .95 
0.14029 0.17594 0.20063 0.21524 

lowest : 0.0002288 0.0002355 0.0002395 0.0002502 0.0002669
highest: 0.2394595 0.2405416 0.2407352 0.2421235 0.2473469 
-------------------------------------------------------------------
Q75 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3034       1  0.2248  0.1874  0.1963  0.2087 
    .50     .75     .90     .95 
 0.2257  0.2437  0.2536  0.2577 

lowest : 0.04295 0.05827 0.07596 0.09019 0.09267
highest: 0.26879 0.26892 0.26894 0.26985 0.27347 
-------------------------------------------------------------------
IQR 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3073       1 0.08431 0.02549 0.02931 0.04256 
    .50     .75     .90     .95 
0.09428 0.11418 0.13284 0.15632 

lowest : 0.01456 0.01492 0.01511 0.01549 0.01659
highest: 0.24530 0.24597 0.24819 0.24877 0.25223 
-------------------------------------------------------------------
skew 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1    3.14   1.123   1.299   1.650 
    .50     .75     .90     .95 
  2.197   2.932   3.916   6.918 

lowest :  0.1417  0.2850  0.3260  0.5296  0.5487
highest: 32.3507 33.1673 33.5663 34.5375 34.7255 
-------------------------------------------------------------------
kurt 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1   36.57   3.755   4.293   5.670 
    .50     .75     .90     .95 
  8.318  13.649  27.294  75.169 

lowest :    2.068    2.210    2.269    2.293    2.463
highest: 1128.535 1193.434 1202.685 1271.354 1309.613 
-------------------------------------------------------------------
sp.ent 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1  0.8951  0.8168  0.8322  0.8618 
    .50     .75     .90     .95 
 0.9018  0.9287  0.9513  0.9630 

lowest : 0.7387 0.7476 0.7477 0.7485 0.7487
highest: 0.9764 0.9765 0.9765 0.9785 0.9820 
-------------------------------------------------------------------
sfm 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1  0.4082  0.1584  0.1883  0.2580 
    .50     .75     .90     .95 
 0.3963  0.5337  0.6713  0.7328 

lowest : 0.03688 0.08024 0.08096 0.08220 0.08266
highest: 0.82259 0.82267 0.82610 0.83135 0.84294 
-------------------------------------------------------------------
mode 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    2825       1  0.1653 0.00000 0.01629 0.11802 
    .50     .75     .90     .95 
0.18660 0.22110 0.24901 0.26081 

lowest : 0.0000000 0.0007279 0.0007749 0.0008008 0.0008427
highest: 0.2791181 0.2795230 0.2795852 0.2797034 0.2800000 
-------------------------------------------------------------------
centroid 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1  0.1809  0.1260  0.1411  0.1637 
    .50     .75     .90     .95 
 0.1848  0.1991  0.2177  0.2291 

lowest : 0.03936 0.04825 0.05965 0.05978 0.06218
highest: 0.24353 0.24436 0.24704 0.24964 0.25112 
-------------------------------------------------------------------
meanfun 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3166       1  0.1428 0.09363 0.10160 0.11700 
    .50     .75     .90     .95 
0.14052 0.16958 0.18519 0.19343 

lowest : 0.05557 0.05705 0.06097 0.06254 0.06348
highest: 0.22342 0.22576 0.22915 0.23114 0.23764 
-------------------------------------------------------------------
minfun 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0     913       1  0.0368 0.01579 0.01613 0.01822 
    .50     .75     .90     .95 
0.04611 0.04790 0.05054 0.05644 

lowest : 0.009775 0.009785 0.009901 0.009911 0.010163
highest: 0.168421 0.178571 0.185185 0.200000 0.204082 
-------------------------------------------------------------------
maxfun 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0     123    0.99  0.2588  0.1925  0.2192  0.2540 
    .50     .75     .90     .95 
 0.2712  0.2775  0.2791  0.2791 

lowest : 0.1031 0.1053 0.1087 0.1111 0.1124
highest: 0.2774 0.2775 0.2778 0.2791 0.2791 
-------------------------------------------------------------------
meandom 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    2999       1  0.8292  0.1045  0.1888  0.4198 
    .50     .75     .90     .95 
 0.7658  1.1772  1.5602  1.8004 

lowest : 0.007812 0.007979 0.007990 0.008185 0.008247
highest: 2.544271 2.591580 2.676989 2.805246 2.957682 
-------------------------------------------------------------------
mindom 
       n  missing   unique     Info     Mean      .05      .10 
    3168        0       77     0.92  0.05265 0.007812 0.007812 
     .25      .50      .75      .90      .95 
0.007812 0.023438 0.070312 0.164062 0.187500 

lowest : 0.004883 0.007812 0.014648 0.015625 0.019531
highest: 0.343750 0.351562 0.400391 0.449219 0.458984 
-------------------------------------------------------------------
maxdom 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    1054       1   5.047  0.3125  0.6094  2.0703 
    .50     .75     .90     .95 
 4.9922  7.0078  9.4219 10.6406 

lowest :  0.007812  0.015625  0.023438  0.054688  0.070312
highest: 21.515625 21.562500 21.796875 21.843750 21.867188 
-------------------------------------------------------------------
dfrange 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    1091       1   4.995  0.2656  0.5607  2.0449 
    .50     .75     .90     .95 
 4.9453  6.9922  9.3750 10.6090 

lowest :  0.000000  0.007812  0.015625  0.019531  0.024414
highest: 21.492188 21.539062 21.773438 21.820312 21.843750 
-------------------------------------------------------------------
modindx 
      n missing  unique    Info    Mean     .05     .10     .25 
   3168       0    3079       1  0.1738 0.05775 0.07365 0.09977 
    .50     .75     .90     .95 
0.13936 0.20918 0.32436 0.40552 

lowest : 0.00000 0.01988 0.02165 0.02194 0.02217
highest: 0.84448 0.85470 0.85776 0.87950 0.93237 
-------------------------------------------------------------------
label 
      n missing  unique 
   3168       0       2 

female (1584, 50%), male (1584, 50%) 
-------------------------------------------------------------------

练习3 数据操作

  1. 将数据集中的sp.ent属性以0.9作为阈值分为两类(大于0.9记为"High",否则记为"Low")
### recode sp.ent as a categorical variable
# The numeric sp.ent column is overwritten in place: "High" where the value is
# greater than 0.9, "Low" otherwise (ifelse() keeps NA as NA).
voice_Original <- mutate(
  voice_Original,
  sp.ent = ifelse(sp.ent > 0.9, "High", "Low")
)

练习4 图形绘制操作

  1. 用ggplot画出meanfreq(横轴)与dfrange(纵轴)的散点图,并按label属性对点着色
### visual exploration of the dataset
# library() is used instead of require(): if ggplot2 is not installed the
# script stops here with a clear error, rather than continuing and failing
# later with "could not find function".
library(ggplot2)

# Scatter plot of meanfreq (x axis) against dfrange (y axis), with each point
# coloured by its label value. theme_wsj() is expected to come from the
# ggthemes package loaded at the top of the file.
ggplot(voice_Original, aes(x = meanfreq, y = dfrange)) +
  geom_point(aes(color = label)) +
  theme_wsj()

zuo-ye-25-1

results matching ""

    No results matching ""