『Data Science』R语言学习笔记,基础语法
发布时间:2021-02-27 15:07:21 所属栏目:大数据 来源:网络整理
导读:Data Types / Data Object / Vector: x <- c(0.5,0.6) ## numeric; x <- c(TRUE,FALSE) ## logical; x <- c(T,F) ## logical; x <- c("a","b","c") ## character; x <- 9:29 ## integer; x <- c(1+0i,2+4i) ## complex; x <- vector("numeric",length = 10) ## create a numeric vect…
Matrices can be subsetted in the usual way with (i,j) type indices.

> x <- matrix(1:6,2,3)
> x[1,2]
[1] 3
> x[1,]
[1] 1 3 5
> x[,2]
[1] 3 4
> x[1,2,drop = FALSE]
     [,1]
[1,]    3
> x[1,,drop = FALSE]
     [,1] [,2] [,3]
[1,]    1    3    5

Partial Matching

Partial matching of names is allowed with [[ and $.

> x <- list(aardvark = 1:5)
> x$a
[1] 1 2 3 4 5
> x[["a"]]
NULL
> x[["a",exact = FALSE]]
[1] 1 2 3 4 5

Removing NA Values

> x <- c(1,2,NA,4,NA,5)
> bad <- is.na(x)
> x[!bad]
[1] 1 2 4 5

Use built-in function

> x <- c(1,2,NA,4,NA,5)
> y <- c("a","b",NA,"d",NA,"f")
> good <- complete.cases(x,y)
> good
[1]  TRUE  TRUE FALSE  TRUE FALSE  TRUE
> x[good]
[1] 1 2 4 5
> y[good]
[1] "a" "b" "d" "f"

From data frame

> airquality[1:6,] ## show the first six rows of the data frame
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5  ## this row contains NA values
6    28      NA 14.9   66     5   6  ## this row contains an NA value
> good <- complete.cases(airquality) ## rows 5 and 6 contain NA values, so they are filtered out
> airquality[good,][1:6,]
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
7    23     299  8.6   65     5   7
8    19      99 13.8   59     5   8

Vectorized Operations
> x <- 1:4; y <- 6:9
> x + y
[1]  7  9 11 13
> x > 2
[1] FALSE FALSE  TRUE  TRUE
> y >= 2
[1] TRUE TRUE TRUE TRUE
> y == 8
[1] FALSE FALSE  TRUE FALSE
> x * y
[1]  6 14 24 36
> x / y
[1] 0.1666667 0.2857143 0.3750000 0.4444444

Logic Control

if-else

> if (x > 3) { ## the condition must be a single TRUE/FALSE value, so x should have length one here
+   y <- 10
+ } else {
+   y <- 0
+ }

For

> x <- c("a","b","c","d")
> for (i in 1:4) {
+   print(x[i])
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(i in seq_along(x)) {
+   print(x[i])
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(letter in x){
+   print(letter)
+ }
[1] "a"
[1] "b"
[1] "c"
[1] "d"
> for(i in 1:4) print(x[i])
[1] "a"
[1] "b"
[1] "c"
[1] "d"

While

> count <- 0
> while(count < 10) {
+   print(count)
+   count <- count + 1
+ }
[1] 0
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
> z <- 5
> while(z >=3 && z <= 10) {
+   print(z)
+   coin <- rbinom(1,1,0.5)
+
+   if(coin == 1) {
+     z <- z + 1
+   } else {
+     z <- z - 1
+   }
+ }
[1] 5
[1] 4
[1] 3
[1] 4
[1] 5
[1] 4
[1] 5
[1] 4
[1] 3

Repeat

> x0 <- 1
> tol <- 1e-8
> repeat {
+   x1 <- computeEstimate() ## placeholder: computeEstimate() is not defined in this article
+   if(abs(x1 - x0) < tol) {
+     break
+   } else {
+     x0 <- x1
+   }
+ }
> for(i in 1:100) {
+   if(i <= 20) {
+     next ## skip to the next iteration of the loop
+   }
+ }

Function

> add2 <- function(x,y) {
+   x + y
+ }
> add2(2,3)
[1] 5
> above <- function(x,n = 10) {
+   use <- x >n
+   x[use]
+ }
> x <- 1:20
> above(x,10)
 [1] 11 12 13 14 15 16 17 18 19 20
> columnmean <- function(y,removeNA = TRUE) {
+   nc <- ncol(y)
+   means <- numeric(nc)
+   for(i in 1:nc) {
+     means[i] <- mean(y[,i],na.rm = removeNA)
+   }
+   means ## return result
+ }
> columnmean(airquality) ## compute the mean of each column of `airquality`.
[1]  42.129310 185.931507   9.957516  77.882353   6.993464  15.803922

The
|