Six-week basic computing workshop


Week 5: R Studio and R Basics

  1. Assignment
  2. 
    var1 <- 5
    var2 <- 3
    var2
    
    [1] 3
    
    var3 <-  c(3,5,7,9)
    
  3. #Basic data types: #Numbers
  4. 
    a <-5
    
    b <- sqrt(a*a+3)
    
    #list of variables
    ls()
    
    [1] "a"    "b"    "var1" "var2" "var3"
    
    #Vectors
    a <- c(10,4,30,2,50)
    
    a[1]
    
    [1] 10
    
    a[2]
    
    [1] 4
    
    a[0]
    
    numeric(0)
    
    numeric(0)
    
    numeric(0)
    
    a[5]
    
    [1] 50
    
    a[6]
    
    [1] NA
    
    a <- numeric(10)
    b<-seq(1:10)
    
    a+1
    
     [1] 1 1 1 1 1 1 1 1 1 1
    
  5. Strings
  6. 
    
    a <- "Exper"
    a
    
    [1] "Exper"
    
    b <- c("Exper","Control")
    b
    
    [1] "Exper"   "Control"
    
    b[1]
    
    [1] "Exper"
    
    typeof(a)
    
    [1] "character"
    
  7. #Factors
  8. 
    conds1<-c("Exper","Exper","Control","Control")
    conds <- factor(c("Exper","Exper","Control","Control"))
    
    #Data frames
    a <- c(1,2,3,4)
    b <- c(2,4,6,8)
    conds <- factor(c("Exper","Exper","Control","Control"))
    
    genelevels <- data.frame(a, b, conds)
    
    colnames( genelevels ) <- c("library1", 
                                "library2",
                                "conditions")
    
    genelevels <- data.frame(lib1=a, 
                             lib2=b, 
                             f=conds)
    
    genelevels$lib1
    
    [1] 1 2 3 4
    
    genelevels$lib2
    
    [1] 2 4 6 8
    
    genelevels$f
    
    [1] Exper   Exper   Control Control
    Levels: Control Exper
    
  9. For comments use # symbol:
    # These are the same
    #genelevels[2:4, "lib1"] ==== genelevels$lib1[2:4]
  10. 
    genelevels[2:4, "lib1"]
    
    [1] 2 3 4
    
    genelevels$lib1[2:4]
    
    [1] 2 3 4
    
  11. Select rows 2 to 4 of the lib1 and f columns
  12. 
    genelevels[2:4, c("lib1", "f")]
    
      lib1       f
    2    2   Exper
    3    3 Control
    4    4 Control
    
  13. Only Experiments
  14. 
    genelevels[genelevels$f == "Exper", ]
    
      lib1 lib2     f
    1    1    2 Exper
    2    2    4 Exper
    
    genelevels[1,]
    
      lib1 lib2     f
    1    1    2 Exper
    
    genelevels[3,]
    
      lib1 lib2       f
    3    3    6 Control
    
    genelevels[3,2]
    
    [1] 6
    
  15. Get a summary
  16. 
    summary(genelevels)
    
          lib1           lib2           f    
     Min.   :1.00   Min.   :2.0   Control:2  
     1st Qu.:1.75   1st Qu.:3.5   Exper  :2  
     Median :2.50   Median :5.0              
     Mean   :2.50   Mean   :5.0              
     3rd Qu.:3.25   3rd Qu.:6.5              
     Max.   :4.00   Max.   :8.0              
    
  17. Get the dimensions of a data frame (or matrix)
  18. 
    
    dim(genelevels)
    
    [1] 4 3
    
    length(a)
    
    [1] 4
    
  19. #Logical variables
  20. 
    var4 <- c(TRUE, FALSE, TRUE, FALSE)
    genelevels[var4,]
    
      lib1 lib2       f
    1    1    2   Exper
    3    3    6 Control
    
  21. Factors
  22. 
    a <- factor(c("A","A","B","A","B","B","C","A","C"))
    results <- table(a)
    results
    
    a
    A B C 
    4 3 2 
    
    a <- c("Sometimes","Sometimes","Never","Always","Always","Sometimes","Sometimes","Never")
    b <- c("Maybe","Maybe","Yes","Maybe","Maybe","No","Yes","No")
    results <- table(a,b)
    results
    
               b
    a           Maybe No Yes
      Always        2  0   0
      Never         0  1   1
      Sometimes     2  1   1
    
  23. Matrix
  24. 
    occur <- matrix(c(4,3,2,5,4,5),
                    ncol=3,byrow=TRUE)
    occur
    
         [,1] [,2] [,3]
    [1,]    4    3    2
    [2,]    5    4    5
    
    libs<-matrix(c(50,20,65,40),ncol=2,byrow=TRUE)
    rownames(libs)<-c("lib1","lib2")
    colnames(libs)<-c("control1","exper1")
    libtable <- as.table(libs)
    
    mydataframe<-data.frame(libs)
    
  25. Basic Operations
  26. 
    a <- c(1,2,3,4)
    a
    
    [1] 1 2 3 4
    
    a + 5
    
    [1] 6 7 8 9
    
    mydataframe - 10
    
         control1 exper1
    lib1       40     10
    lib2       55     30
    
    mydataframe*4
    
         control1 exper1
    lib1      200     80
    lib2      260    160
    
    mydataframe/5
    
         control1 exper1
    lib1       10      4
    lib2       13      8
    
    b <- mydataframe - 10
    
    sqrt(mydataframe)
    
         control1 exper1
    lib1    7.071  4.472
    lib2    8.062  6.325
    
    exp(mydataframe)
    
          control1    exper1
    lib1 5.185e+21 4.852e+08
    lib2 1.695e+28 2.354e+17
    
    log(mydataframe)
    
         control1 exper1
    lib1    3.912  2.996
    lib2    4.174  3.689
    
    exp(log(a))
    
    [1] 1 2 3 4
    
    c <- (a + sqrt(a))/(exp(2)+1)
    c
    
    [1] 0.2384 0.4070 0.5641 0.7152
    
    a + b
    
         control1 exper1
    lib1       41     13
    lib2       57     34
    
    a*b
    
         control1 exper1
    lib1       40     30
    lib2      110    120
    
    a/b
    
         control1 exper1
    lib1  0.02500 0.3000
    lib2  0.03636 0.1333
    
    (a+3)/(sqrt(1-b)*2-1)
    
    Warning: NaNs produced
    Warning: NaNs produced
    
         control1 exper1
    lib1      NaN    NaN
    lib2      NaN    NaN
    
    a <- c(1,2,3)
    b <- c(10,11,12,13)
    a+b
    
    Warning: longer object length is not a multiple of shorter object length
    
    [1] 11 13 15 14
    
  27. open a file
  28. file<-"/project/umw_biocore/genes.tsv"
    rsem<- read.table(file, header=TRUE, row.names=1)
    
  29. learn column names
  30. colnames(rsem)
    
  31. Select rows 2 to 5 and columns 10 to 13
  32. rsem[ 2:5 , 10:13 ]
    
  33. Select gene Lage3 and columns 2 to 5
  34. rsem["Lage3", 2:5]
    
  35. select some genes and libraries
  36. rsem[c("Brd7","Lage3"), c("J2c.rep3", "J2c.rep2")]
    
  37. select 6 libraries/columns
  38. mydata<-rsem[, c("ALAB.rep1","ALAB.rep2","ALAB.rep3", "hfd.wt.rep1","hfd.wt.rep2", "hfd.wt.rep3")]

    Your homework (deadline: Thursday, Aug 14, 17:00):


    1. Please create a “homework/week5” directory in your home folder in RStudio.

    2. Write the answers into the week5.R file.

    3. Read the /project/umw_biocore/genes.tsv file. Get help on the read.table command. What are the default values of its missing-value and decimal-point arguments? Please write them as comments.
    4. Select the J1c.rep1, J1c.rep2, J1c.rep3 columns and find the maximum and minimum values for each library.
    5. Using the selected columns, find the genes whose read counts are over 400 in the J1c.rep1 library. Hint: You are going to use > rather than ==, as in genelevels[genelevels$lib1 > 400, ]
    6. Make another data frame by selecting ALAB.rep1, ALAB.rep2, ALAB.rep3 in addition to the ones we selected before. Find the average read counts per gene across the replicates for J1c and for ALAB. Hint: (rep1+rep2+rep3)/3
    7. Make a log10 plot using these average gene counts. Hint: use the plot function with log10, for example: plot(log10(genelevels))