CC BY-NC-ND 3.0
Counting DNA Nucleotides
Problem: A string is simply an ordered collection of symbols selected from some alphabet and formed into a word; the length of a string is the number of symbols that it contains.
An example of a length 21 DNA string (whose alphabet contains the symbols ‘A’, ‘C’, ‘G’, and ‘T’) is “ATGCTTCAGAAAGGTCTTACG.”
Given: A DNA string s of length at most 1000 nt.
Return: Four integers (separated by spaces) counting the respective number of times that the symbols ‘A’, ‘C’, ‘G’, and ‘T’ occur in s.
Sample dataset: AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
Sample output: 20 12 17 21
#' Count the occurrences of each DNA nucleotide in a string.
#'
#' @param xx A character string. It is split into single symbols and only its
#'   first element is examined. Matching is case-sensitive.
#' @return An integer vector of length 4 holding the number of "A", "C", "G"
#'   and "T" symbols, in that order. Any other symbol is ignored.
resDNA <- function(xx) {
  # Split once up front instead of once per nucleotide.
  symbols <- strsplit(xx, split = "")[[1]]
  # match() maps each symbol to its position in the alphabet (NA for anything
  # else); tabulate() counts positions 1..4 and skips the NAs, so a nucleotide
  # absent from the input still yields an explicit zero.
  tabulate(match(symbols, c("A", "C", "G", "T")), nbins = 4L)
}
# Sample dataset from the problem statement.
dna <- "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
# Top-level call, so the four counts (A, C, G, T) are auto-printed.
resDNA(dna)
## [1] 20 12 17 21
# Import the comma-separated log. `skip = 1` discards the first line of the
# file, so the line after it is the one consumed as the header.
bdd <- read.table(
  "./myData/E05C13.csv",
  sep = ",", dec = ".",
  header = TRUE, skip = 1,
  stringsAsFactors = FALSE
)
# Replace the header read from the file with short column names.
colnames(bdd) <- c("id", "dateTime", "temp")
# Inspect the structure of the imported table.
str(bdd)
## 'data.frame': 32786 obs. of 3 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ dateTime: chr "11/12/15 23:00:00" "11/12/15 23:30:00" "11/13/15 00:00:00" "11/13/15 00:30:00" ...
## $ temp : num 4.97 4.77 4.84 4.84 5.08 ...
# Parse the text timestamps (month/day/two-digit year, 24-hour clock) into a
# POSIXct column.
# NOTE(review): "Etc/GMT+4" follows the POSIX sign convention, i.e. it is four
# hours *behind* UTC (UTC-4) -- confirm this matches the logger's clock.
bdd[["POSIX"]] <- as.POSIXct(
  bdd[["dateTime"]],
  format = "%m/%d/%y %H:%M:%S",
  tz = "Etc/GMT+4"
)
# The table now carries a fourth, date-time column.
str(bdd)
## 'data.frame': 32786 obs. of 4 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ dateTime: chr "11/12/15 23:00:00" "11/12/15 23:30:00" "11/13/15 00:00:00" "11/13/15 00:30:00" ...
## $ temp : num 4.97 4.77 4.84 4.84 5.08 ...
## $ POSIX : POSIXct, format: "2015-11-12 23:00:00" "2015-11-12 23:30:00" ...
## Loading required package: ggplot2