## HW 05 Solutions
## Author: Dylan Schwilk
# Function: mateVectors
# Args:
# - x, y = Two vectors each representing a "genotype". The vectors should be
# atomic vectors, but this is not enforced. These vectors are the
# two parent genotypes and must be of the same length and type.
# - r = The per-locus recombination rate. A rate between 0 and 1 which
# indicates the probability of a crossover event between any two
# adjacent genes (vector elements).
# Returns:
# - vector of the same length and type as the parents.
mateVectors1 <- function(x, y, r) {
stopifnot(length(x) == length(y))
child <- y # make the child vector and assign parent y to it so we only need
# to worry about locations that should have parent x's alleles
usingX <- runif(1) <= 0.5 # coin flip. This boolean flag tells use which
# parent we are reading from.
for (i in 1:length(x)) {
if(usingX) {
child[i] <- x[i] # if we are reading from x, then assign the ith value of
# x to child[i]. If we are reading from y, do nothing as
# child already has y's alleles
}
# now test to see if we switch parents:
if(runif(1) <= r) {
usingX <- !usingX
}
}
return(child)
}
# A vectorized implementation of above without explicit loops. No comments
# provided.
mateVectors <- function(x, y, r) {
stopifnot(length(x)==length(y))
crossovers <- runif(length(x)) <= r
useFirstParent <- cumsum(crossovers) %% 2 == 0
if(runif(1) <= 0.5) {
child <- ifelse(useFirstParent, x, y)
} else {
child <- ifelse(useFirstParent, y, x)
}
return(child)
}
############################
## Examples and Tests ##
############################
mother <- rep(0,100)
father <- rep(1,100)
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
child1
child2
## it works with vectors of single char strings.
alph <- unlist(strsplit("abcdefghijklmnopqrstuvwxyz",split=""))
mother <- alph #sample(alph,100,replace=TRUE)
father <- sample(alph,length(alph),replace=TRUE)
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
## print the child vector as a string rather than a vector:
paste(child1,collapse="")
paste(child2,collapse="")
# Using vectors of words as the genotype:
words <- "It was the best of times, it was the worst
of times, it was the age of wisdom, it was the age of foolishness, it was the
epoch of belief, it was the epoch of incredulity, it was the season of Light,
it was the season of Darkness, it was the spring of hope, it was the winter of
despair, we had everything before us, we had nothing before us, we were all
going direct to heaven, we were all going direct the other way - in short, the
period was so far like the present period, that some of its noisiest
authorities insisted on its being received, for good or for evil, in the
superlative degree of comparison only."
words <- unlist(strsplit(words, split="[[:space:][:punct:]]+"))
mother <- words
father <- rep("____", length(words))
child1 <-mateVectors(mother,father, 0.05)
child2 <-mateVectors1(mother,father, 0.05)
paste(child1,collapse=" ")
paste(child2,collapse=" ")
##
mother <- unlist(strsplit("I am the mother", split=""))
father <- unlist(strsplit("i AM THE FATHER", split=""))
child <- mateVectors(mother,father, 0.05)
paste(child,collapse="")
## test if distribution of run lengths looks like bernoulli process (geometric
## distribution). As length of vectors goes to infinity, this distribution of
## parental run lengths should approach a geometric distribution. So mean
## should be 1 / p and variance should be (1 - p) / p^2
set.seed(100)
N_ALLELES <- 1000
mother <- rep(0,N_ALLELES)
father <- rep(1,N_ALLELES)
child <- mateVectors(mother, father, 0.05)
runs <- rle(child)
runlengths <- runs$lengths
# when r is 0.05 then
# Mean should be 1 / r = 20
# Variance should be (1 - r) / r^2 = 380
mean(runlengths)
var(runlengths)
# Hm hard to tell for a single trial. Try this a bunch and check the mean?
runLengthStats <- function(v) {
runs <- rle(v)
return(c(mean = mean(runs$lengths), var = var(runs$lengths)))
}
children <- matrix(replicate(10000, mateVectors(mother, father, 0.05)), nrow=N_ALLELES)
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 20 and 380
children <- matrix(replicate(10000, mateVectors(mother, father, 0.1)), nrow=N_ALLELES)
# should be 50% 0s vs 1s so easy test:
rl_stats <- apply(children, MARGIN=2, runLengthStats)
rowMeans(rl_stats) # should be 10 and 90
f <- function(x) {
return(x^2 - sin(x) )
}
d <- data.frame(x = seq(-10, 10, 0.01))
d$y <- f(d$x)
library(ggplot2)
ggplot(d, aes(x, y)) + geom_point()
nlm(f, 5)
optim(5, f, method = "Brent", lower = -100, upper = 100)
D(expression( x^2 + 2*x ), "x")
D(expression( exp(x^2) ), "x")
D(expression(log(2*x) + x^2), "x")
curve( x^2 + 2*x ,0,10)
curve(eval(D(expression( x^2 + 2*x ), "x") ), 0, 10)
f <- function(x) x^2
integrate(f,0,1)
qchisq(0.95,df=2)
qnorm( c(0.05, 0.95) )
dbinom(2, size=12, prob=0.5)
dbinom(10, size=12, prob=0.5)
pbinom(10, size=12, prob=0.5)
x <- rbinom(500000,5,0.5)
mean(x >= 4)
# The exact way:
1 - pbinom(3, 5, 0.5)
dbinom(4, 5, 0.5) + dbinom(5, 5, 0.5)
sum <- 0
nreps <- 100000
for (i in 1:nreps) {
xy <- rnorm(2)
sum <- sum + max(xy)
}
print(sum/nreps)
# or store in vector:
v <- replicate(nreps, max(rnorm(2)))
hist(v)
set.seed(100)
mean(rnorm(10000))
mean(rnorm(10000))
set.seed(100)
mean(rnorm(10000))
a <- matrix(c(1,3,2,4), ncol=2)
b <- matrix(c(1,0,-1,1), ncol=2)
a %*% b
a <- matrix(c(1,1,1,-1), ncol=2)
b <- c(4, 2)
solve(a, b)
w <- 12
f <- function(y) {
d <- 8
h <- function() {
return(d * (w+y) )
}
print(environment(h))
return(h())
}
environment(f)
ls()
f(2)
h()
f <- function(y, ftn) {
d <- 8
print(environment(ftn))
return(ftn(d, y))
}
h <- function(dd, yy) {
return(dd * (w + yy))
}
w <- 12
f(3, h)
x <- 1
increment.x <- function() {
print(x)
x <- x+1
print(x)
}
increment.x()
x
increment.x <- function() {
x <<- x+1
}
x
increment.x()
x
filelist <- list.files(pattern="*.org$")
#filelist
for(s in filelist) {
assign(s, readLines(file(s)) )
}
ls()