(a) \(\mathbf{Solution.}\qquad\) With \(W=-X\) and \(X\) continuous, we can write \(F_{W}(w)\) as \[\begin{align*} F_{W}(w) & =\mathbb{P}(-X\leq w)\\ & =\mathbb{P}(X\geq-w)\\ & =1-F_{X}(-w) \end{align*}\]
Evaluating at the random variable \(W=-X\) then gives \[ F_{W}(W)=F_{W}(-X)=1-F_{X}(X) \]
Next, the copula can be written as \[\begin{align*} C\left(u_{x},u_{w}\right) & =\mathbb{P}\left(F_{X}(X)\leq u_{x},F_{W}(W)\leq u_{w}\right)\\ & =\mathbb{P}\left(F_{X}(X)\leq u_{x},1-u_{w}\leq F_{X}(X)\right)\\ & =\mathbb{P}\left(1-u_{w}\leq F_{X}(X)\leq u_{x}\right) \end{align*}\]
Since \(F_{X}(X)\sim\mathrm{Unif}\left(0,1\right)\), the copula becomes \[ C\left(u_{x},u_{w}\right)=\begin{cases} u_{x}+u_{w}-1 & \text{if}\quad u_{x}\geq1-u_{w}\\ 0 & \text{otherwise} \end{cases} \] i.e. \(C\left(u_{x},u_{w}\right)=\max\left(u_{x}+u_{w}-1,0\right)\), the countermonotonicity copula.
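As a quick sanity check, the sketch below (base R only; the choice of \(N(0,1)\) for \(X\) and the grid of \(\left(u_{x},u_{w}\right)\) points are arbitrary illustrative choices) compares the empirical joint probability \(\mathbb{P}\left(F_{X}(X)\leq u_{x},F_{W}(W)\leq u_{w}\right)\) with the formula above.
# Monte Carlo check of C(u_x, u_w) = max(u_x + u_w - 1, 0) -- illustrative sketch
set.seed(1)
x  = rnorm(1e5)       # any continuous X works; N(0,1) chosen for convenience
ux = pnorm(x)         # F_X(X) ~ Unif(0,1)
uw = 1 - ux           # F_W(W) = F_W(-X) = 1 - F_X(X)
C_emp  = function(a, b) mean(ux <= a & uw <= b)   # empirical copula value
C_form = function(a, b) pmax(a + b - 1, 0)        # derived formula
pts = cbind(a = c(0.2, 0.5, 0.9), b = c(0.3, 0.6, 0.8))
cbind(pts, empirical = mapply(C_emp, pts[, "a"], pts[, "b"]),
      formula = mapply(C_form, pts[, "a"], pts[, "b"]))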
(b) \(\mathbf{Solution.}\qquad\) We are given that \(X\sim N\left(0,1\right)\) and \(Y\sim N\left(0,1\right)\) with \(\rho=0.5\). Then \(Z=X+Y\sim N\left(0,3\right)\), since \(\mathbb{E}\left[X+Y\right]=0\) and \(\mathrm{Var}\left(X+Y\right)=1+1+2(0.5)=3\). Because \(\mathrm{Cov}\left(X,Z\right)=\mathrm{Var}\left(X\right)+\mathrm{Cov}\left(X,Y\right)=3/2\), we can use the Gaussian copula \(C\left(u_{x},u_{z}\right)\) for \(X,Z\) with correlation \[ \rho_{XZ}=\frac{3/2}{\sqrt{1}\cdot\sqrt{3}}=\frac{\sqrt{3}}{2} \]
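A quick simulation sketch (assuming the MASS package is available for mvrnorm; the sample size is an arbitrary choice) confirms that the implied correlation between \(X\) and \(Z\) is \(\sqrt{3}/2\approx0.866\).
# Simulation check of Corr(X, Z) with Z = X + Y -- illustrative sketch
library(MASS)
set.seed(1)
Sigma_xy = matrix(c(1, 0.5, 0.5, 1), nrow = 2)   # unit variances, Corr(X, Y) = 0.5
xy = mvrnorm(1e5, mu = c(0, 0), Sigma = Sigma_xy)
z = xy[, 1] + xy[, 2]
cor(xy[, 1], z)   # approximately sqrt(3)/2 = 0.866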
(a) \(\mathbf{Solution.}\qquad\) Below we show normal QQ plots of the log returns for each asset.
# QUESTION 2A -----------------------------------------------------------------
# Packages assumed loaded once here: dplyr (%>%), purrr, ggplot2, gridExtra,
# grid, MASS, mnormt (dmt), fGarch (pstd/qstd/rstd/dstd), copula, fCopulae (pempiricalCopula)
library(dplyr); library(purrr); library(ggplot2); library(gridExtra); library(grid)
library(MASS); library(mnormt); library(fGarch); library(copula); library(fCopulae)
# Read data and reverse it
nasdaq = read.csv("Nasdaq_wkly_92-12.csv") %>% purrr::map_df(rev)
snp = read.csv("SP400Mid_wkly_92-12.csv") %>% purrr::map_df(rev)
lrets = data.frame("NAS_lret" = c(NA, diff(log(nasdaq$Adj.Close))),
                   "SNP_lret" = c(NA, diff(log(snp$Adj.Close))),
                   row.names = snp$Date)[-1, ]
# Parameter Estimates
mu = colMeans(lrets)
Sigma = cov(lrets)
cor_tau = cor(lrets[,1], lrets[,2], method = "pearson")  # Pearson correlation (used below as the copula parameter)
# Create diagnostic plots - QQ Plots
# NASDAQ
p1 = ggplot(data = lrets, aes(sample = NAS_lret)) +
  stat_qq(colour = "darkred", shape = 1) +
  stat_qq_line(colour = "steelblue") +
  ggtitle("Normal Plot of Nasdaq Log-Returns") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab('Theoretical Quantiles') +
  ylab('Sample Quantiles')
# SNP
p2 = ggplot(data = lrets, aes(sample = SNP_lret)) +
  stat_qq(colour = "darkred", shape = 1) +
  stat_qq_line(colour = "steelblue") +
  ggtitle("Normal Plot of SNP Log-Returns") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab('Theoretical Quantiles') +
  ylab('Sample Quantiles')
grid.arrange(p1, p2, nrow = 1)
Next we show a contour plot comparing the empirical copula of the normal-transformed log-returns with a fitted Gaussian copula.
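For reference, the surface being contoured by pempiricalCopula is (up to grid discretization) the empirical copula of the transformed pairs, \[ \hat{C}(u,v)=\frac{1}{n}\sum_{i=1}^{n}\mathbf{1}\left\{ U_{i}\leq u,\,V_{i}\leq v\right\}, \] where \(U_{i}=F_{1}(x_{i1})\) and \(V_{i}=F_{2}(x_{i2})\) use the fitted marginal cdfs; the fitted Gaussian copula is contoured the same way from a large simulated sample.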
# Multivariate Normal Contours -----------------------------------------------
data1 = cbind(pnorm(lrets[,1], mean = mu[1], sd = sqrt(Sigma[1,1])),
              pnorm(lrets[,2], mean = mu[2], sd = sqrt(Sigma[2,2])))
# Empirical copula of the normal-transformed data
u1 = data1[,1]
u2 = data1[,2]
dem = pempiricalCopula(u1, u2)
contour(dem$x, dem$y, dem$z, main = "Empirical vs. Fitted Gaussian Copula",
        col = 'blue', lty = 1, lwd = 2, nlevels = 20)
# Fitted Gaussian copula, contoured from a large simulated sample
ct = normalCopula(cor_tau, dim = 2, dispstr = "un")
utdis = rCopula(100000, ct)
demt = pempiricalCopula(utdis[,1], utdis[,2])
contour(demt$x, demt$y, demt$z, col = 'red', lty = 2, lwd = 2, add = TRUE,
        nlevels = 20)
(b) \(\mathbf{Solution.}\qquad\) Below we compute the maximum likelihood estimate of the degrees of freedom for the multivariate t-distribution and construct a 95% confidence interval for it.
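The interval is the standard profile-likelihood interval: writing \(L_{\mathrm{prof}}(\nu)\) for the log-likelihood maximized over the location vector and scale matrix with \(\nu\) held fixed (via cov.trob), the 95% CI is \[ \left\{ \nu:\;L_{\mathrm{prof}}(\nu)\geq L_{\mathrm{prof}}(\hat{\nu})-\tfrac{1}{2}\chi_{1,0.95}^{2}\right\}, \] which is the cutoff h computed in the code below.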
# QUESTION 2B ---------
# First find nu by maximizing profile likelihood
df = seq(2.0, 8, 0.01)
n = length(df)
loglik_profile = rep(0, n)
for (i in 1:n) {
  fit = cov.trob(lrets, nu = df[i])
  # Profile log-likelihood at this nu (note: mu and Sigma from Question 2a are
  # not overwritten here, since they are reused later)
  loglik_profile[i] = sum(log(dmt(lrets, mean = fit$center,
                                  S = fit$cov, df = df[i])))
}
# Our nu estimate by MLE
i_max = which.max(loglik_profile)
nu = df[i_max]
# Split profile curve into two halves to find intersection
left = loglik_profile[1:i_max]
right = loglik_profile[(i_max + 1):length(loglik_profile)]
# Build CI
h = max(loglik_profile) - 0.5*qchisq(0.95, df = 1)
ci_left = df[which.min(abs(left - h))]
ci_right = df[i_max + which.min(abs(right - h))]
CI_nu = c(ci_left, ci_right)
We obtain \(\hat{\nu}_{MLE}\) = 2.8, and the 95% CI for \(\nu\) is [2.43, 3.25]. Next we show QQ plots of each asset against the fitted t distribution.
# Function for generating t-plot of scaled t distribution
qqt_scale = function(x, nu, asset){
  xsort = sort(x)
  n = length(xsort)
  q = (1:n) / (n + 1)
  qq = qt(q, nu)
  # Construct dataframe to plot in ggplot
  df = data.frame("quantiles" = qq, "data" = xsort)
  # Reference line through the first and third quartiles
  xx = quantile(qq, c(0.25, 0.75))
  yy = quantile(xsort, c(0.25, 0.75))
  slope = (yy[2] - yy[1]) / (xx[2] - xx[1])
  inter = yy[1] - slope * xx[1]
  g = ggplot(df, aes(x = quantiles, y = data)) +
    geom_point(colour = "darkred", shape = 1) +
    ggtitle(paste0("t-Plot of ", asset)) +
    theme(text = element_text(size = 7),
          plot.title = element_text(hjust = 0.5)) +
    geom_abline(intercept = inter, slope = slope, colour = "steelblue") +
    xlab('Theoretical Quantiles') +
    ylab('Sample Quantiles')
  return(g)
}
# Construct qq plots for each asset
plot_list = list()
i = 1
for (col in colnames(lrets)) {
  plot_list[[i]] = qqt_scale(as.matrix(lrets[col]), nu, col)
  i = i + 1
}
# plot all subplots
n <- length(plot_list)
nCol <- ceiling(sqrt(n))
grid.arrange(grobs = plot_list, ncol = nCol,
             top = textGrob("QQ-Plot of Assets with t-distribution",
                            gp = gpar(fontsize = 13)))
Next we show a contour plot comparing the empirical copula of the t-transformed log-returns with a fitted t-copula.
# Prepare to fit copula -------------------------------------------------------
data1 = cbind(pstd(lrets[,1], mean = mu[1],
                   sd = sqrt(Sigma[1,1]), nu = nu),
              pstd(lrets[,2], mean = mu[2],
                   sd = sqrt(Sigma[2,2]), nu = nu))
# Empirical distribution to bivariate t
u1 = data1[,1]
u2 = data1[,2]
dem = pempiricalCopula(u1,u2)
contour(dem$x,dem$y,dem$z,main="Empirical-t",col='blue',lty=1,lwd=2,nlevel=20)
# Fitted Multivariate t
ct = tCopula (cor_tau, dim = 2, dispstr = "un", df = nu)
utdis = rCopula(100000,ct)
demt = pempiricalCopula(utdis[,1],utdis[,2])
contour(demt$x, demt$y, demt$z,main="t",col='red',lty=2,lwd=2,add=TRUE,nlevel=20)
(c) \(\mathbf{Solution.}\qquad\) Below we compute the AIC for the multivariate normal and multivariate t models. The AIC for the multivariate t is lower, indicating that it is the better-fitting model.
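For reference, the criterion computed below is \[ \mathrm{AIC}=-2\ln\hat{L}+2p, \] where the bivariate t has \(p=6\) parameters (2 means, 2 variances, 1 covariance, 1 degrees of freedom) and the bivariate normal has \(p=5\).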
# QUESTION 2C ----------
# Final Fit
fit_t = cov.trob(lrets, nu=nu)
p_t = 2 * length(fit_t$center) + choose(n=length(fit_t$center), k=2) + 1
p_norm = 2 * length(mu) + choose(n=length(mu), k=2)
AIC_t = -2 * loglik_profile[i_max] + 2 * p_t
# dmt with a large df approximates the multivariate normal density
AIC_norm = -2 * sum(log(dmt(lrets, mean = mu, S = Sigma, df = 1000))) + 2 * p_norm
show(c(AIC_t, AIC_norm))
## [1] -11087.61 -10459.97
(d) \(\mathbf{Solution.}\qquad\) First we compute VaR and expected shortfall (ES) under a normal model for the equal-weighted portfolio return.
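With equal weights the portfolio (simple) return is \(R=\tfrac{1}{2}\left(e^{X_{1}}-1\right)+\tfrac{1}{2}\left(e^{X_{2}}-1\right)\), where \(X_{1},X_{2}\) are the log-returns. With \(\alpha=0.005\) and capital \(S=10^{7}\), the quantities computed below are \[ \mathrm{VaR}_{\alpha}=-S\,F_{R}^{-1}(\alpha),\qquad\mathrm{ES}_{\alpha}=-S\,\mathbb{E}\left[R\mid R<F_{R}^{-1}(\alpha)\right], \] where \(F_{R}\) is the fitted normal distribution here (and the fitted t distribution in the next block); the ES expectation is approximated by Monte Carlo.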
# QUESTION 2D -----------
draws = 1e7
alpha = 0.005
capital = 1e7
w = matrix(rep(1 / 2, 2), ncol = 1)
returns = exp(lrets) - 1
mu = as.numeric(colMeans(returns) %*% w)
sigma = sqrt(as.numeric(t(w) %*% cov(returns) %*% w))
# Norm VaR
r_VaR = -qnorm(alpha, mu, sigma)
VaR = capital * r_VaR
rets = rnorm(draws, mean = mu, sd = sigma)
ES = -mean(rets[rets < -r_VaR])
shortfall = capital * ES
show(c(VaR, shortfall))
## [1] 725985.7 817748.4
Next we compute VaR and ES under a t model, using the degrees of freedom estimated in 2b).
# t-distribution VaR
sigma2 = sigma
r_VaR_t = qstd(alpha, mean = mu, sd = sigma2, nu = nu)
VaR_t = -r_VaR_t * capital
# t-distribution ES
rets = rstd(draws, mean = mu, sd=sigma2, nu = nu)
ES_t = -mean(rets[rets < r_VaR_t])
shortfall = ES_t * capital
show(c(VaR_t, shortfall))
## [1] 951636.7 1514780.2
Utilizing the same data as in Question 2, carry out a copula-based fitting of the bivariate distribution of the log-returns via the following parts:
(a) \(\mathbf{Solution.}\qquad\) Below we show QQ plots of the weekly log-returns for each asset. The difference from the plots in 2b) is that here each asset is fit with its own t distribution, with its own degrees of freedom.
# QUESTION 3A -----------------------------------------------------------------
# Fit a t-distribution to each asset
est.NAS = as.numeric(fitdistr(lrets[,1], "t")$estimate)
est.SP = as.numeric(fitdistr(lrets[,2], "t")$estimate)
N = length(lrets[,1])
# Create t-plots
plot_list = list()
i = 1
nu_vals = c(est.NAS[3], est.SP[3])
for (col in colnames(lrets)) {
  plot_list[[i]] = qqt_scale(as.matrix(lrets[col]), nu_vals[i], col)
  i = i + 1
}
# plot all subplots
n <- length(plot_list)
nCol <- ceiling(sqrt(n))
grid.arrange(grobs = plot_list, ncol = nCol,
             top = textGrob("QQ-Plot of Assets with t-distribution",
                            gp = gpar(fontsize = 13)))
(b) \(\mathbf{Solution.}\qquad\) Below we show a contour plot comparing the empirical copula of the data (each margin transformed by its fitted t cdf from part a) with the fitted t-copula.
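The underlying assumption is Sklar's decomposition of the joint cdf into marginals and a copula, \[ F\left(x_{1},x_{2}\right)=C\left(F_{1}(x_{1}),F_{2}(x_{2})\right), \] where \(F_{1},F_{2}\) are the univariate t fits from 3a) and \(C\) is the t-copula fitted below to the transformed pairs \(\left(F_{1}(x_{1}),F_{2}(x_{2})\right)\).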
# QUESTION 3B -----------
# Need to convert to standard deviation for incorporating within "pstd"
est.NAS[2] = est.NAS[2]*sqrt(est.NAS[3] / (est.NAS[3]-2))
est.SP[2] = est.SP[2]*sqrt(est.SP[3] / (est.SP[3]-2))
# Generating initial estimate of correlation for t-copula
cor_tau = cor(lrets[,1], lrets[,2],method="spearman")
# Prepare to fit copula
data1 = cbind(pstd(lrets[,1],
                   mean = est.NAS[1], sd = est.NAS[2], nu = est.NAS[3]),
              pstd(lrets[,2], mean = est.SP[1], sd = est.SP[2], nu = est.SP[3]))
# Fit t-copula
cop_t_dim2 = tCopula(cor_tau, dim = 2, dispstr = "un", df = 4)
ft1 = fitCopula(cop_t_dim2, optim.method = "L-BFGS-B", data = data1,
                start = c(cor_tau, 5))
# Empirical distribution to bivariate t
u1 = data1[,1]
u2 = data1[,2]
dem = pempiricalCopula(u1,u2)
contour(dem$x,dem$y,dem$z,main="Empirical-t",col='blue',lty=1,lwd=2,nlevel=20)
ct = tCopula (ft1@estimate[1], dim = 2, dispstr = "un", df = ft1@estimate[2])
utdis = rCopula(100000,ct)
demt = pempiricalCopula(utdis[,1],utdis[,2])
contour(demt$x, demt$y, demt$z,main="t",col='red',lty=2,lwd=2,add=TRUE,nlevel=20)
Next we compute the AIC for the copula-based model. This AIC is slightly higher than the AIC of the multivariate t from 2c). However, since the fit in the contour plot looks better, we may still choose the t-copula model instead.
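The AIC below uses the fact that the copula-model likelihood factors into copula and marginal terms: the joint density is \(f\left(x_{1},x_{2}\right)=c\left(F_{1}(x_{1}),F_{2}(x_{2})\right)f_{1}(x_{1})f_{2}(x_{2})\), so the total log-likelihood is \[ \ln L=\ell_{\mathrm{cop}}+\ell_{1}+\ell_{2}, \] which is why ft1@loglik is added to the two marginal log-likelihoods. The parameter count is 3 per margin plus 2 for the t-copula (correlation and degrees of freedom), \(p=8\) in total.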
# AIC computations
loglike_f1 = sum(log(dstd(lrets[,1], mean = est.NAS[1], sd = est.NAS[2],
                          nu = est.NAS[3])))
loglike_f2 = sum(log(dstd(lrets[,2], mean = est.SP[1], sd = est.SP[2],
                          nu = est.SP[3])))
AIC = -2 * (ft1@loglik + loglike_f1 + loglike_f2) +
  2 * (length(est.NAS) + length(est.SP) + 2)
AIC
## [1] -11065.22
(c) \(\mathbf{Solution.}\qquad\) Below we compute VaR and ES for the equal-weighted portfolio by simulating from the fitted t-copula.
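The Monte Carlo scheme below draws \(\left(U_{1},U_{2}\right)\) from the fitted t-copula, maps each coordinate through its marginal t quantile function, and forms the equal-weight portfolio return \[ R=\tfrac{1}{2}\left(e^{F_{1}^{-1}(U_{1})}-1\right)+\tfrac{1}{2}\left(e^{F_{2}^{-1}(U_{2})}-1\right). \] VaR and ES are then \(-S\,\hat{q}_{0.005}(R)\) and \(-S\) times the mean of the simulated \(R\) below that quantile, with \(S=10^{7}\).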
# QUESTION 3C -----------
draws = 1e6
capital = 1e7
# Generate Random Variates from our copula
ct = tCopula (ft1@estimate[1], dim = 2, dispstr = "un", df = ft1@estimate[2])
uvsim = rCopula(draws, ct)
# Compute VaR
data_quantiles = cbind(qstd(uvsim[,1], mean = est.NAS[1],
                            sd = est.NAS[2], nu = est.NAS[3]),
                       qstd(uvsim[,2], mean = est.SP[1],
                            sd = est.SP[2], nu = est.SP[3]))
datat = 0.5*(exp(data_quantiles[,1])-1) + 0.5*(exp(data_quantiles[,2])-1)
r_VaR = -quantile(datat,.005)
VaR = r_VaR * capital
# Compute Expected Shortfall
ES_t = -mean(datat[datat < -r_VaR])
shortfall = ES_t * capital
show(c(VaR, shortfall))
## 0.5%
## 936189.9 1307996.1