#source("../../rlehre/mlb.R")
require(mcgraph)
require(huge)

# Load the results table; hypo.raw is kept untouched as a reference copy
hypo.raw <- read.csv("data/IAKE804_resultater.csv", header=TRUE)

# Trim the table: the last row is empty, the first column holds non-unique
# row labels and the last column holds free-text notes
hypo.cp <- hypo.raw[-nrow(hypo.raw), -c(1, ncol(hypo.raw))]

# English names for the remaining columns
# (humerus = upper arm bone, femur = thigh bone, tibia = shin bone;
# 'intet årstal' is Danish for 'no year')
col.names <- c(
    "Date", "Site", "Skeleton", "SexID", "Sex",
    "Age.min", "Age.max", "Quant", "Qual",
    "L.plus3", "L.minus3", "R.plus3", "R.minus3",
    "R.humerus", "L.humerus", "R.radius", "L.radius",
    "R.femur", "L.femur", "R.tibia", "L.tibia"
)
names(hypo.cp) <- col.names

# Translate the Danish sex labels to English using a regex: every
# "Kvinde"/"kvinde" spelling is replaced by "female" and the result is
# truncated to 6 characters (the length of "female"); entries that are
# exactly "Mand" become "male". The conversion to a factor happens later,
# when the final data.frame is assembled.
hypo.cp$Sex <- strtrim(gsub(x=hypo.cp$Sex,
                            pattern="[Kk]vinde*",
                            replacement="female"),
                            6)
hypo.cp$Sex[which(hypo.cp$Sex == "Mand")] <- "male"

# Replace the 'no year' placeholder with NA (with replacement=NA, gsub sets
# every matching element to NA as a whole).
# The placeholder describes a date, so the Date column is cleaned; the Site
# column is cleaned as well because the original code only handled Site.
hypo.cp$Date <- gsub(x=hypo.cp$Date, pattern='Intet årstal', replacement=NA)
hypo.cp$Site <- gsub(x=hypo.cp$Site, pattern='Intet årstal', replacement=NA)

# Replace every value containing '/' with NA.
# apply() coerces the data.frame to a matrix and gsub() returns character,
# so hypo.tmp is a character matrix from here on.
hypo.tmp <- apply(hypo.cp, 2, gsub, pattern="/", replacement=NA)

# Position of the column in hypo.tmp that becomes a factor
idx.fac  <- which(colnames(hypo.tmp) %in% c("Sex"))

# Positions of the columns in hypo.tmp that become numeric
idx.num <- which(colnames(hypo.tmp) %in% c("SexID", "Age.min", "Age.max", "Quant", "Qual",
    "R.humerus", "L.humerus", "L.plus3", "L.minus3", "R.plus3", "R.minus3",
    "R.radius", "L.radius", "R.femur", "L.femur", "R.tibia", "L.tibia"))

# Build the final data.frame: the factor column followed by the numeric
# columns (converted from 'character'), with skeleton IDs as rownames.
# The factor column is named explicitly; left unnamed, data.frame() would
# derive an unusable name from the deparsed expression.
hypo.df <- data.frame(Sex=as.factor(hypo.tmp[, idx.fac]),  apply(hypo.tmp[, idx.num], 2, as.numeric))
rownames(hypo.df) <- hypo.cp$Skeleton

# Impute values by decision trees (mcgraph's mcg.impute with method "rpart")
hypo.df <- mcg.impute(hypo.df, method="rpart")

# Row-wise mean of the four plus3/minus3 scores (left and right), ignoring NA
canine.all <- apply(hypo.df[, c("L.plus3", "L.minus3", "R.plus3", "R.minus3")], 1,
                    function(scores) mean(scores, na.rm=TRUE))

# Limb proportions: (right + left humerus) / (right + left radius) for the
# arms, (right + left femur) / (right + left tibia) for the legs
ratio.arms <- with(hypo.df, (R.humerus + L.humerus) / (R.radius + L.radius))
ratio.legs <- with(hypo.df, (R.femur + L.femur) / (R.tibia + L.tibia))

# Mean of the right and left measurement for each bone
humeri.length <- with(hypo.df, (R.humerus + L.humerus) / 2)
radial.length <- with(hypo.df, (R.radius + L.radius) / 2)
femora.length <- with(hypo.df, (R.femur + L.femur) / 2)
tibiae.length <- with(hypo.df, (R.tibia + L.tibia) / 2)

# Assemble the analysis table, one column per variable
ndf <- data.frame(
    canine.all=canine.all,
    canine.low.L=hypo.df[["L.minus3"]],
    canine.low.R=hypo.df[["R.minus3"]],
    canine.up.L=hypo.df[["L.plus3"]],
    canine.up.R=hypo.df[["R.plus3"]],
    ratio.arms=ratio.arms,
    ratio.legs=ratio.legs,
    sex=hypo.df[["SexID"]],
    humeri.length=humeri.length,
    radial.length=radial.length,
    femora.length=femora.length,
    tibiae.length=tibiae.length
)

# Run mcgraph's mcg.lvs on the assembled table (original note: "Make predictions");
# the result is plotted below and its rownames serve as node names
est <- mcg.lvs(ndf)

# Order layout
# Translate a grid of variable names into plot coordinates.
#
# var_names:     vector of node names; each must occur exactly once in
#                layout_matrix
# layout_matrix: matrix whose cells hold a node name, or NA for an empty cell
#
# Returns a numeric matrix with one row per entry of var_names (rownames are
# var_names) and the columns "x" and "y", where
#     y = nrow(layout_matrix) - <row of the name>
#     x = ncol(layout_matrix) - <column of the name>
# so the first grid row gets the largest y and the first grid column gets
# the largest x (i.e. x runs opposite to the column index).
# Stops with an explicit error if a name is missing from the grid or occurs
# more than once.
mcg.order.grid <- function(var_names, layout_matrix) {
    # Number of grid cells matching each name; NA cells never match
    hits <- vapply(var_names,
                   function(v) sum(layout_matrix == v, na.rm=TRUE),
                   integer(1), USE.NAMES=FALSE)
    if (any(hits == 0)) {
        stop("variable(s) not found in layout_matrix: ",
             paste(var_names[hits == 0], collapse=", "), call.=FALSE)
    }
    if (any(hits > 1)) {
        stop("variable(s) found more than once in layout_matrix: ",
             paste(var_names[hits > 1], collapse=", "), call.=FALSE)
    }

    # (row, column) position of every name, one row per name
    pos <- arrayInd(match(var_names, layout_matrix), dim(layout_matrix))

    xy <- matrix(0, nrow=length(var_names), ncol=2)
    colnames(xy) <- c("x", "y")
    rownames(xy) <- var_names
    xy[, "x"] <- ncol(layout_matrix) - pos[, 2]
    xy[, "y"] <- nrow(layout_matrix) - pos[, 1]
    return(xy)
}

# layout_matrix <- rbind(c(NA, "canine.low.L", NA),
#                        c(NA, NA, NA),
#                        c(NA, "canine.low.R", NA),
#                        c("canine.all", NA, "canine.up.R"),
#                        c(NA, "canine.up.L", NA),
#                        c("ratio.arms", NA, "radial.length"),
#                        c(NA, "humeri.length", NA),
#                        c(NA, NA, NA),
#                        c("ratio.legs", "tibiae.length", "femora.length"),
#                        c(NA, NA, NA),
#                        c(NA, "sex", NA))

# Node placement grid (11 rows x 3 columns), written row by row;
# NA marks an empty cell
layout_matrix <- matrix(c(
    NA,           "canine.low.L",  NA,
    NA,           NA,              NA,
    NA,           "canine.low.R",  NA,
    "canine.all", NA,              "canine.up.R",
    NA,           "canine.up.L",   NA,
    "ratio.arms", NA,              "radial.length",
    NA,           "humeri.length", NA,
    NA,           NA,              NA,
    "ratio.legs", "tibiae.length", "femora.length",
    NA,           NA,              NA,
    NA,           "sex",           NA
), ncol=3, byrow=TRUE)

# Plot
# The node coordinates are computed before the PDF device is opened, so a
# layout error does not leave an empty file behind.
lay <- mcg.order.grid(rownames(est), layout_matrix)
pdf("../figure/Figure8.pdf", width=5, height=5)
# finally= guarantees the device is closed even if plotting fails; otherwise
# the PDF device would stay active and capture later plots
invisible(tryCatch({
    par(mar=c(0,0,2,0))
    plot(est, layout=lay,  vertex.size=2, vertex.height=1, vertex.color="grey90", vertex.symbol="rectangle", vertex.border.color="black",
         edge.color="grey60", edge.width=2.5, label.color="black", label.size=0.7, cex.main=0.9, main="Relationship of positive LEH scores and bone growth")
    box()
}, finally=dev.off()))
