# Loan - R - Kaggle Competition
# Exploratory overview of the loan data set: load the data, derive the issue
# year of every loan, and summarise loan counts and total amounts per year.

# Packages ----
# Loaded in the same order as in the original script so that masking between
# packages is unchanged.
library(stringr)
library(ggplot2)
library(plyr)

# Load data ----
data.combined <- read.csv("loan.csv", header = TRUE)
# Untouched copy of the raw data, kept so the original can be restored later.
data.combined.BackUp <- data.combined
str(data.combined)

# General overview ----

# Derive the issue year from issue_d: the value is split on "-" and the second
# field is kept.
# NOTE(review): assumes issue_d looks like "<month>-<year>" -- confirm against
# the output of unique() below.
year_loan <- as.character(data.combined$issue_d)
unique(year_loan)
# vapply() guarantees a character vector even for empty input; values without
# a second field become NA, exactly as with the previous sapply() call.
year_loan <- vapply(strsplit(year_loan, "-"), `[`, character(1), 2)
data.combined$year_loan <- as.factor(year_loan)

# Some general graphs to get an overview.

# Absolute number of loans by year.
ggplot(data.combined, aes(x = year_loan)) +
  geom_bar(fill = "steelblue") +
  ggtitle("Absolute number of loans by year")
# Observation: the absolute number of loans is increasing across years.

# Total amount given as loans by year.
tot_loan_year <- ddply(
  data.combined,
  .(year_loan),
  summarise,
  tot_loan = sum(loan_amnt)
)
tot_loan_year

# NOTE(review): the source is cut off in the middle of the next call. The
# fragment is kept verbatim as a comment so that the script still parses;
# restore the complete statement before relying on this plot.
# ggplot(tot_loan_year, aes...