This server is intended for Git repositories related to academic classwork only. Projects/repositories will generally be removed 6 months after the close of the semester. Inactive repositories from previous semesters are now archived after 365 days without activity: they are renamed and marked as 'archived'. After 90 days in that state, they are removed from the system completely.

Commit a8d5b8dc authored by Drew Willey

Merge wx and mlb data without dupes.

parent 71de4761
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -3,21 +3,61 @@ library(purrr)
library(httr)
library(parsedate)
# merge perf data with wx data by game id
# Merge the 2019 MLB game data with the per-game weather columns, keyed on
# mlb_game_id, without introducing duplicate game rows.
#
# Reads:   ../data/2019_mlb_game_data.csv
#          ../data/2019_mlb_game_data_wx.csv
# Writes:  ../data/new_data.csv
# Prints:  unique game-id counts for both inputs and the row counts of the
#          merged and original game tables (a quick fan-out check).
# Returns: the merged data frame, invisibly, so callers can reuse it.
merge_data <- function() {
  # import mlb data
  mlb_data <- read.csv(
    file = "../data/2019_mlb_game_data.csv",
    stringsAsFactors = TRUE
  )

  # import mlb + weather data
  wx_data <- read.csv(
    file = "../data/2019_mlb_game_data_wx.csv",
    stringsAsFactors = TRUE
  )

  # Drop columns 2-17 and collapse repeated rows. Without distinct(), every
  # repeated weather row would duplicate its game in the merge below.
  # NOTE(review): presumably column 1 is mlb_game_id and columns 2-17 repeat
  # the game columns already present in mlb_data -- confirm against the CSV.
  just_wx_data <- wx_data %>%
    select(-c(2:17)) %>%
    distinct()

  # Left join: keep every game, with NA weather where no observation exists.
  new_df <- merge(mlb_data, just_wx_data, by = "mlb_game_id", all.x = TRUE)

  # Sanity checks: if the merge did not fan out, the last two counts match.
  print(length(unique(wx_data$mlb_game_id)))
  print(length(unique(mlb_data$mlb_game_id)))
  print(nrow(new_df))
  print(nrow(mlb_data))

  write.csv(new_df, "../data/new_data.csv", row.names = FALSE)
  invisible(new_df)
}

# new_df is local to merge_data(), so capture the returned frame before
# writing the second copy of the merged output.
new_df <- merge_data()
write.csv(new_df, "../data/2019_mlb_game_wx_data.csv", row.names = FALSE)
# Load the combined games-and-weather dataset, print a summary of its last
# five columns, and run a back-fill pass over hourly_dry_bulb_temperature.
#
# Reads:  ../data/mlb_games_and_weather_full_dataset.csv
# Prints: summary() of the trailing five columns, plus one pasted line per
#         row from the helper (only when at least one temperature is NA).
# NOTE(review): the updated all_data is neither returned nor written back,
# and the helper currently returns the existing values, so NAs stay NA.
reimport <- function() {
  # Placeholder lookup: logs its first four arguments and hands back the
  # fifth unchanged.
  get_new_wx <- function(city, game, start_time, end_time, current) {
    print(paste(city, game, start_time, end_time))
    current
  }

  # import all data
  all_data <- read.csv(
    file = "../data/mlb_games_and_weather_full_dataset.csv",
    stringsAsFactors = TRUE
  )

  # Everything except the last five columns is dropped.
  just_wx_data <- select(all_data, -c(1:(ncol(all_data) - 5)))

  # take a look at nas
  print(summary(just_wx_data))

  temps <- all_data$hourly_dry_bulb_temperature
  # ifelse() only evaluates the helper call when some temperature is NA.
  # NOTE(review): confirm that all_data actually has an `mlb_data` column.
  all_data$hourly_dry_bulb_temperature <- ifelse(
    is.na(temps),
    get_new_wx(
      all_data$city,
      all_data$mlb_data,
      all_data$actual_start_local_time,
      all_data$end_local_time,
      temps
    ),
    temps
  )
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment