This template data frame provides a general structure for travel data that integrates with the data synthesis and modeling functions. Stays (individuals reported as not traveling outside their home location) should be included in this data frame as rows in which the origin and destination are the same. Note that models fitted and then extrapolated using other data assume that the same method for defining population size is used throughout. Either the dates or the time span must be filled.
travel_data_template
a data frame with empty columns and generalized column names
date: beginning of the time interval for the trip count
date: end of the time interval for the trip count
integer: time span in days
integer: unique individual identifier
numeric: age of participant
logical: gender of participant
factor: if individual participants belong to different groups
character: name of highest administration level of origin location (Country)
character: name of administration level 1 of origin location (e.g. Division, State)
character: name of administration level 2 of origin location (e.g. District, County)
character: name of administration level 3 of origin location (e.g. Sub-district, Province)
character: name of administration level 4 of origin location (e.g. City, Municipality)
character: name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)
character: administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)
numeric: longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
numeric: latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)
numeric: population size of lowest administrative unit for origin location
character: name of highest administration level of destination location (Country)
character: name of administration level 1 of destination location (e.g. Division, State)
character: name of administration level 2 of destination location (e.g. District, County)
character: name of administration level 3 of destination location (e.g. Sub-district, Province)
character: name of administration level 4 of destination location (e.g. City, Municipality)
character: name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)
character: administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)
numeric: longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
numeric: latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)
numeric: population size of lowest administrative unit for destination location
numeric: total number of observed trips made from origin to destination during time span
# Example usage of `travel_data_template`: build a data frame of trips, a data
# frame of stays, combine them into one survey data set, and build a data frame
# of locations over which a fitted model can be extrapolated.
#
# Values are drawn at random without a fixed seed, so the `#>` output below is
# from one particular run and will differ when the code is re-run.

#--------------------------------
# Travel among locations
#--------------------------------

trip <- travel_data_template
n <- 3 # Add some observations
trip[1:n, ] <- NA

# Time span of travel survey
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 30
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = "days")

# Participant info
trip$indiv_id <- sample(1:100, n)
trip$indiv_age <- round(runif(n, 5, 80))
trip$indiv_sex <- rbinom(n, 1, 0.5)

# Origin info
trip$orig_adm0 <- "A"
trip$orig_adm1 <- "A"
trip$orig_adm2 <- "A"
trip$orig_adm3 <- LETTERS[1:n]
trip$orig_type <- "Sub-district" # Type of admin unit for lowest admin level
trip$orig_x <- rnorm(n, 100, 5)
trip$orig_y <- rnorm(n, 20, 2)
trip$orig_pop <- rpois(n, 10000)

# Destination info
trip$dest_adm0 <- "A"
trip$dest_adm1 <- "A"
trip$dest_adm2 <- "B"
trip$dest_adm3 <- LETTERS[(n + 1):(n * 2)]
trip$dest_type <- "Sub-district" # Type of admin unit for lowest admin level
trip$dest_x <- rnorm(n, 100, 5)
trip$dest_y <- rnorm(n, 20, 2)
trip$dest_pop <- rpois(n, 5000)

# Number of reported trips
trip$trips <- rpois(n, 10)

head(trip)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       68        21         0       <NA>
#> 2 2020-01-01 2020-01-31   30 days       15        30         0       <NA>
#> 3 2020-01-01 2020-01-31   30 days       35        18         1       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1  97.03300 21.28207     9913         A         A         B         D      <NA>
#> 2  99.38857 18.74082    10150         A         A         B         E      <NA>
#> 3 105.89892 18.38453     9817         A         A         B         F      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  97.09174 17.33321     5052     7
#> 2      <NA> Sub-district  99.16385 19.47607     5063    15
#> 3      <NA> Sub-district 102.42997 21.30477     5105    11

#-----------------------
# Stays in home location
#-----------------------

stay <- travel_data_template
n <- 3 # Add some observations
stay[1:n, ] <- NA

# Time span of travel survey
stay$date_start <- as.Date("2020-01-01")
stay$date_stop <- stay$date_start + 30
# Use the stay data frame's own dates (previously this read from `trip`)
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = "days")

# Participant info
stay$indiv_id <- sample(100:200, n)
stay$indiv_age <- round(runif(n, 5, 80))
stay$indiv_sex <- rbinom(n, 1, 0.5)

# Origin and destination info (identical, because the individual stayed home)
stay$orig_adm0 <- stay$dest_adm0 <- "A"
stay$orig_adm1 <- stay$dest_adm1 <- "A"
stay$orig_adm2 <- stay$dest_adm2 <- "A"
stay$orig_adm3 <- stay$dest_adm3 <- LETTERS[1:n]
stay$orig_type <- stay$dest_type <- "Sub-district"
stay$orig_x <- stay$dest_x <- rnorm(n, 100, 5)
stay$orig_y <- stay$dest_y <- rnorm(n, 20, 2)
stay$orig_pop <- stay$dest_pop <- rpois(n, 10000)

# No trip count is recorded for stays
stay$trips <- NA

head(stay)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days      168        11         1       <NA>
#> 2 2020-01-01 2020-01-31   30 days      174        44         0       <NA>
#> 3 2020-01-01 2020-01-31   30 days      196        28         1       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1  94.57423 22.72761    10029         A         A         A         A      <NA>
#> 2 101.81080 18.57695    10019         A         A         A         B      <NA>
#> 3  98.32164 21.32436     9879         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  94.57423 22.72761    10029    NA
#> 2      <NA> Sub-district 101.81080 18.57695    10019    NA
#> 3      <NA> Sub-district  98.32164 21.32436     9879    NA

# Combine trips and stays into a single survey data set. This step was missing
# from the example; the output below shows the `trip` rows followed by the
# `stay` rows, which is what rbind(trip, stay) produces.
survey_data <- rbind(trip, stay)

head(survey_data)
#>   date_start  date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1 2020-01-01 2020-01-31   30 days       68        21         0       <NA>
#> 2 2020-01-01 2020-01-31   30 days       15        30         0       <NA>
#> 3 2020-01-01 2020-01-31   30 days       35        18         1       <NA>
#> 4 2020-01-01 2020-01-31   30 days      168        11         1       <NA>
#> 5 2020-01-01 2020-01-31   30 days      174        44         0       <NA>
#> 6 2020-01-01 2020-01-31   30 days      196        28         1       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5    orig_type
#> 1         A         A         A         A      <NA>      <NA> Sub-district
#> 2         A         A         A         B      <NA>      <NA> Sub-district
#> 3         A         A         A         C      <NA>      <NA> Sub-district
#> 4         A         A         A         A      <NA>      <NA> Sub-district
#> 5         A         A         A         B      <NA>      <NA> Sub-district
#> 6         A         A         A         C      <NA>      <NA> Sub-district
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1  97.03300 21.28207     9913         A         A         B         D      <NA>
#> 2  99.38857 18.74082    10150         A         A         B         E      <NA>
#> 3 105.89892 18.38453     9817         A         A         B         F      <NA>
#> 4  94.57423 22.72761    10029         A         A         A         A      <NA>
#> 5 101.81080 18.57695    10019         A         A         A         B      <NA>
#> 6  98.32164 21.32436     9879         A         A         A         C      <NA>
#>   dest_adm5    dest_type    dest_x   dest_y dest_pop trips
#> 1      <NA> Sub-district  97.09174 17.33321     5052     7
#> 2      <NA> Sub-district  99.16385 19.47607     5063    15
#> 3      <NA> Sub-district 102.42997 21.30477     5105    11
#> 4      <NA> Sub-district  94.57423 22.72761    10029    NA
#> 5      <NA> Sub-district 101.81080 18.57695    10019    NA
#> 6      <NA> Sub-district  98.32164 21.32436     9879    NA

#----------------------------------------
# Dataset with which to extrapolate model
#----------------------------------------

pred <- travel_data_template
n <- 6 # Add some observations
pred[1:n, ] <- NA

# Time span of the interval over which to extrapolate the fitted model
pred$date_span <- as.difftime(7, units = "days")

# Origin info
pred$orig_adm0 <- "A"
pred$orig_adm1 <- "A"
pred$orig_adm2 <- LETTERS[1:n]
pred$orig_type <- "District" # Type of admin unit for lowest admin level
pred$orig_x <- rnorm(n, 100, 5)
pred$orig_y <- rnorm(n, 20, 2)
pred$orig_pop <- rpois(n, 1e+05)

# Number of reported trips (unobserved for extrapolation data).
# Assign to `pred`, not `trip`: writing to `trip` here would erase the
# observed trip counts created above.
pred$trips <- NA

head(pred)
#>   date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type
#> 1       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 2       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 3       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 4       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 5       <NA>      <NA>         7       NA        NA        NA       <NA>
#> 6       <NA>      <NA>         7       NA        NA        NA       <NA>
#>   orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type
#> 1         A         A         A      <NA>      <NA>      <NA>  District
#> 2         A         A         B      <NA>      <NA>      <NA>  District
#> 3         A         A         C      <NA>      <NA>      <NA>  District
#> 4         A         A         D      <NA>      <NA>      <NA>  District
#> 5         A         A         E      <NA>      <NA>      <NA>  District
#> 6         A         A         F      <NA>      <NA>      <NA>  District
#>      orig_x   orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4
#> 1 100.84887 22.07762   100559      <NA>      <NA>      <NA>      <NA>      <NA>
#> 2 100.57056 19.10041   100200      <NA>      <NA>      <NA>      <NA>      <NA>
#> 3  96.01916 18.58272   100116      <NA>      <NA>      <NA>      <NA>      <NA>
#> 4  98.81975 20.06085   100436      <NA>      <NA>      <NA>      <NA>      <NA>
#> 5 105.92918 19.10025    99725      <NA>      <NA>      <NA>      <NA>      <NA>
#> 6 100.93978 21.34105   100173      <NA>      <NA>      <NA>      <NA>      <NA>
#>   dest_adm5 dest_type dest_x dest_y dest_pop trips
#> 1      <NA>      <NA>     NA     NA       NA    NA
#> 2      <NA>      <NA>     NA     NA       NA    NA
#> 3      <NA>      <NA>     NA     NA       NA    NA
#> 4      <NA>      <NA>     NA     NA       NA    NA
#> 5      <NA>      <NA>     NA     NA       NA    NA
#> 6      <NA>      <NA>     NA     NA       NA    NA