This template data frame provides a general structure for travel data that integrates with data synthesis and modeling functions. Stays (individuals reported as not traveling outside their home location) should be included in this data frame as rows where the origin and destination are the same. Note that models fitted and then extrapolated using other data assume that the same method for defining population size is used throughout. Either the dates (date_start and date_stop) or the time span (date_span) must be filled.

travel_data_template

Format

a data frame with empty columns and generalized column names

date_start

date: beginning of the time interval for the trip count

date_stop

date: end of the time interval for the trip count

date_span

integer: time span in days

indiv_id

integer: unique individual identifier

indiv_age

numeric: age of participant

indiv_sex

logical: gender of participant

indiv_type

factor: if individual participants belong to different groups

orig_adm0

character: name of highest administration level of origin location (Country)

orig_adm1

character: name of administration level 1 of origin location (e.g. Division, State)

orig_adm2

character: name of administration level 2 of origin location (e.g. District, County)

orig_adm3

character: name of administration level 3 of origin location (e.g. Sub-district, Province)

orig_adm4

character: name of administration level 4 of origin location (e.g. City, Municipality)

orig_adm5

character: name of administration level 5 of origin location (e.g. Town, Village, Community, Ward)

orig_type

character: administrative type for the origin location (e.g. sub-district, community vs town, or urban vs rural)

orig_x

numeric: longitude of origin location centroid in decimal degrees (centroid of smallest admin unit)

orig_y

numeric: latitude of origin location centroid in decimal degrees (centroid of smallest admin unit)

orig_pop

numeric: population size of lowest administrative unit for origin location

dest_adm0

character: name of highest administration level of destination location (Country)

dest_adm1

character: name of administration level 1 of destination location (e.g. Division, State)

dest_adm2

character: name of administration level 2 of destination location (e.g. District, County)

dest_adm3

character: name of administration level 3 of destination location (e.g. Sub-district, Province)

dest_adm4

character: name of administration level 4 of destination location (e.g. City, Municipality)

dest_adm5

character: name of administration level 5 of destination location (e.g. Town, Village, Community, Ward)

dest_type

character: administrative type for the destination location (e.g. sub-district, community vs town, or urban vs rural)

dest_x

numeric: longitude of destination location centroid in decimal degrees (centroid of smallest admin unit)

dest_y

numeric: latitude of destination location centroid in decimal degrees (centroid of smallest admin unit)

dest_pop

numeric: population size of lowest administrative unit for destination location

trips

numeric: total number of observed trips made from origin to destination during time span

Examples

#--------------------------------
# Travel among locations
#--------------------------------
# Build a small simulated survey of trips where origin and destination differ.
trip <- travel_data_template
n <- 3

# Create n empty (all-NA) rows that are filled in column by column below
trip[seq_len(n), ] <- NA

# Survey window: start date, stop date 30 days later, and the span between them
trip$date_start <- as.Date("2020-01-01")
trip$date_stop <- trip$date_start + 30
trip$date_span <- difftime(trip$date_stop, trip$date_start, units = "days")

# Simulated participant attributes (id, age in whole years, 0/1 sex indicator)
trip$indiv_id <- sample(1:100, n)
trip$indiv_age <- round(runif(n, 5, 80))
trip$indiv_sex <- rbinom(n, 1, 0.5)

# Origin: all rows share admin levels 0-2; level 3 is A, B, C
trip$orig_adm0 <- "A"
trip$orig_adm1 <- "A"
trip$orig_adm2 <- "A"
trip$orig_adm3 <- LETTERS[seq_len(n)]
trip$orig_type <- "Sub-district" # Type of admin unit for lowest admin level
trip$orig_x <- rnorm(n, 100, 5)
trip$orig_y <- rnorm(n, 20, 2)
trip$orig_pop <- rpois(n, 10000)

# Destination: a different level-2 unit ("B"); level 3 is D, E, F
trip$dest_adm0 <- "A"
trip$dest_adm1 <- "A"
trip$dest_adm2 <- "B"
trip$dest_adm3 <- LETTERS[(n + 1):(n * 2)]
trip$dest_type <- "Sub-district" # Type of admin unit for lowest admin level
trip$dest_x <- rnorm(n, 100, 5)
trip$dest_y <- rnorm(n, 20, 2)
trip$dest_pop <- rpois(n, 5000)

# Number of reported trips from origin to destination
trip$trips <- rpois(n, 10)

head(trip)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 68 21 0 <NA> #> 2 2020-01-01 2020-01-31 30 days 15 30 0 <NA> #> 3 2020-01-01 2020-01-31 30 days 35 18 1 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 97.03300 21.28207 9913 A A B D <NA> #> 2 99.38857 18.74082 10150 A A B E <NA> #> 3 105.89892 18.38453 9817 A A B F <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 97.09174 17.33321 5052 7 #> 2 <NA> Sub-district 99.16385 19.47607 5063 15 #> 3 <NA> Sub-district 102.42997 21.30477 5105 11
#-----------------------
# Stays in home location
#-----------------------
# Build simulated records for individuals who did not travel: every origin
# field is assigned together with its destination counterpart so the two
# locations are identical.
stay <- travel_data_template
n <- 3

# Create n empty (all-NA) rows that are filled in column by column below
stay[seq_len(n), ] <- NA

# Time span of travel survey
stay$date_start <- as.Date("2020-01-01")
stay$date_stop <- stay$date_start + 30
# Compute the span from this data frame's own dates (not from `trip`), so the
# example does not depend on the trip example having been run first.
stay$date_span <- difftime(stay$date_stop, stay$date_start, units = "days")

# Participant info
stay$indiv_id <- sample(100:200, n)
stay$indiv_age <- round(runif(n, 5, 80))
stay$indiv_sex <- rbinom(n, 1, 0.5)

# Origin and destination info: chained assignment gives both sides the same
# values (for the random draws, the very same draw is used for both).
stay$orig_adm0 <- stay$dest_adm0 <- "A"
stay$orig_adm1 <- stay$dest_adm1 <- "A"
stay$orig_adm2 <- stay$dest_adm2 <- "A"
stay$orig_adm3 <- stay$dest_adm3 <- LETTERS[seq_len(n)]
stay$orig_type <- stay$dest_type <- "Sub-district"
stay$orig_x <- stay$dest_x <- rnorm(n, 100, 5)
stay$orig_y <- stay$dest_y <- rnorm(n, 20, 2)
stay$orig_pop <- stay$dest_pop <- rpois(n, 10000)

# No trips are recorded for stays
stay$trips <- NA

head(stay)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 168 11 1 <NA> #> 2 2020-01-01 2020-01-31 30 days 174 44 0 <NA> #> 3 2020-01-01 2020-01-31 30 days 196 28 1 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 94.57423 22.72761 10029 A A A A <NA> #> 2 101.81080 18.57695 10019 A A A B <NA> #> 3 98.32164 21.32436 9879 A A A C <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 94.57423 22.72761 10029 NA #> 2 <NA> Sub-district 101.81080 18.57695 10019 NA #> 3 <NA> Sub-district 98.32164 21.32436 9879 NA
# Combine the trip and stay records into a single survey data set.
# No `by` is supplied, so dplyr joins on every column the two share and
# reports the columns it used in a message.
survey_data <- dplyr::full_join(x = trip, y = stay)
#> Joining, by = c("date_start", "date_stop", "date_span", "indiv_id", "indiv_age", "indiv_sex", "indiv_type", "orig_adm0", "orig_adm1", "orig_adm2", "orig_adm3", "orig_adm4", "orig_adm5", "orig_type", "orig_x", "orig_y", "orig_pop", "dest_adm0", "dest_adm1", "dest_adm2", "dest_adm3", "dest_adm4", "dest_adm5", "dest_type", "dest_x", "dest_y", "dest_pop", "trips")
# Show the first six rows of the combined trip and stay records
head(survey_data, n = 6L)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 2020-01-01 2020-01-31 30 days 68 21 0 <NA> #> 2 2020-01-01 2020-01-31 30 days 15 30 0 <NA> #> 3 2020-01-01 2020-01-31 30 days 35 18 1 <NA> #> 4 2020-01-01 2020-01-31 30 days 168 11 1 <NA> #> 5 2020-01-01 2020-01-31 30 days 174 44 0 <NA> #> 6 2020-01-01 2020-01-31 30 days 196 28 1 <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A A <NA> <NA> Sub-district #> 2 A A A B <NA> <NA> Sub-district #> 3 A A A C <NA> <NA> Sub-district #> 4 A A A A <NA> <NA> Sub-district #> 5 A A A B <NA> <NA> Sub-district #> 6 A A A C <NA> <NA> Sub-district #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 97.03300 21.28207 9913 A A B D <NA> #> 2 99.38857 18.74082 10150 A A B E <NA> #> 3 105.89892 18.38453 9817 A A B F <NA> #> 4 94.57423 22.72761 10029 A A A A <NA> #> 5 101.81080 18.57695 10019 A A A B <NA> #> 6 98.32164 21.32436 9879 A A A C <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> Sub-district 97.09174 17.33321 5052 7 #> 2 <NA> Sub-district 99.16385 19.47607 5063 15 #> 3 <NA> Sub-district 102.42997 21.30477 5105 11 #> 4 <NA> Sub-district 94.57423 22.72761 10029 NA #> 5 <NA> Sub-district 101.81080 18.57695 10019 NA #> 6 <NA> Sub-district 98.32164 21.32436 9879 NA
#----------------------------------------
# Dataset with which to extrapolate model
#----------------------------------------
# Build a prediction data set: only the time span and origin columns are
# filled; dates, participant, and destination columns are left as NA.
pred <- travel_data_template
n <- 6

# Create n empty (all-NA) rows that are filled in column by column below
pred[seq_len(n), ] <- NA

# Time span of the interval over which to extrapolate the fitted model
pred$date_span <- as.difftime(7, units = "days")

# Origin info
pred$orig_adm0 <- "A"
pred$orig_adm1 <- "A"
pred$orig_adm2 <- LETTERS[seq_len(n)]
pred$orig_type <- "District" # Type of admin unit for lowest admin level
pred$orig_x <- rnorm(n, 100, 5)
pred$orig_y <- rnorm(n, 20, 2)
pred$orig_pop <- rpois(n, 1e+05)

# Number of reported trips (unobserved for extrapolation data).
# Assign to `pred`, not `trip`: writing to `trip` here would wipe the trip
# counts created in the first example.
pred$trips <- NA

head(pred)
#> date_start date_stop date_span indiv_id indiv_age indiv_sex indiv_type #> 1 <NA> <NA> 7 NA NA NA <NA> #> 2 <NA> <NA> 7 NA NA NA <NA> #> 3 <NA> <NA> 7 NA NA NA <NA> #> 4 <NA> <NA> 7 NA NA NA <NA> #> 5 <NA> <NA> 7 NA NA NA <NA> #> 6 <NA> <NA> 7 NA NA NA <NA> #> orig_adm0 orig_adm1 orig_adm2 orig_adm3 orig_adm4 orig_adm5 orig_type #> 1 A A A <NA> <NA> <NA> District #> 2 A A B <NA> <NA> <NA> District #> 3 A A C <NA> <NA> <NA> District #> 4 A A D <NA> <NA> <NA> District #> 5 A A E <NA> <NA> <NA> District #> 6 A A F <NA> <NA> <NA> District #> orig_x orig_y orig_pop dest_adm0 dest_adm1 dest_adm2 dest_adm3 dest_adm4 #> 1 100.84887 22.07762 100559 <NA> <NA> <NA> <NA> <NA> #> 2 100.57056 19.10041 100200 <NA> <NA> <NA> <NA> <NA> #> 3 96.01916 18.58272 100116 <NA> <NA> <NA> <NA> <NA> #> 4 98.81975 20.06085 100436 <NA> <NA> <NA> <NA> <NA> #> 5 105.92918 19.10025 99725 <NA> <NA> <NA> <NA> <NA> #> 6 100.93978 21.34105 100173 <NA> <NA> <NA> <NA> <NA> #> dest_adm5 dest_type dest_x dest_y dest_pop trips #> 1 <NA> <NA> NA NA NA NA #> 2 <NA> <NA> NA NA NA NA #> 3 <NA> <NA> NA NA NA NA #> 4 <NA> <NA> NA NA NA NA #> 5 <NA> <NA> NA NA NA NA #> 6 <NA> <NA> NA NA NA NA