BatchDownload.R 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. BatchDownload <-
  2. function(lat.long, start.date, end.date, MODIS.start, MODIS.end, Bands, Products, Size, StartDate, Transect, SaveDir)
  3. {
  4. # DEFINE
  5. NCOL_SERVER_RES <- 10
  6. # Split band names into sets for different products.
  7. which.bands <- lapply(Products, function(x) which(Bands %in% GetBands(x)))
  8. # Loop set up to make request and write a subset file for each location.
  9. for(i in 1:nrow(lat.long))
  10. {
  11. # Retrieve the list of date codes to be requested and organise them in batches of time series's of length 10.
  12. dates <- lapply(Products, function(x) GetDates(lat.long$lat[i], lat.long$long[i], x))
  13. # Check that time-series fall within date range of MODIS data.
  14. if(any((start.date$year + 1900) < 2000 & (end.date$year + 1900) < 2000)){
  15. stop("Time-series found that falls entirely outside the range of available MODIS dates.")
  16. }
  17. if(any((start.date$year + 1900) > max(unlist(dates)) & (end.date$year + 1900) > max(unlist(dates)))){
  18. stop("Time-series found that falls entirely outside the range of available MODIS dates.")
  19. }
  20. if(any((end.date$year + 1900) < 2000) | any((end.date$year + 1900) > max(unlist(dates)))){
  21. stop("Some dates have been found that are beyond the range of MODIS observations available for download.")
  22. }
  23. if(any((start.date$year + 1900) < 2000) | any((start.date$year + 1900) > max(unlist(dates)))){
  24. warning("Dates found beyond range of MODIS observations. Downloading from earliest date.", immediate. = TRUE)
  25. }
  26. ##### Initialise objects that will store downloaded data.
  27. # Find the start date and end date specific for each subset.
  28. start.dates <- lapply(dates, function(x) which(x >= MODIS.start[i]))
  29. end.dates <- lapply(dates, function(x) which(x >= MODIS.end[i]))
  30. # Extract the string of time-steps by snipping end.dates off the end.
  31. date.res <- mapply(function(x, y) x[which(!x %in% y)], x = start.dates, y = end.dates, SIMPLIFY = FALSE)
  32. allProblemDates <- c() # will store any empty dates that come up, so they can be returned to the user
  33. subsets <- mapply(function(x, y) rep(NA, length = (length(x) * length(y))), x = which.bands, y = date.res, SIMPLIFY = FALSE)
  34. subsets.length <- length(unlist(subsets))
  35. #####
  36. cat("Getting subset for location ", i, " of ", nrow(lat.long), "...\n", sep = "")
  37. for(prod in 1:length(Products)){
  38. # Organise relevant MODIS dates into batches of 10. Web service getsubset function will only take 10 at a time.
  39. # Fill up any remaining rows in the final column to avoid data recycling.
  40. ifelse((length(date.res[[prod]]) %% NCOL_SERVER_RES) == 0,
  41. date.list <- matrix(dates[[prod]][date.res[[prod]]], nrow = NCOL_SERVER_RES),
  42. date.list <- matrix(c(dates[[prod]][date.res[[prod]]], rep(NA, NCOL_SERVER_RES - (length(date.res[[prod]]) %% NCOL_SERVER_RES))),
  43. nrow = NCOL_SERVER_RES))
  44. # Set bands for this product.
  45. bands <- Bands[which.bands[[prod]]]
  46. # Loop subset request for each band specified, storing each run into subsets object.
  47. for(n in 1:length(bands)){
  48. if(ncol(date.list) > 1){
  49. # Above statement stops (ncol(date.list)-1)=0 occurring in the loop (i.e. ask for the 0th column of dates).
  50. for(x in 1:(ncol(date.list) - 1)){
  51. # getsubset function return object of ModisData class, with a subset slot that only allows 10 elements
  52. # (i.e. 10 dates), looped until all requested dates have been retrieved.
  53. # Retrieve the batch of MODIS data and store in result
  54. result <- try(GetSubset(lat.long$lat[i], lat.long$long[i], Products[prod], bands[n],
  55. date.list[1,x], date.list[NCOL_SERVER_RES,x], Size[1], Size[2]))
  56. if(!is.list(result)) stop("Downloading from the web service is currently not working. Please try again later.")
  57. if(length(strsplit(as.character(result$subset[[1]][1]), ",")[[1]]) == 5){
  58. stop("Downloading from the web service is currently not working. Please try again later.")
  59. }
  60. busy <- FALSE
  61. if(class(result) != "try-error"){
  62. busy <- grepl("Server is busy handling other requests", result$subset[1])
  63. if(busy) cat("The server is busy handling other requests...\n")
  64. }
  65. # Check data downloaded. If not, wait 30 secs and try again until successful or function times out.
  66. if(class(result) == "try-error" || is.na(result) || busy){
  67. timer <- 1
  68. while(timer <= 10){
  69. cat("Connection to the MODIS Web Service failed: trying again in 30secs...attempt", timer, "\n")
  70. Sys.sleep(30)
  71. result <- try(GetSubset(lat.long$lat[i], lat.long$long[i], Products[prod], bands[n],
  72. date.list[1,x], date.list[NCOL_SERVER_RES,x], Size[1], Size[2]))
  73. if(!is.list(result)) stop("Downloading from the web service is currently not working. Please try again later.")
  74. if(length(strsplit(as.character(result$subset[[1]][1]), ",")[[1]]) == 5){
  75. stop("Downloading from the web service is currently not working. Please try again later.")
  76. }
  77. timer <- timer + 1
  78. ifelse(class(result) == "try-error" || is.na(result) || busy, next, break)
  79. }
  80. ifelse(class(result) == "try-error" || is.na(result) || busy,
  81. cat("Connection to the MODIS Web Service failed: timed out after 10 failed attempts...stopping download.\n"),
  82. break)
  83. stop(result)
  84. }
  85. # Store retrieved data in subsets. If more than 10 time-steps are requested, this runs until the final
  86. # column, which is downloaded after this loop.
  87. result <- with(result, paste(nrow, ncol, xll, yll, pixelsize, subset[[1]], sep = ','))
  88. # Check whether result contains the expected number of dates. If not, find missing dates, add NA placemark, and print warning.
  89. if(length(result) < sum(!is.na(date.list[ ,x]))){
  90. resultDates <- sapply(result, function(x) strsplit(x, ',')[[1]][8], USE.NAMES=FALSE)
  91. whichProblemDates <- which(!(date.list[ ,x] %in% resultDates))
  92. problemDates <- date.list[whichProblemDates,x]
  93. allProblemDates <- c(allProblemDates,problemDates)
  94. result <- replace(rep(NA,sum(!is.na(date.list[ ,x]))), date.list[ ,x] %in% resultDates, result)
  95. warning("There is no data for some requested dates:\n",
  96. "Latitude = ",lat.long$lat[i],"\n",
  97. "Longitude = ",lat.long$long[i],"\n",
  98. "Product = ",Products[prod],"\n",
  99. "Band = ",Bands[n],"\n",
  100. "Dates = ",problemDates,"\n",
  101. call.=FALSE, immediate.=TRUE)
  102. }
  103. subsets[[prod]][(((n - 1) * length(date.res[[prod]])) + ((x * NCOL_SERVER_RES) - (NCOL_SERVER_RES - 1))):
  104. (((n - 1) * length(date.res[[prod]])) + (x * NCOL_SERVER_RES))] <- result
  105. } # End of loop that reiterates for multiple batches of time-steps if the time-series is > 10 time-steps long.
  106. }
  107. #####
  108. # This will download the last column of dates left (either final column or only column if < 10 dates).
  109. result <- try(GetSubset(lat.long$lat[i], lat.long$long[i], Products[prod], bands[n], date.list[1,ncol(date.list)],
  110. date.list[max(which(!is.na(date.list[ ,ncol(date.list)]))),ncol(date.list)], Size[1], Size[2]))
  111. if(!is.list(result)) stop("Downloading from the web service is currently not working. Please try again later.")
  112. if(length(strsplit(as.character(result$subset[[1]][1]), ",")[[1]]) == 5){
  113. stop("Downloading from the web service is currently not working. Please try again later.")
  114. }
  115. busy <- FALSE
  116. if(class(result) != "try-error"){
  117. busy <- grepl("Server is busy handling other requests", result$subset[1])
  118. if(busy) cat("The server is busy handling other requests...\n")
  119. }
  120. if(class(result) == "try-error" || is.na(result) || busy){
  121. timer <- 1
  122. while(timer <= 10){
  123. cat("Connection to the MODIS Web Service failed: trying again in 30secs...attempt", timer, "\n")
  124. Sys.sleep(30)
  125. result <- try(GetSubset(lat.long$lat[i], lat.long$long[i], Products[prod], bands[n], date.list[1,ncol(date.list)],
  126. date.list[max(which(!is.na(date.list[ ,ncol(date.list)]))),ncol(date.list)], Size[1], Size[2]))
  127. if(!is.list(result)) stop("Downloading from the web service is currently not working. Please try again later.")
  128. if(length(strsplit(as.character(result$subset[[1]][1]), ",")[[1]]) == 5){
  129. stop("Downloading from the web service is currently not working. Please try again later.")
  130. }
  131. timer <- timer + 1
  132. ifelse(class(result) == "try-error" || is.na(result) || busy, next, break)
  133. }
  134. ifelse(class(result) == "try-error" || is.na(result) || busy,
  135. cat("Connection to the MODIS Web Service failed: timed out after 10 failed attempts...stopping download.\n"),
  136. break)
  137. stop(result)
  138. }
  139. # Check downloaded subset request contains data: if it contains the following message instead, abort function.
  140. if(regexpr("Server is busy handling other requests in queue", result$subset[[1]][1]) != -1){
  141. stop("Server is busy handling other requests in queue. Please try your subset order later.")
  142. }
  143. # All MODIS data for a given product band now retrieved and stored in subsets.
  144. result <- with(result, paste(nrow, ncol, xll, yll, pixelsize, subset[[1]], sep = ','))
  145. # Check whether result contains the expected number of dates. If not, find missing dates, add NA placemark, and print warning.
  146. if(length(result) < sum(!is.na(date.list[ ,ncol(date.list)]))){
  147. resultDates <- sapply(result, function(x) strsplit(x, ',')[[1]][8], USE.NAMES=FALSE)
  148. whichProblemDates <- which(!(date.list[ ,ncol(date.list)] %in% resultDates))
  149. problemDates <- date.list[whichProblemDates,ncol(date.list)]
  150. allProblemDates <- c(allProblemDates,problemDates)
  151. result <- replace(rep(NA,sum(!is.na(date.list[ ,ncol(date.list)]))), date.list[ ,ncol(date.list)] %in% resultDates, result)
  152. warning("There is no data for some requested dates:\n",
  153. "Latitude = ",lat.long$lat[i],"\n",
  154. "Longitude = ",lat.long$long[i],"\n",
  155. "Product = ",Products[prod],"\n",
  156. "Band = ",Bands[n],"\n",
  157. "Dates = ",problemDates,"\n",
  158. call.=FALSE, immediate.=TRUE)
  159. }
  160. subsets[[prod]][(((n - 1) * length(date.res[[prod]])) + (((ncol(date.list) - 1) * NCOL_SERVER_RES) + 1)):
  161. (((n - 1) * length(date.res[[prod]])) + length(date.res[[prod]]))] <- result
  162. # Check whether any dates in subset are empty and store their subset info for future use.
  163. whichBandN <- (((n-1)*length(date.res[[prod]]))+1) : (n*length(date.res[[prod]]))
  164. if(any(emptySubsets <- sapply(subsets[[prod]][whichBandN], function(x) grepl("character(0)",x,fixed=TRUE)))){
  165. problemDates <- dates[[prod]][date.res[[prod]][which(emptySubsets)]]
  166. allProblemDates <- c(allProblemDates,problemDates)
  167. warning("There is no data for some requested dates:\n",
  168. "Latitude = ",lat.long$lat[i],"\n",
  169. "Longitude = ",lat.long$long[i],"\n",
  170. "Product = ",Products[prod],"\n",
  171. "Band = ",Bands[n],"\n",
  172. "Dates = ",problemDates,"\n",
  173. call.=FALSE, immediate.=TRUE)
  174. }
  175. } # End of loop for each band.
  176. } # End of loop for each product.
  177. subsets <- do.call("c", subsets)
  178. ##### Check that there is no missing data in the download & log download status accordingly.
  179. if(length(subsets) != subsets.length | any(is.na(subsets))){
  180. lat.long$Status[i] <- paste("Some dates were missing:", paste(unique(allProblemDates),collapse="; "))
  181. subsets <- subsets[!is.na(subsets)]
  182. } else {
  183. lat.long$Status[i] <- "Successful download"
  184. }
  185. if("," %in% substr(subsets, nchar(subsets), nchar(subsets))){
  186. lat.long$Status[i] <- "Missing data in subset: try downloading again"
  187. cat("Missing information for time-series ", lat.long$SubsetID[i], ". See subset download file.\n", sep = "")
  188. } else {
  189. lat.long$Status[i] <- "Successful download"
  190. }
  191. #####
  192. # Remove any empty subsets
  193. if(any(problemDates <- grep("character(0)", subsets, fixed=TRUE))){
  194. allProblemDates <- c(allProblemDates,problemDates)
  195. subsets <- subsets[-problemDates]
  196. lat.long$Status[i] <- paste("Some dates were missing:", paste(unique(allProblemDates),collapse="; "))
  197. }
  198. # Write an ascii file with all dates for each band at a given location into the working directory.
  199. prods <- paste(Products, collapse = "_")
  200. if(!Transect) write(subsets, file = file.path(SaveDir, paste(lat.long$SubsetID[i], "___", prods, ".asc", sep = "")), sep = "")
  201. if(Transect){
  202. if(i == 1) write(subsets, file = file.path(SaveDir, paste(lat.long$SubsetID[i], "___", prods, ".asc", sep = "")), sep = "")
  203. if(i != 1) write(subsets, file = file.path(SaveDir, paste(lat.long$SubsetID[i], "___", prods, ".asc", sep = "")), sep = "", append = TRUE)
  204. }
  205. if(i == nrow(lat.long)) cat("Full subset download complete. Writing the subset download file...\n")
  206. }
  207. return(lat.long)
  208. }