getHdf.R 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. if ( !isGeneric("getHdf") ) {
  2. setGeneric("getHdf", function(product, ...)
  3. standardGeneric("getHdf"))
  4. }
  5. #' Create or Update Local Subset of Online MODIS Data Pool
  6. #'
  7. #' @description
  8. #' Create or update a local user-defined subset of the global MODIS grid data
  9. #' archive. Based on user-specific parameters the function checks in the local
  10. #' archive for available data and downloads missing data from the online MODIS
  11. #' data pool. When run in a schedule job, the function manage the continuous
  12. #' update of the local MODIS data archive.
  13. #'
  14. #' @param product \code{character}. MODIS grid product to be downloaded, see
  15. #' \code{\link{getProduct}}. Use dot notation to address Terra and Aqua products
  16. #' (e.g. \code{M.D13Q1}).
  17. #' @param begin \code{character}. Begin date of MODIS time series, see
  18. #' \code{\link{transDate}} for formatting.
  19. #' @param end \code{character}. End date, compatible with future dates for
  20. #' continuous updates via scheduled jobs.
  21. #' @param tileH \code{numeric} or \code{character}. Horizontal tile number(s),
  22. #' see \code{\link{getTile}}.
  23. #' @param tileV \code{numeric} or \code{character}. Vertical tile number(s),
  24. #' see \code{tileH}.
  25. #' @param extent See Details in \code{\link{getTile}}.
  26. #' @param collection Desired MODIS product collection as \code{character},
  27. #' \code{integer}, or \code{list} as returned by \code{\link{getCollection}}.
  28. #' @param HdfName \code{character} vector or \code{list}. Full HDF file name(s)
  29. #' to download a small set of files. If specified, other file-related parameters
  30. #' (i.e., \code{begin}, \code{end}, \code{collection}, etc.) are ignored.
  31. #' @param checkIntegrity \code{logical}. If \code{TRUE} (default), the size of
  32. #' each downloaded file is checked. In case of inconsistencies, the function
  33. #' tries to re-download broken files.
  34. #' @param forceDownload \code{logical}. If \code{TRUE} (default), try to
  35. #' download data irrespective of whether online information could be retrieved
  36. #' via \code{MODIS:::getStruc} or not.
  37. #' @param ... Further arguments passed to \code{\link{MODISoptions}}, eg 'wait'.
  38. #'
  39. #' @return
  40. #' An invisible vector of downloaded data and paths.
  41. #'
  42. #' @references
  43. #' MODIS data is obtained through the online Data Pool at the NASA Land
  44. #' Processes Distributed Active Archive Center (LP DAAC), USGS/Earth Resources
  45. #' Observation and Science (EROS) Center, Sioux Falls, South Dakota
  46. #' \url{https://lpdaac.usgs.gov/get_data}.
  47. #'
  48. #' @author
  49. #' Matteo Mattiuzzi
  50. #'
  51. #' @examples
  52. #' \dontrun{
  53. #' # One or more specific file (no regular erpression allowed here)
  54. #' a <- getHdf(HdfName = c("MYD11A1.A2009001.h18v04.006.2015363221538.hdf",
  55. #' "MYD11A1.A2009009.h18v04.006.2015364055036.hdf",
  56. #' "MYD11A1.A2009017.h18v04.006.2015364115403.hdf"))
  57. #' a
  58. #'
  59. #' # Get all MODIS Terra and Aqua M*D11A1 data from 1 December 2016 up to today
  60. #' # (can be ran in a sceduled job for daily archive update)
  61. #' b1 <- getHdf(product = "M.D11A1", begin = "2016.12.01",
  62. #' tileH = 18:19, tileV = 4)
  63. #' b1
  64. #'
  65. #' # Same tiles with a 'list' extent
  66. #' Austria <- list(xmax = 17.47, xmin = 9.2, ymin = 46.12, ymax = 49.3)
  67. #' b2 <- getHdf(product = "M.D11A1", begin = "2016336", extent = Austria)
  68. #' b2
  69. #'
  70. #' # Using country boarders from 'mapdata' package
  71. #' c <- getHdf(product = "M.D11A1", begin = "2016306", end = "2016335",
  72. #' extent = "Luxembourg")
  73. #' c
  74. #'
  75. #' # Interactive selection of spatial extent, see getTile()
  76. #' d <- getHdf(product = "M.D11A1", begin = "2016306", end = "2016307")
  77. #' d
  78. #' }
  79. #'
  80. #' @export getHdf
  81. #' @name getHdf
  82. ################################################################################
  83. ### function using 'character' input ###########################################
  84. #' @aliases getHdf,character-method
  85. #' @rdname getHdf
  86. setMethod("getHdf",
  87. signature(product = "character"),
  88. function(product, HdfName,
  89. begin = NULL, end = NULL,
  90. tileH = NULL, tileV = NULL, extent = NULL,
  91. collection = NULL, checkIntegrity = TRUE,
  92. forceDownload = TRUE, ...) {
  93. ## if 'HdfName' is provided, call 'missing'-method
  94. if (!missing(HdfName))
  95. getHdf(HdfName = HdfName, checkIntegrity = checkIntegrity, ...)
  96. opts <- combineOptions(...)
  97. sturheit <- stubborn(level=opts$stubbornness)
  98. wait <- as.numeric(opts$wait)
  99. # TODO HdfName as regex
  100. if (missing(product))
  101. stop("Please provide a supported 'product', see getProduct().\n")
  102. #######
  103. # check product
  104. product <- getProduct(x=product,quiet=TRUE)
  105. # check if missing collection, else believe it
  106. product$CCC <- if (is.null(collection)) {
  107. unlist(getCollection(product = product, quiet = TRUE, forceCheck = TRUE))
  108. } else {
  109. sprintf("%03d",as.numeric(unlist(collection)[1]))
  110. }
  111. #########
  112. if (product$SENSOR[1]=="MODIS")
  113. {
  114. if (is.null(begin))
  115. {
  116. cat("No begin(-date) set, getting data from the beginning\n")
  117. }
  118. if (is.null(end))
  119. {
  120. cat("No end(-date) set, getting data up to the most actual\n")
  121. }
  122. # tranform dates
  123. tLimits <- transDate(begin=begin,end=end)
  124. }
  125. dates <- list()
  126. output <- list() # path info for the invisible output
  127. l=0
  128. for(z in seq_along(product$PRODUCT))
  129. { # Platforms MOD/MYD
  130. if (product$TYPE[z]=="Swath")
  131. {
  132. cat("'Swath'-products not yet supported, jumping to the next.\n")
  133. } else
  134. {
  135. todo <- paste0(product$PRODUCT[z],".",product$CCC[z])
  136. for (u in seq_along(todo))
  137. {
  138. # tileID
  139. if (product$TYPE[z]=="CMG")
  140. {
  141. tileID="GLOBAL"
  142. ntiles=1
  143. } else
  144. {
  145. if (!inherits(extent, "MODISextent")) {
  146. extent = getTile(x = extent, tileH = tileH, tileV = tileV)
  147. }
  148. tileID <- extent@tile
  149. ntiles <- length(tileID)
  150. }
  151. ## ensure compatibility with servers other than those specified in
  152. ## `opts$MODISserverOrder`, e.g. when downloading 'MOD16A2' from NTSG
  153. server <- product$SOURCE[[z]]
  154. if (length(server) > 1) {
  155. # alternative server, i.e. when priority is not reachable
  156. server_alt <- server[which(server != opts$MODISserverOrder[1])]
  157. # priority server from which structure will be tried to retrieve first
  158. server <- server[which(server == opts$MODISserverOrder[1])]
  159. } else {
  160. opts$MODISserverOrder <- server
  161. }
  162. ## this time, suppress console output from `getStruc`
  163. jnk <- capture.output(
  164. onlineInfo <- suppressWarnings(
  165. getStruc(product = product$PRODUCT[z], server = server,
  166. collection = product$CCC[z], begin = tLimits$begin,
  167. end = tLimits$end, wait = wait)
  168. )
  169. )
  170. if(!is.na(onlineInfo$online))
  171. {
  172. if (!onlineInfo$online & length(opts$MODISserverOrder)==2 &
  173. server %in% c("LPDAAC", "LAADS"))
  174. {
  175. cat(server," seems not online, trying on '",server_alt,"':\n",sep="")
  176. jnk = capture.output(
  177. onlineInfo <- getStruc(product = product$PRODUCT[z], collection = product$CCC[z],
  178. begin = tLimits$begin, end = tLimits$end,
  179. wait = wait, server = server_alt)
  180. )
  181. }
  182. if(is.null(onlineInfo$dates))
  183. {
  184. stop("Could not connect to server(s), and no data is available offline!\n")
  185. }
  186. if(!is.na(onlineInfo$online))
  187. {
  188. if(!onlineInfo$online & !forceDownload)
  189. {
  190. cat("Could not connect to server(s), data download disabled! Try to set 'forceDownload = TRUE' in order to enable online file download...\n")
  191. }
  192. }
  193. }
  194. datedirs <- as.Date(onlineInfo$dates)
  195. datedirs <- datedirs[!is.na(datedirs)]
  196. sel <- datedirs
  197. us <- sel >= tLimits$begin & sel <= tLimits$end
  198. if (sum(us,na.rm=TRUE)>0)
  199. {
  200. suboutput <- list()
  201. l=l+1
  202. dates[[l]] <- datedirs[us]
  203. dates[[l]] <- cbind(as.character(dates[[l]]),matrix(rep(NA, length(dates[[l]])*ntiles),ncol=ntiles,nrow=length(dates[[l]])))
  204. colnames(dates[[l]]) <- c("date",tileID)
  205. for (i in 1:nrow(dates[[l]]))
  206. {
  207. year <- format(as.Date(dates[[l]][i,1]), "%Y")
  208. doy <- as.integer(format(as.Date(dates[[l]][i,1]), "%j"))
  209. doy <- sprintf("%03d",doy)
  210. mtr <- rep(1,ntiles) # for file availability flaging
  211. path <- genString(x = strsplit(todo[u], "\\.")[[1]][1]
  212. , collection = strsplit(todo[u], "\\.")[[1]][2]
  213. , date = dates[[l]][i, 1])
  214. for(j in 1:ntiles)
  215. {
  216. dates[[l]][i,j+1] <- paste0(strsplit(todo[u],"\\.")[[1]][1],".",paste0("A",year,doy),".",if (tileID[j]!="GLOBAL") {paste0(tileID[j],".")},strsplit(todo[u],"\\.")[[1]][2],".*.hdf$") # create pattern
  217. if (length(dir(path$localPath,pattern=dates[[l]][i,j+1]))>0)
  218. { # if available locally
  219. HDF <- dir(path$localPath,pattern=dates[[l]][i,j+1]) # extract HDF file
  220. if (length(HDF)>1)
  221. { # in very recent files sometimes there is more than 1 file/tile/date if so get the most recent processing date
  222. select <- list()
  223. for (d in 1:length(HDF))
  224. {
  225. select[[d]]<- strsplit(HDF[d],"\\.")[[1]][5]
  226. }
  227. HDF <- HDF[which.max(unlist(select))]
  228. }
  229. dates[[l]][i,j+1] <- HDF
  230. mtr[j] <- 0
  231. }
  232. }
  233. if (sum(mtr)!=0 & (onlineInfo$online | is.na(onlineInfo$online) | forceDownload))
  234. { # if one or more of the tiles in the given date is missing, its necessary to go online
  235. if(exists("ftpfiles"))
  236. {
  237. rm(ftpfiles)
  238. }
  239. for (g in 1:sturheit)
  240. { # get list of FILES in remote dir
  241. # server <- names(path$remotePath)[g%%length(path$remotePath)+1]
  242. ftpfiles <- try(filesUrl(path$remotePath[[which(names(path$remotePath)==onlineInfo$source)]]),silent=TRUE)
  243. if(ftpfiles[1]==FALSE)
  244. {
  245. rm(ftpfiles)
  246. }
  247. if(exists("ftpfiles"))
  248. {
  249. break
  250. }
  251. Sys.sleep(wait)
  252. }
  253. if(!exists("ftpfiles"))
  254. {
  255. stop("Problems with online connections try a little later")
  256. }
  257. if (ftpfiles[1] != "total 0")
  258. {
  259. ftpfiles <- unlist(lapply(strsplit(ftpfiles," "), function(x) {
  260. x[length(x)]
  261. })) # found empty dir!
  262. if (onlineInfo$source == "NTSG") {
  263. ftpfiles = gsub(paste0("\\.", product$CCC[z], "\\.")
  264. , ifelse(product$PF3 == "MOD16", ".105.", ".305.")
  265. , ftpfiles)
  266. }
  267. for(j in 1:ntiles)
  268. { # j=1
  269. if(mtr[j]==1)
  270. { # if tile is missing get it
  271. dts = dates[[l]][i, j+1]
  272. if (onlineInfo$source == "NTSG") {
  273. dts = gsub(paste0("\\.", product$CCC[z], "\\.")
  274. , ifelse(product$PF3 == "MOD16", ".105.", ".305.")
  275. , dts)
  276. dts = paste(c(strsplit(dts, "\\.")[[1]][1:4], "*.hdf"), collapse = ".")
  277. }
  278. onFtp = grep(ftpfiles,pattern = dts,value = TRUE)
  279. HDF <- grep(onFtp,pattern=".hdf$",value=TRUE)
  280. if(length(HDF)>0)
  281. {
  282. if (length(HDF)>1)
  283. { # in very recent files sometimes there is more than 1 file/tile/date if so get the last
  284. select <- list()
  285. for (d in seq_along(HDF))
  286. {
  287. select[[d]] <- strsplit(HDF[d],"\\.")[[1]][5]
  288. }
  289. HDF <- HDF[which.max(unlist(select))]
  290. }
  291. dates[[l]][i,j+1] <- HDF
  292. hdf <- ModisFileDownloader(HDF, opts = opts)
  293. mtr[j] <- hdf
  294. } else
  295. {
  296. dates[[l]][i,j+1] <- NA
  297. }
  298. }
  299. }
  300. } else
  301. {
  302. dates[[l]][i,(j+1):ncol(dates[[l]])] <- NA
  303. } # on ftp is possible to find empty folders!
  304. }
  305. if(checkIntegrity)
  306. { # after each 'i' do the sizeCheck
  307. isIn <- doCheckIntegrity(paste0(path$localPath,dates[[l]][i,-1]), opts = opts)
  308. }
  309. suboutput[[i]] <- paste0(path$localPath,dates[[l]][i,-1])
  310. } # end i
  311. output[[l]] <- as.character(unlist(suboutput))
  312. names(output)[l] <- todo[u]
  313. } else
  314. {
  315. cat(paste0("No files on ftp in date range for: ",todo[u],"\n\n"))
  316. }
  317. }
  318. }
  319. }
  320. return(invisible(output))
  321. }) ## END: FTP vs ARC check and download
  322. ################################################################################
  323. ### function using 'missing' input #############################################
  324. #' @aliases getHdf,missing-method
  325. #' @rdname getHdf
  326. setMethod("getHdf",
  327. signature(product = "missing"),
  328. function(HdfName, checkIntegrity = TRUE, ...) {
  329. opts <- combineOptions(...)
  330. wait <- as.numeric(opts$wait)
  331. ## loop over 'HdfName'
  332. if (inherits(HdfName, "list"))
  333. HdfName <- unlist(HdfName)
  334. HdfName <- basename(HdfName)
  335. dates <- sapply(HdfName, function(i) {
  336. path <- genString(i, opts = opts)
  337. path$localPath <- setPath(path$localPath)
  338. if (!file.exists(paste0(path$localPath, "/", i)))
  339. ModisFileDownloader(i, opts = opts)
  340. if(checkIntegrity)
  341. jnk <- doCheckIntegrity(i, opts = opts)
  342. paste0(path$local, "/", i)
  343. })
  344. ## return output
  345. return(invisible(dates))
  346. })