# MODISSummaries.R

MODISSummaries <-
function(LoadDat, FileSep = NULL, Dir = ".", Product, Bands, ValidRange, NoDataFill, ScaleFactor, StartDate = FALSE,
         QualityScreen = FALSE, QualityBand = NULL, QualityThreshold = NULL, Mean = TRUE, SD = TRUE, Min = TRUE, Max = TRUE,
         Yield = FALSE, Interpolate = FALSE, InterpolateN = NULL, DiagnosticPlot = FALSE)
{
  # DEFINE
  NUM_METADATA_COLS <- 10

  if(Dir == '.') cat('MODIS data files from ', getwd(),
                     ' will be summarised.\nSummary files will be written to the same directory.\n', sep = '')
  if(Dir != '.') cat('MODIS data files from ', Dir,
                     ' will be summarised.\nSummary files will be written to the same directory.\n', sep = '')

  # Load the input time-series data: either an external data file or an R object.
  if(!is.object(LoadDat) & !is.character(LoadDat)) stop("LoadDat must be an object in R or a file path character string.")

  if(is.object(LoadDat)) details <- data.frame(LoadDat)

  if(is.character(LoadDat)){
    if(!file.exists(LoadDat)) stop("Character string input for LoadDat does not resemble an existing file path.")
    if(is.null(FileSep)) stop("To load a file as input, you must also specify its delimiter (FileSep).")
    details <- read.delim(LoadDat, sep = FileSep)
  }
  #####

  # Check that the latitude and longitude columns are named "lat" and "long", as required.
  if(!any(names(details) == "lat") | !any(names(details) == "long")){
    stop("Could not find columns for latitude and/or longitude in your data set. Must be named 'lat' and 'long'.")
  }

  if(!file.exists(Dir)) stop("Character string input for Dir argument does not resemble an existing file path.")

  # Check valid inputs for the quality screening process.
  if(!is.logical(QualityScreen)) stop("QualityScreen argument should be a logical input. See help ?MODISSummaries.")
  if(QualityScreen){
    if(is.null(QualityBand) | is.null(QualityThreshold)) stop("QualityBand and QualityThreshold not specified.")
  }

  product.bands <- try(GetBands(Product), silent = TRUE)
  if(!inherits(product.bands, "try-error")){
    # Check that Bands and QualityBand belong to Product.
    if(!all(Bands %in% product.bands)) stop(paste("Band input does not match with", Product, "product.", sep = " "))
    if(QualityScreen){
      if(Product == "MCD43A4"){
        if(QualityBand != "BRDF_Albedo_Band_Quality") stop("QualityBand input is not QA data for MCD43A4 product.")
      } else {
        if(!any(product.bands == QualityBand)) stop(paste("QualityBand is not QA data for", Product, "product.", sep = " "))
      }
    }
  } else {
    cat("MODIS server temporarily overloaded, so user input checking was skipped.\n")
  }

  # NoDataFill should be a single integer.
  if(length(NoDataFill) != 1) stop("NoDataFill input must be one integer.")
  if(!is.numeric(NoDataFill)) stop("NoDataFill should be numeric class. One integer.")
  if(abs(NoDataFill - round(NoDataFill)) > .Machine$double.eps^0.5) stop("NoDataFill input must be one integer.")

  # ValidRange should be a numeric vector of length 2.
  if(length(ValidRange) != 2) stop("ValidRange input must be a numeric vector - an upper and lower bound.")
  if(!is.numeric(ValidRange)) stop("ValidRange should be numeric class.")

  # ScaleFactor should be numeric, length 1.
  if(length(ScaleFactor) != 1) stop("ScaleFactor input must be one numeric element.")
  if(!is.numeric(ScaleFactor)) stop("ScaleFactor should be numeric class.")

  # Are dates given as years or in POSIXt format?
  if(all(is.na(details$end.date))) stop("Time-series end.date variable in LoadDat is empty.")

  Year <- FALSE
  POSIXt <- FALSE
  posix.compatible <- try(as.POSIXlt(details$end.date), silent = TRUE)

  if(any(class(details$end.date) == "POSIXt") | all(class(posix.compatible) != "try-error")) POSIXt <- TRUE
  if(all(is.numeric(details$end.date) & nchar(as.character(details$end.date)) == 4) &
     any(class(posix.compatible) == "try-error")) Year <- TRUE

  if(!Year & !POSIXt) stop("Date information in LoadDat is not recognised as years or as POSIXt format.")
  if(Year & POSIXt) stop("Date information in LoadDat is recognised as both year and POSIXt formats.")

  if(StartDate){
    if(!any(names(details) == "start.date") | all(is.na(details$start.date))){
      stop("StartDate == TRUE, but no start.date field found in LoadDat.")
    }
  }
  #####

  # Date and time stamp used in the output file names.
  date <- as.POSIXlt(Sys.time())
  file.date <- paste(as.Date(date),
                     paste(paste0("h", date$hour), paste0("m", date$min), paste0("s", round(date$sec, digits = 0)), sep = "-"),
                     sep = "_")

  # Get a list of all downloaded subset (.asc) files in the data directory.
  file.list <- list.files(path = Dir, pattern = paste(Product, ".*asc$", sep = ""))
  if(length(file.list) == 0) stop("Found no MODIS data files in Dir that match the request.")

  size.test <- sapply(file.path(Dir, file.list), function(x) ncol(read.csv(x)[1, ]) - NUM_METADATA_COLS)
  if(!all(size.test == size.test[1])) stop("The number of pixels (Size) in subsets identified are not all the same.")
  # size.test confirmed all subsets are compatible for processing to one summary file, so size.test[1] can be used from here on.
  num.pixels <- unname(size.test[1])

  # Extract IDs for ASCII files so they can be included in summary output; ncol = length(file.list), nrow = size.test.
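  # Each .asc file name begins with its SubsetID followed by "___", so the characters before "___" give the ID,
  # repeated once per pixel in that subset.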
  where.id <- regexpr("___", file.list)
  id <- matrix(unname(mapply(function(x, y, z) rep(substr(x, 1, y - 1), z), x = file.list, y = where.id, z = size.test,
                             SIMPLIFY = "array")), nrow = size.test, ncol = length(file.list))

  # Time-series analysis for each time-series (ASCII file) consecutively.
  band.data.site <- lapply((size.test * length(Bands)), function(x) matrix(nrow = x, ncol = 13))

  for(counter in 1:length(file.list)){
    cat("Processing file ", counter, " of ", length(file.list), "...\n", sep = "")

    ##### Load selected .asc file into a data frame, name columns and tell user what's being processed.
    ds <- read.csv(file.path(Dir, file.list[counter]), header = FALSE, as.is = TRUE)
    names(ds) <- c("nrow", "ncol", "xll", "yll", "pixelsize", "row.id", "product.code", "MODIS.acq.date",
                   "where", "MODIS.proc.date", 1:(ncol(ds) - NUM_METADATA_COLS))

    # Extract year and day from the metadata and make POSIXlt dates (YYYY-MM-DD), ready for time-series analysis.
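    # MODIS.acq.date strings have the form "AYYYYDDD": characters 2-5 give the year and 6-8 the day of year,
    # which strptime() converts to a calendar date via the "%j" format.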
    year <- as.numeric(substr(ds$MODIS.acq.date, 2, 5))
    day <- as.numeric(substr(ds$MODIS.acq.date, 6, 8))
    date <- strptime(paste(year, "-", day, sep = ""), "%Y-%j")
    ds <- cbind(ds[ ,1:NUM_METADATA_COLS], date, ds[ ,(NUM_METADATA_COLS+1):ncol(ds)])
    w.ds.dat <- which(names(ds) == "date") + 1
    #####

    ##### Section that uses each time-series' metadata strings to extract the necessary information.
    wherelong <- regexpr("Lon", ds$where[1])
    whereSamp <- regexpr("Samp", ds$where[1])
    lat <- as.numeric(substr(ds$where[1], 4, wherelong - 1))
    long <- as.numeric(substr(ds$where[1], wherelong + 3, whereSamp - 1))

    rowIdBands <- Reduce(rbind, strsplit(ds$row.id, "[.]"))
    rowIdBands <- rowIdBands[ ,ncol(rowIdBands)]

    # Check that all bands listed are in the ASCII files.
    if(!all(Bands %in% rowIdBands)) stop("Not all Bands are represented in data file.")

    # Identify which rows in ds correspond to the quality data and put them into a matrix.
    if(QualityScreen){
      if(QualityBand %in% rowIdBands){
        which.QA <- which(QualityBand == rowIdBands)
      } else {
        stop("Cannot find quality data in LoadDat. Download quality data with band data in MODISSubsets.")
      }
      QA.time.series <- as.matrix(ds[which.QA, w.ds.dat:ncol(ds)], nrow = length(which.QA), ncol = length(w.ds.dat:ncol(ds)))
    }
    #####

    for(bands in 1:length(Bands)){
      # Find rows in ds that correspond to Bands[bands] data and put them into a matrix.
      if(Bands[bands] %in% rowIdBands){
        which.band <- which(Bands[bands] == rowIdBands)
      } else {
        stop("Cannot find band data in LoadDat. Make sure ASCII files in the directory are from MODISSubsets.")
      }
      band.time.series <- as.matrix(ds[which.band, w.ds.dat:ncol(ds)],
                                    nrow = length(which.band), ncol = length(w.ds.dat:ncol(ds)))
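      # In band.time.series (and QA.time.series) each row is one acquisition date and each column is one pixel,
      # so column i holds the full time series for pixel i.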
      ##### Screen pixels in band.time.series: any NoDataFill values, or pixels below QualityThreshold, are replaced with NA.
      if(QualityScreen){
        band.time.series <- QualityCheck(Data = band.time.series, QualityScores = QA.time.series, Band = Bands[bands],
                                         NoDataFill = NoDataFill, QualityBand = QualityBand, Product = Product,
                                         QualityThreshold = QualityThreshold)
      } else {
        band.time.series <- matrix(ifelse(band.time.series != NoDataFill, band.time.series, NA),
                                   nrow = length(which.band))
      }

      # Final check that all band values fall within the ValidRange (as defined for the given MODIS product band).
      if(any(!(band.time.series >= ValidRange[1] & band.time.series <= ValidRange[2]), na.rm = TRUE)){
        stop("Some values fall outside the valid range, after no fill values should have been removed.")
      }
      ####

      # Initialise objects for the various summaries.
      mean.band <- rep(NA, num.pixels)
      sd.band <- rep(NA, num.pixels)
      band.yield <- rep(NA, num.pixels)
      nofill <- rep(NA, num.pixels)
      poorquality <- rep(NA, num.pixels)
      band.min <- rep(NA, num.pixels)
      band.max <- rep(NA, num.pixels)

      if(DiagnosticPlot & num.pixels != 1){
        cat("Cannot provide diagnostic plots for subsets larger than a single pixel.\n")
        DiagnosticPlot <- FALSE
      }

      # Run the time-series analysis for the ith pixel (column i of band.time.series).
      for(i in 1:ncol(band.time.series)){
        # Minimum and maximum band values observed for this pixel.
        minobsband <- min(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
        maxobsband <- max(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)

        # Assess how many usable data points this pixel has, to decide how to proceed with the analysis
        # (when QualityScreen is FALSE, data.quality defaults to 2 so the full summaries are always attempted).
        data.quality <- ifelse(QualityScreen, sum(!is.na(band.time.series[ ,i])), 2)

        if(data.quality >= 2){
          # Linearly interpolate between screened data points, for each pixel, over time.
          if(Interpolate){
            if(is.null(InterpolateN)){
              #N <- max(ds$date[!is.na(band.time.series[ ,i])]) - min(ds$date[!is.na(band.time.series[ ,i])])
              InterpolateN <- round(0.9 * as.numeric(length(band.time.series)))
            }
            sout <- approx(x = 1:nrow(band.time.series), y = as.numeric(band.time.series[ ,i]) * ScaleFactor,
                           method = "linear", n = InterpolateN)
          }

          # Carry out the relevant summary analyses, set by options in the function call.
          # Yield is the mean amount by which the interpolated series exceeds the minimum observed value
          # (i.e. sum(sout$y) / length(sout$x) - minobsband), so it requires Interpolate = TRUE.
          if(Yield) band.yield[i] <- (sum(sout$y) - minobsband * length(sout$x)) / length(sout$x)

          if(Mean){
            if(Interpolate){
              mean.band[i] <- mean(sout$y)
            } else {
              mean.band[i] <- mean(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
            }
          }
          if(SD){
            if(Interpolate){
              sd.band[i] <- sd(sout$y)
            } else {
              sd.band[i] <- sd(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
            }
          }
        }

        if(data.quality == 1){
          warning("Only single data point that passed the quality screen: cannot summarise", immediate. = TRUE)
          if(Mean) mean.band[i] <- mean(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
        }
        if(data.quality == 0) warning("No reliable data for this pixel", immediate. = TRUE)

        # Complete the final optional summaries, irrespective of data quality.
        if(Min) band.min[i] <- minobsband
        if(Max) band.max[i] <- maxobsband

        nofill[i] <- paste(round((sum(ds[ ,i + (w.ds.dat - 1)] == NoDataFill) / length(band.time.series[ ,i])) * 100, 2),
                           "% (", sum(ds[ ,i + (w.ds.dat - 1)] == NoDataFill), "/", length(band.time.series[ ,i]), ")",
                           sep = "")

        if(QualityScreen){
          poorquality[i] <- paste(round((sum(QA.time.series[ ,i] > QualityThreshold) / length(QA.time.series[ ,i])) * 100, 2),
                                  "% (", sum(QA.time.series[ ,i] > QualityThreshold), "/", length(QA.time.series[ ,i]), ")", sep = "")
        } else {
          poorquality[i] <- NA
        }
      } # End of loop for time-series summary analysis for each pixel.

      # Compile the time-series information and relevant summaries into a final output listed by site (.asc file).
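      # The 13 columns are filled in the order named after the file loop: ID, lat, long, start.date, end.date,
      # data.band, min.band, max.band, mean.band, sd.band, band.yield, no.fill.data, poor.quality.data.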
      band.data.site[[counter]][(((bands - 1) * num.pixels) + 1):(num.pixels * bands), ] <-
        matrix(data = c(ID = id[ ,counter],
                        lat = rep(lat, num.pixels),
                        long = rep(long, num.pixels),
                        start.date = rep(as.character(min(ds$date)), num.pixels),
                        end.date = rep(as.character(max(ds$date)), num.pixels),
                        data.band = rep(Bands[bands], num.pixels),
                        min.band = band.min,
                        max.band = band.max,
                        mean.band = mean.band,
                        sd.band = sd.band,
                        band.yield = band.yield,
                        no.fill.data = nofill,
                        poor.quality.data = poorquality),
               nrow = num.pixels, ncol = 13)

      if(DiagnosticPlot){
        directory <- file.path(Dir, "DiagnosticPlots")
        if(!file.exists(directory)) dir.create(directory)
        filename <- file.path(directory, paste(id[counter], Product,
                                               Bands[bands], file.date, ".pdf", sep = "_"))
        if(data.quality == 1 | data.quality == 0){
          cat("Too few data points for diagnostic plot for this site\n")
        } else {
          pdf(filename)
          SiteId <- max(paste(lat, long, year))
          plot(band.time.series[ ,i] * ScaleFactor, pch = 19, main = id[counter], xlab = "Timestep", ylab = Bands[bands], ylim = c(-0.1, 1))
          if(Interpolate) lines(sout, col = "red")
          if(Mean) abline(a = mean.band[i], b = 0, lty = 2, col = "red")
          if(Min) abline(a = band.min[i], b = 0, lty = 2)
          if(Max) abline(a = band.max[i], b = 0, lty = 2)
          dev.off()
        }
      }
    } # End of loop for Bands.
  } # End of loop for each ASCII file.

  band.data.site <- as.data.frame(do.call("rbind", band.data.site), stringsAsFactors = FALSE)
  names(band.data.site) <- c("ID", "lat", "long", "start.date", "end.date", "data.band", "min.band", "max.band",
                             "mean.band", "sd.band", "band.yield", "no.fill.data", "poor.quality.data")

  ### Create the SubsetID variable, as created in MODISSubsets, for correct matching.
  if(StartDate){
    lat.long <- details[!is.na(details$lat) | !is.na(details$long) | !is.na(details$end.date) | !is.na(details$start.date), ]
    lat.long <- lat.long[!duplicated(data.frame(lat.long$lat, lat.long$long, lat.long$end.date, lat.long$start.date)), ]
  } else {
    lat.long <- details[!is.na(details$lat) | !is.na(details$long) | !is.na(details$end.date), ]
    lat.long <- lat.long[!duplicated(data.frame(lat.long$lat, lat.long$long, lat.long$end.date)), ]
  }
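  # lat.long now holds one row per unique requested time series (unique lat/long/date combination) in LoadDat.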
  ID.check <- any(names(details) == "ID")
  n.unique <- FALSE

  if(ID.check){
    # Check whether the ID variable identifies unique time series.
    n.unique <- length(unique(lat.long$ID)) == nrow(lat.long)
    if(n.unique) names(lat.long)[names(lat.long) == "ID"] <- "SubsetID"
  }
  if(!ID.check | !n.unique){
    # No unique ID variable available, so create end.date and start.date to build SubsetID.
    Year <- FALSE
    POSIXt <- FALSE
    posix.compatible <- try(as.POSIXlt(lat.long$end.date), silent = TRUE)

    if(any(class(lat.long$end.date) == "POSIXt") | all(class(posix.compatible) != "try-error")) POSIXt <- TRUE
    if(all(is.numeric(lat.long$end.date) & nchar(as.character(lat.long$end.date)) == 4) &
       any(class(posix.compatible) == "try-error")) Year <- TRUE

    if(!Year & !POSIXt) stop("Date information in LoadDat is not recognised as years or as POSIXt format.")
    if(Year & POSIXt) stop("Date information in LoadDat is recognised as both year and POSIXt formats.")

    if(Year){
      end.date <- strptime(paste(lat.long$end.date, "-12-31", sep = ""), "%Y-%m-%d")
      if(StartDate){
        start.year.fail <- any(!is.numeric(lat.long$start.date) | nchar(lat.long$start.date) != 4)
        if(start.year.fail) stop("end.date identified as year dates, but start.date does not match.")
        start.date <- strptime(paste(lat.long$start.date, "-01-01", sep = ""), "%Y-%m-%d")
      }
    } else if(POSIXt){
      end.date <- strptime(lat.long$end.date, "%Y-%m-%d")
      if(StartDate){
        start.posix.fail <- any(class(try(as.POSIXlt(lat.long$start.date), silent = TRUE)) == "try-error")
        if(start.posix.fail) stop("end.date identified as POSIXt dates, but start.date does not match.")
        start.date <- strptime(lat.long$start.date, "%Y-%m-%d")
      }
    }

    if(!StartDate){
      date.ids <- mapply(function(x, y, z) unique(band.data.site$ID[band.data.site$lat == x &
                                                                    band.data.site$long == y &
                                                                    grepl(z, band.data.site$ID)]),
                         x = lat.long$lat, y = lat.long$long, z = as.character(end.date))
      if(!all(sapply(date.ids, length) == 1)){
        stop("Couldn't match summarised data back with original dataset.\n",
             "Make sure LoadDat is exact same dataset used to download MODIS data being summarised.")
      }
      start.date <- substr(date.ids, regexpr("Start", date.ids) + 5, regexpr("End", date.ids) - 1)
    }
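    # Build SubsetID in the same "Lat...Lon...Start...End..." format used by MODISSubsets, with coordinates to 5 decimal places.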
    lat.long <- data.frame(SubsetID = paste("Lat", sprintf("%.5f", lat.long$lat), "Lon", sprintf("%.5f", lat.long$long),
                                            "Start", start.date, "End", end.date, sep = ""),
                           lat.long, stringsAsFactors = FALSE)
  }
  ###

  res <- data.frame(cbind(details, matrix(NA, nrow = nrow(details), ncol = 1 + (num.pixels * length(Bands)))))
  names(res) <-
    c(names(details), "SubsetID",
      paste(rep(Bands, each = num.pixels), "_pixel", rep(1:num.pixels, times = length(Bands)), sep = ""))
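  # Match each summarised subset back to rows of the input data by coordinates (formatted to the same 5 decimal
  # places used in SubsetID) and end.date, then append that subset's mean band values as the new pixel columns.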
  for(i in 1:length(lat.long$SubsetID)){
    res$SubsetID[sprintf("%.5f", res$lat) %in% sprintf("%.5f", as.numeric(lat.long$lat[i])) &
                 sprintf("%.5f", res$long) %in% sprintf("%.5f", as.numeric(lat.long$long[i])) &
                 res$end.date %in% lat.long$end.date[i]] <- lat.long$SubsetID[i]

    res[res$SubsetID %in% lat.long$SubsetID[i], (which(names(res) == "SubsetID") + 1):ncol(res)] <-
      matrix(band.data.site$mean.band[band.data.site$ID == lat.long$SubsetID[i]],
             nrow = nrow(as.matrix(res[res$SubsetID %in% lat.long$SubsetID[i], (which(names(res) == "SubsetID") + 1):ncol(res)])),
             ncol = ncol(as.matrix(res[res$SubsetID %in% lat.long$SubsetID[i], (which(names(res) == "SubsetID") + 1):ncol(res)])),
             byrow = TRUE)
  }
  #####

  # Write the output summary file by appending summary data from all files, producing one file of summary stats output.
  write.table(band.data.site, file = file.path(Dir, paste("MODIS_Summary_", Product, "_", file.date, ".csv", sep = "")),
              sep = ",", row.names = FALSE)

  # Write the final appended dataset to a csv file, ready for use, in Dir.
  write.table(res, file = file.path(Dir, paste("MODIS_Data_", Product, "_", file.date, ".csv", sep = "")),
              sep = ",", col.names = TRUE, row.names = FALSE)
  #####

  cat("Done! Check the 'MODIS_Summary' and 'MODIS_Data' output files.\n")
}
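
# ---------------------------------------------------------------------------
# Illustrative usage (a sketch, not part of the function above). The file
# paths, product, band name, valid range, fill value, scale factor and
# quality settings below are assumptions chosen for MOD13Q1 EVI; check them
# against the product documentation and your own MODISSubsets download
# before use.
#
# MODISSummaries(LoadDat = "./my-subset-points.csv", FileSep = ",",
#                Dir = "./MODIS_ASCII",
#                Product = "MOD13Q1",
#                Bands = "250m_16_days_EVI",
#                ValidRange = c(-2000, 10000),
#                NoDataFill = -3000,
#                ScaleFactor = 0.0001,
#                QualityScreen = TRUE,
#                QualityBand = "250m_16_days_pixel_reliability",
#                QualityThreshold = 0,
#                Mean = TRUE, SD = TRUE, Min = TRUE, Max = TRUE,
#                Interpolate = TRUE, DiagnosticPlot = TRUE)
# ---------------------------------------------------------------------------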