#!/usr/bin/env python
# class to download modis data
#
# (c) Copyright Luca Delucchi 2010-2016
# (c) Copyright Logan C Byers 2014
# Authors: Luca Delucchi
#          Logan C Byers
# Email: luca dot delucchi at fmach dot it
#        [email protected]
#
##################################################################
#
# This MODIS Python class is licensed under the terms of GNU GPL 2.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
##################################################################
  24. """Module to download MODIS HDF files from NASA repository.
  25. It supports both FTP and HTTP repositories
  26. Classes:
  27. * :class:`modisHtmlParser`
  28. * :class:`downModis`
  29. Functions:
  30. * :func:`urljoin`
  31. * :func:`getNewerVersion`
  32. * :func:`str2date`
  33. """
# python 2 and 3 compatibility
from __future__ import print_function
from builtins import dict

from datetime import date
from datetime import timedelta
import os
import sys
import glob
import logging
import socket
from ftplib import FTP
import ftplib
import requests
# urllib in python 2 and 3
try:
    from future.standard_library import install_aliases
    install_aliases()
except ImportError:
    raise ImportError("Future library not found, please install it")
from urllib.request import urlopen
import urllib.request
import urllib.error
from base64 import b64encode
from html.parser import HTMLParser
import re
import netrc

# urlparse in python 2 and 3
try:
    from urlparse import urlparse
    URLPARSE = True
except ImportError:
    try:
        from urllib.parse import urlparse
        URLPARSE = True
    except ImportError:
        URLPARSE = False
        print('WARNING: urlparse not found, it is not possible to use'
              ' netrc file')

global GDAL
try:
    import osgeo.gdal as gdal
    GDAL = True
except ImportError:
    try:
        import gdal
        GDAL = True
    except ImportError:
        GDAL = False
        print('WARNING: Python GDAL library not found, please install it to'
              ' check data downloaded with pyModis')

# setup gdal
if GDAL:
    gdal.UseExceptions()
    gdalDriver = gdal.GetDriverByName('HDF4')
    if not gdalDriver:
        GDAL = False
        print("GDAL installation has no support for HDF4, please update GDAL")

def urljoin(*args):
    """Joins given arguments into a url. Trailing but not leading slashes are
       stripped for each argument.
       http://stackoverflow.com/a/11326230

       :return: a string
    """
    return "/".join([str(x).rstrip('/') for x in args])

def getNewerVersion(oldFile, newFile):
    """Check two files to determine which is newer

       :param str oldFile: one of the two similar files
       :param str newFile: one of the two similar files

       :return: the name of newer file
    """
    # get the processing date (YYYYDDDHHMMSS) from the file strings
    if oldFile.split('.')[4] > newFile.split('.')[4]:
        return oldFile
    else:
        return newFile

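# Illustrative example (hypothetical file names; dot-separated field 4 is the
# processing timestamp YYYYDDDHHMMSS that the comparison above relies on):
#   getNewerVersion('MOD11A1.A2016001.h18v04.006.2016005123456.hdf',
#                   'MOD11A1.A2016001.h18v04.006.2016009101112.hdf')
#   returns the second name, whose processing timestamp is later.
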
def str2date(datestring):
    """Convert to datetime.date object from a string

       :param str datestring: string with format (YYYY-MM-DD)

       :return: a datetime.date object representing datestring
    """
    if '-' in datestring:
        stringSplit = datestring.split('-')
    elif '.' in datestring:
        stringSplit = datestring.split('.')
    elif ' ' in datestring:
        stringSplit = datestring.split(' ')
    return date(int(stringSplit[0]), int(stringSplit[1]), int(stringSplit[2]))

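# Illustrative example: str2date('2016-01-01') and str2date('2016.01.01')
# both return datetime.date(2016, 1, 1).
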
class ModisHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Class to return 302 error"""

    def http_error_302(self, req, fp, code, msg, headers):
        return urllib.request.HTTPRedirectHandler.http_error_302(self, req, fp,
                                                                  code, msg,
                                                                  headers)

class modisHtmlParser(HTMLParser):
    """A class to parse HTML

       :param fh: content of http request
    """
    def __init__(self, fh):
        """Function to initialize the object"""
        HTMLParser.__init__(self)
        self.fileids = []
        self.feed(str(fh))

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            attrD = dict(attrs)
            self.fileids.append(attrD['href'].replace('/', ''))

    def get_all(self):
        """Return everything"""
        return self.fileids

    def get_dates(self):
        """Return a list of directories with date"""
        regex = re.compile(r'(\d{4})[/.-](\d{2})[/.-](\d{2})$')
        alldata = set([elem for elem in self.fileids if regex.match(elem)])
        return sorted(list(alldata))

    def get_tiles(self, prod, tiles, jpeg=False):
        """Return a list of files to download

           :param str prod: the code of the MODIS product that we are going to
                            analyze
           :param list tiles: the list of tiles to consider
           :param bool jpeg: True to also check for jpeg data
        """
        finalList = []
        for i in self.fileids:
            # distinguish jpg from hdf by where the tileID is within the string
            # jpgs have the tileID at index 3, hdf have tileID at index 2
            name = i.split('.')
            # if product is not in the filename, move to next filename in list
            if not name.count(prod):
                continue
            # if tiles are not specified and the file is not a jpg, add to list
            if not tiles and not (name.count('jpg') or name.count('BROWSE')):
                finalList.append(i)
            # if tiles are specified
            if tiles:
                # if a tileID is at index 3 and jpgs are to be downloaded
                if tiles.count(name[3]) == 1 and jpeg:
                    finalList.append(i)
                # if a tileID is at index 2, it is known to be HDF
                elif tiles.count(name[2]) == 1:
                    finalList.append(i)
        return finalList

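# Illustrative note on get_tiles() (hypothetical file name): with
# prod='MOD11A1' and tiles=['h18v04'], an HDF entry such as
# 'MOD11A1.A2016001.h18v04.006.2016005123456.hdf' is kept because its tile ID
# sits at index 2 of the dot-separated name; browse JPGs, whose tile ID sits
# at index 3, are kept only when jpeg=True.
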
class downModis:
    """A class to download MODIS data from NASA FTP or HTTP repositories

       :param str destinationFolder: where the files will be stored
       :param str password: the password required by NASA authentication system
       :param str user: the user name required by NASA authentication system
       :param str url: the base url from where to download the MODIS data,
                       it can be FTP or HTTP but it has to start with
                       'ftp://' or 'http://' or 'https://'
       :param str path: the directory where the data that you want to
                        download are stored on the FTP server. For HTTP
                        requests, this is the part of the url between the 'url'
                        parameter and the 'product' parameter.
       :param str product: the code of the product to download, the code
                           should be identical to the one in the url
       :param str tiles: a set of tiles to be downloaded, None == all tiles.
                         This can be passed as a string of tileIDs separated
                         by commas, or as a list of individual tileIDs
       :param str today: the day to start downloading; in order to pass a
                         date different from today use the format YYYY-MM-DD
       :param str enddate: the day to end downloading; in order to pass a
                           date use the format YYYY-MM-DD. This day must be
                           before the 'today' parameter. Downloading happens
                           in reverse order (currently)
       :param int delta: timelag i.e. the number of days starting from
                         today backwards. Will be overwritten if
                         'enddate' is specified during instantiation
       :param bool jpg: set to True if you want to download the JPG overview
                        file in addition to the HDF
       :param bool debug: set to True if you want to obtain debug information
       :param int timeout: Timeout value for HTTP server (seconds)
       :param bool checkgdal: variable to set the GDAL check
    """
    def __init__(self, destinationFolder, password=None, user=None,
                 url="https://e4ftl01.cr.usgs.gov", tiles=None, path="MOLT",
                 product="MOD11A1.005", today=None, enddate=None, delta=10,
                 jpg=False, debug=False, timeout=30, checkgdal=True):
        """Function to initialize the object"""
        # prepare the base url and set the url type (ftp/http)
        if 'ftp://' in url:
            self.url = url.replace('ftp://', '').rstrip('/')
            self.urltype = 'ftp'
        elif 'http://' in url:
            self.url = url
            self.urltype = 'http'
        elif 'https://' in url:
            self.url = url
            self.urltype = 'http'
        else:
            raise IOError("The url should contain 'ftp://' or 'http://'")
        if not user and not password and not URLPARSE:
            raise IOError("Please use 'user' and 'password' parameters")
        elif not user and not password and URLPARSE:
            self.domain = urlparse(self.url).hostname
            try:
                nt = netrc.netrc()
            except:
                raise IOError("Please set 'user' and 'password' parameters"
                              ", netrc file does not exist")
            try:
                account = nt.hosts[self.domain]
            except:
                try:
                    account = nt.hosts['urs.earthdata.nasa.gov']
                except:
                    raise IOError("Please set 'user' and 'password' parameters"
                                  ", netrc file does not contain parameter "
                                  "for NASA url")
            # user for download
            self.user = account[0]
            # password for download
            self.password = account[2]
        else:
            # user for download
            self.user = user
            # password for download
            self.password = password
        self.userpwd = "{us}:{pw}".format(us=self.user,
                                          pw=self.password)
        userAndPass = b64encode(str.encode(self.userpwd)).decode("ascii")
        self.http_header = {'Authorization': 'Basic %s' % userAndPass}
        cookieprocessor = urllib.request.HTTPCookieProcessor()
        opener = urllib.request.build_opener(ModisHTTPRedirectHandler,
                                             cookieprocessor)
        urllib.request.install_opener(opener)
        # the product (product_code.004 or product_code.005)
        self.product = product
        self.product_code = product.split('.')[0]
        # url directory where data are located
        self.path = urljoin(path, self.product)
        # tiles to download
        if isinstance(tiles, str):
            self.tiles = tiles.split(',')
        else:  # tiles are list, tuple, or None
            self.tiles = tiles
        # set destination folder
        if not os.path.isdir(destinationFolder):
            os.makedirs(destinationFolder)
            self.writeFilePath = destinationFolder
        elif os.access(destinationFolder, os.W_OK):
            self.writeFilePath = destinationFolder
        else:
            try:
                os.mkdir(destinationFolder)
                self.writeFilePath = destinationFolder
            except:
                raise Exception("Folder to store downloaded files does not "
                                "exist or is not writeable")
        # return the name of product
        if len(self.path.split('/')) == 2:
            self.product = self.path.split('/')[1]
        elif len(self.path.split('/')) == 3:
            self.product = self.path.split('/')[2]
        # write a file with the names of the files to be downloaded
        self.filelist = open(os.path.join(self.writeFilePath,
                                          'listfile{pro}.txt'.format(pro=self.product)),
                             'w')
        # whether to download jpgs
        self.jpeg = jpg
        # today, or the last day in the download series chronologically
        self.today = today
        # chronologically the first day in the download series
        self.enday = enddate
        # default number of days to consider if enddate not specified
        self.delta = delta
        # status of tile download
        self.status = True
        # for debug, you can download only xml files
        self.debug = debug
        # for logging
        log_filename = os.path.join(self.writeFilePath,
                                    'modis{pro}.log'.format(pro=self.product))
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(filename=log_filename, level=logging.DEBUG,
                            format=log_format)
        logging.captureWarnings(True)
        # global connection attempt counter
        self.nconnection = 0
        # timeout for HTTP connection before failing (seconds)
        self.timeout = timeout
        # files within the directory where data will be saved
        self.fileInPath = []
        for f in os.listdir(self.writeFilePath):
            if os.path.isfile(os.path.join(self.writeFilePath, f)):
                self.fileInPath.append(f)
        global GDAL
        if not GDAL and checkgdal:
            logging.warning("WARNING: Python GDAL library not found")
        elif GDAL and not checkgdal:
            GDAL = False
        self.dirData = []

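    # Sketch of a ~/.netrc entry that the credential lookup in __init__ can
    # consume when 'user' and 'password' are not passed (placeholder values):
    #     machine urs.earthdata.nasa.gov
    #     login your_username
    #     password your_password
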
    def removeEmptyFiles(self):
        """Function to remove files in the download directory that have
           filesize equal to 0
        """
        year = str(date.today().year)
        prefix = self.product.split('.')[0]
        files = glob.glob1(self.writeFilePath, '%s.A%s*' % (prefix, year))
        for f in files:
            fil = os.path.join(self.writeFilePath, f)
            if os.path.getsize(fil) == 0:
                os.remove(fil)

    def connect(self, ncon=20):
        """Connect to the server and fill the dirData variable

           :param int ncon: maximum number of attempts to connect to the HTTP
                            server before failing
        """
        if self.urltype == 'ftp':
            self._connectFTP(ncon)
        elif self.urltype == 'http':
            self._connectHTTP(ncon)
        if len(self.dirData) == 0:
            raise Exception("There are some troubles with the server. "
                            "The directory seems to be empty")

    def _connectHTTP(self, ncon=20):
        """Connect to HTTP server, create a list of directories for all days

           :param int ncon: maximum number of attempts to connect to the HTTP
                            server before failing. If ncon < 0, connection
                            attempts are unlimited in number
        """
        self.nconnection += 1
        try:
            url = urljoin(self.url, self.path)
            try:
                req = urllib.request.Request(url, headers=self.http_header)
                http = urllib.request.urlopen(req)
                self.dirData = modisHtmlParser(http.read()).get_dates()
            except Exception as e:
                logging.error('Error in connection. Code {code}, '
                              'reason {re}'.format(code=e.code, re=e.reason))
                http = urlopen(url, timeout=self.timeout)
                self.dirData = modisHtmlParser(http.read()).get_dates()
            self.dirData.reverse()
        except Exception as e:
            try:
                logging.error('Error in connection. Code {code}, '
                              'reason {re}'.format(code=e.code, re=e.reason))
            except:
                logging.error('Error {er}'.format(er=e))
            if self.nconnection <= ncon or ncon < 0:
                self._connectHTTP()

    def _connectFTP(self, ncon=20):
        """Set connection to ftp server, move to path where data are stored,
           and create a list of directories for all days

           :param int ncon: maximum number of attempts to connect to the FTP
                            server before failing.
        """
        self.nconnection += 1
        try:
            # connect to ftp server
            self.ftp = FTP(self.url)
            self.ftp.login(self.user, self.password)
            # enter in directory
            self.ftp.cwd(self.path)
            self.dirData = []
            # return data inside directory
            self.ftp.dir(self.dirData.append)
            # reverse the order of the data to have the days nearest to today first
            self.dirData.reverse()
            # ensure dirData contains only directories, remove all references to files
            self.dirData = [elem.split()[-1] for elem in self.dirData if elem.startswith("d")]
            if self.debug:
                logging.debug("Open connection {url}".format(url=self.url))
        except (EOFError, ftplib.error_perm) as e:
            logging.error('Error in connection: {err}'.format(err=e))
            if self.nconnection <= ncon:
                self._connectFTP()

    def closeFTP(self):
        """Close ftp connection and close the file list document"""
        self.ftp.quit()
        self.closeFilelist()
        if self.debug:
            logging.debug("Close connection {url}".format(url=self.url))

    def closeFilelist(self):
        """Function to close the file list of where the files are downloaded"""
        self.filelist.close()

    def setDirectoryIn(self, day):
        """Enter into the file directory of a specified day

           :param str day: a string representing a day in format YYYY.MM.DD
        """
        try:
            self.ftp.cwd(day)
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} entering in directory "
                          "{name}".format(err=e, name=day))
            self.setDirectoryIn(day)

    def setDirectoryOver(self):
        """Move up within the file directory"""
        try:
            self.ftp.cwd('..')
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} when trying to come back".format(err=e))
            self.setDirectoryOver()

    def _getToday(self):
        """Set the dates for the start and end of downloading"""
        if self.today is None:
            # set today variable from datetime.date method
            self.today = date.today()
        elif isinstance(self.today, str):
            # set today variable from string data passed by user
            self.today = str2date(self.today)
        # set enday variable to data passed by user
        if isinstance(self.enday, str):
            self.enday = str2date(self.enday)
        # set delta
        if self.today and self.enday:
            if self.today < self.enday:
                self.today, self.enday = self.enday, self.today
            delta = self.today - self.enday
            self.delta = abs(delta.days) + 1

    def getListDays(self):
        """Return a list of all selected days"""
        self._getToday()
        today_s = self.today.strftime("%Y.%m.%d")
        # dirData is reverse sorted
        for i, d in enumerate(self.dirData):
            if d <= today_s:
                today_index = i
                break
        # else:
        #     logging.error("No data available for requested days")
        #     import sys
        #     sys.exit()
        days = self.dirData[today_index:][:self.delta]
        # this is useful for 8/16 day data; delta alone could otherwise
        # download more images than you want
        if self.enday is not None:
            enday_s = self.enday.strftime("%Y.%m.%d")
            delta = 0
            # cycle from the most recent day and count the entries inside the
            # temporal range, so that days outside it can be removed
            for i in range(0, len(days)):
                if days[i] < enday_s:
                    break
                else:
                    delta = delta + 1
            # remove days outside new delta
            days = days[:delta]
        return days

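    # Example of the date window above (hypothetical values, assuming the
    # server exposes a directory for every day): with today='2016-01-10' and
    # enddate='2016-01-01', _getToday() sets self.delta to 10 and
    # getListDays() returns ['2016.01.10', '2016.01.09', ..., '2016.01.01'],
    # newest first, because dirData is reverse sorted.
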
    def getAllDays(self):
        """Return a list of all days"""
        return self.dirData

    def getFilesList(self, day=None):
        """Returns a list of files to download. HDF and XML files are
           downloaded by default. JPG files will be downloaded if
           self.jpeg == True.

           :param str day: the date of data in format YYYY.MM.DD

           :return: a list of files to download for the day
        """
        if self.urltype == 'http':
            return self._getFilesListHTTP(day)
        elif self.urltype == 'ftp':
            return self._getFilesListFTP()

    def _getFilesListHTTP(self, day):
        """Returns a list of files to download from the http server, which
           will be HDF and XML files, and optionally JPG files if specified by
           self.jpeg

           :param str day: the date of data in format YYYY.MM.DD
        """
        # return the files list inside the directory of each day
        try:
            url = urljoin(self.url, self.path, day)
            if self.debug:
                logging.debug("The url is: {url}".format(url=url))
            try:
                http = modisHtmlParser(requests.get(url,
                                                    timeout=self.timeout).content)
            except:
                http = modisHtmlParser(urlopen(url,
                                               timeout=self.timeout).read())
            # download JPG files also
            if self.jpeg:
                # if tiles not specified, download all files
                if not self.tiles:
                    finalList = http.get_all()
                # if tiles specified, download all files with jpegs
                else:
                    finalList = http.get_tiles(self.product_code,
                                               self.tiles, jpeg=True)
            # if JPG files should not be downloaded, get only HDF and XML
            else:
                finalList = http.get_tiles(self.product_code, self.tiles)
            if self.debug:
                logging.debug("The number of files to download is: "
                              "{num}".format(num=len(finalList)))
            return finalList
        except (socket.error) as e:
            logging.error("Error {err} when trying to receive list of "
                          "files".format(err=e))
            self._getFilesListHTTP(day)

    def _getFilesListFTP(self):
        """Create a list of files to download from the FTP server; it is
           possible to choose whether to also download the JPG overview files
           or only the HDF files
        """
        def cicle_file(jpeg=False):
            """Check the type of file"""
            finalList = []
            for i in self.listfiles:
                name = i.split('.')
                # distinguish jpeg files from hdf files by the index
                # at which the tile ID is found
                if not self.tiles and not (name.count('jpg') or
                                           name.count('BROWSE')):
                    finalList.append(i)
                # is a jpeg of the requested tiles
                if self.tiles:
                    if self.tiles.count(name[3]) == 1 and jpeg:
                        finalList.append(i)
                    # is a hdf of the requested tiles
                    elif self.tiles.count(name[2]) == 1:
                        finalList.append(i)
            return finalList
        # return the file list inside the directory of each day
        try:
            self.listfiles = self.ftp.nlst()
            # also download jpeg files
            if self.jpeg:
                # the final list is all files, including jpeg files
                if not self.tiles:
                    finalList = self.listfiles
                # the final list is the requested tiles, including jpeg files
                else:
                    finalList = cicle_file(jpeg=True)
            # do not download jpeg files
            else:
                finalList = cicle_file()
            if self.debug:
                logging.debug("The number of files to download is: "
                              "{num}".format(num=len(finalList)))
            return finalList
        except (ftplib.error_reply, socket.error) as e:
            logging.error("Error {err} when trying to receive list of "
                          "files".format(err=e))
            self._getFilesListFTP()

    def checkDataExist(self, listNewFile, move=False):
        """Check if a file already exists in the local download directory

           :param list listNewFile: list of all files, returned by getFilesList
                                    function
           :param bool move: it is useful to know if a function is called from
                             download or move function

           :return: list of files to download
        """
        # different return if this method is used from downloadsAllDay() or
        # moveFile()
        if not listNewFile and not self.fileInPath:
            logging.error("checkDataExist both lists are empty")
        elif not listNewFile:
            listNewFile = list()
        elif not self.fileInPath:
            self.fileInPath = list()
        if not move:
            listOfDifferent = list(set(listNewFile) - set(self.fileInPath))
        elif move:
            listOfDifferent = list(set(self.fileInPath) - set(listNewFile))
        return listOfDifferent

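    # Sketch of the set difference above (hypothetical file names):
    #     listNewFile = ['A.hdf', 'B.hdf'],  self.fileInPath = ['A.hdf']
    #     move=False  ->  ['B.hdf']  (remote files still missing locally)
    #     move=True   ->  []         (local files absent from the server list)
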
    def checkFile(self, filHdf):
        """Check by using GDAL to be sure that the download went ok

           :param str filHdf: name of the HDF file to check

           :return: 0 if file is correct, 1 for error
        """
        try:
            gdal.Open(filHdf)
            return 0
        except (RuntimeError) as e:
            logging.error(e)
            return 1

    def downloadFile(self, filDown, filHdf, day):
        """Download a single file

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
           :param str day: the day in format YYYY.MM.DD
        """
        if self.urltype == 'http':
            self._downloadFileHTTP(filDown, filHdf, day)
        elif self.urltype == 'ftp':
            self._downloadFileFTP(filDown, filHdf)

    def _downloadFileHTTP(self, filDown, filHdf, day):
        """Download a single file from the http server

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
           :param str day: the day in format YYYY.MM.DD
        """
        filSave = open(filHdf, "wb")
        url = urljoin(self.url, self.path, day, filDown)
        orig_size = None
        try:  # download and write the file
            req = urllib.request.Request(url, headers=self.http_header)
            http = urllib.request.urlopen(req)
            orig_size = http.headers['Content-Length']
            filSave.write(http.read())
        # if the download fails, try to download the file again
        except Exception as e:
            logging.warning("Tried to download with urllib but got this "
                            "error {ex}, reason {re}".format(ex=e.code,
                                                             re=e.reason))
            try:
                http = requests.get(url, timeout=self.timeout)
                orig_size = http.headers['Content-Length']
                filSave.write(http.content)
            except Exception as e:
                logging.warning("Tried to download with requests but got this "
                                "error {co}, reason {re}".format(co=e.code,
                                                                 re=e.reason))
                logging.error("Cannot download {name}. "
                              "Retrying...".format(name=filDown))
                filSave.close()
                os.remove(filSave.name)
                import time
                time.sleep(5)
                self._downloadFileHTTP(filDown, filHdf, day)
        filSave.close()
        transf_size = os.path.getsize(filSave.name)
        if not orig_size:
            self.filelist.write("{name}\n".format(name=filDown))
            self.filelist.flush()
            if self.debug:
                logging.debug("File {name} downloaded but the size was not "
                              "checked".format(name=filDown))
            return 0
        if int(orig_size) == int(transf_size):
            # if it is not an xml file, check the HDF and re-download it if broken
            if filHdf.find('.xml') == -1:
                test = False
                if GDAL:
                    test = self.checkFile(filHdf)
                if test:
                    os.remove(filSave.name)
                    self._downloadFileHTTP(filDown, filHdf, day)
                else:
                    self.filelist.write("{name}\n".format(name=filDown))
                    self.filelist.flush()
                    if self.debug:
                        logging.debug("File {name} downloaded "
                                      "correctly".format(name=filDown))
                    return 0
            else:  # xml exists
                self.filelist.write("{name}\n".format(name=filDown))
                self.filelist.flush()
                if self.debug:
                    logging.debug("File {name} downloaded "
                                  "correctly".format(name=filDown))
                return 0
        # if filesizes are different, delete and try again
        else:
            logging.warning("Different size for file {name} - original data: "
                            "{orig}, downloaded: {down}".format(name=filDown,
                                                                orig=orig_size,
                                                                down=transf_size))
            os.remove(filSave.name)
            self._downloadFileHTTP(filDown, filHdf, day)

    def _downloadFileFTP(self, filDown, filHdf):
        """Download a single file from ftp server

           :param str filDown: name of the file to download
           :param str filHdf: name of the file to write to
        """
        filSave = open(filHdf, "wb")
        try:  # transfer file from ftp
            self.ftp.retrbinary("RETR " + filDown, filSave.write)
            self.filelist.write("{name}\n".format(name=filDown))
            self.filelist.flush()
            if self.debug:
                logging.debug("File {name} downloaded".format(name=filDown))
        # if error during download process, try to redownload the file
        except (ftplib.error_reply, socket.error, ftplib.error_temp,
                EOFError) as e:
            logging.error("Cannot download {name}, the error was '{err}'. "
                          "Retrying...".format(name=filDown, err=e))
            filSave.close()
            os.remove(filSave.name)
            try:
                self.ftp.pwd()
            except (ftplib.error_temp, EOFError) as e:
                self._connectFTP()
            self._downloadFileFTP(filDown, filHdf)
        filSave.close()
        orig_size = self.ftp.size(filDown)
        transf_size = os.path.getsize(filSave.name)
        if orig_size == transf_size:
            return 0
        else:
            logging.warning("Different size for file {name} - original data: "
                            "{orig}, downloaded: {down}".format(name=filDown,
                                                                orig=orig_size,
                                                                down=transf_size))
            os.remove(filSave.name)
            self._downloadFileFTP(filDown, filHdf)

    def dayDownload(self, day, listFilesDown):
        """Downloads tiles for the selected day

           :param str day: the day in format YYYY.MM.DD
           :param list listFilesDown: list of the files to download, returned
                                      by checkDataExist function
        """
        # for each file in files' list
        for i in listFilesDown:
            fileSplit = i.split('.')
            filePrefix = "{a}.{b}.{c}.{d}".format(a=fileSplit[0],
                                                  b=fileSplit[1],
                                                  c=fileSplit[2],
                                                  d=fileSplit[3])
            # check if this file already exists in the save directory
            oldFile = glob.glob1(self.writeFilePath, filePrefix + "*"
                                 + fileSplit[-1])
            numFiles = len(oldFile)
            # if it doesn't exist
            if numFiles == 0:
                file_hdf = os.path.join(self.writeFilePath, i)
            # if one does exist
            elif numFiles == 1:
                # check the version of file, delete local file if it is older
                fileDown = getNewerVersion(oldFile[0], i)
                if fileDown != oldFile[0]:
                    os.remove(os.path.join(self.writeFilePath, oldFile[0]))
                    file_hdf = os.path.join(self.writeFilePath, fileDown)
            elif numFiles > 1:
                logging.error("There are too many files for "
                              "{name}".format(name=i))
            if numFiles == 0 or (numFiles == 1 and fileDown != oldFile[0]):
                self.downloadFile(i, file_hdf, day)

    def downloadsAllDay(self, clean=False, allDays=False):
        """Download all requested days

           :param bool clean: if True remove the empty files, they could have
                              some problems in the previous download
           :param bool allDays: download all available days
        """
        if clean:
            self.removeEmptyFiles()
        # get the days to download
        if allDays:
            days = self.getAllDays()
        else:
            days = self.getListDays()
        # log the days to download
        if self.debug:
            logging.debug("The number of days to download is: "
                          "{num}".format(num=len(days)))
        # download the data
        if self.urltype == 'http':
            self._downloadAllDaysHTTP(days)
        elif self.urltype == 'ftp':
            self._downloadAllDaysFTP(days)

    def _downloadAllDaysHTTP(self, days):
        """Downloads all the tiles considered from the HTTP server

           :param list days: the list of days to download
        """
        # for each day
        for day in days:
            # obtain list of all files
            listAllFiles = self.getFilesList(day)
            # filter files based on local files in save directory
            listFilesDown = self.checkDataExist(listAllFiles)
            # download files for a day
            self.dayDownload(day, listFilesDown)
        self.closeFilelist()
        if self.debug:
            logging.debug("Download terminated")
        return 0

    def _downloadAllDaysFTP(self, days):
        """Downloads all the tiles considered from the FTP server

           :param list days: the list of days to download
        """
        # for each day
        for day in days:
            # enter in the directory of day
            self.setDirectoryIn(day)
            # obtain list of all files
            listAllFiles = self.getFilesList()
            # filter files based on local files in save directory
            listFilesDown = self.checkDataExist(listAllFiles)
            # download files for a day
            self.dayDownload(day, listFilesDown)
            self.setDirectoryOver()
        self.closeFTP()
        if self.debug:
            logging.debug("Download terminated")
        return 0

    def debugLog(self):
        """Function to create the debug file

           :return: a Logger object to use to write debug info
        """
        # create logger
        logger = logging.getLogger("PythonLibModis debug")
        logger.setLevel(logging.DEBUG)
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter("%(asctime)s - %(name)s - "
                                      "%(levelname)s - %(message)s")
        # add formatter to console handler
        ch.setFormatter(formatter)
        # add console handler to logger
        logger.addHandler(ch)
        return logger

    def debugDays(self):
        """This function is useful to debug the number of days"""
        logger = self.debugLog()
        days = self.getListDays()
        # if the length of the list of days and the delta of days differ
        if len(days) != self.delta:
            # for each day
            for i in range(1, self.delta + 1):
                # calculate the current day using datetime.timedelta
                delta = timedelta(days=i)
                day = self.today - delta
                day = day.strftime("%Y.%m.%d")
                # check if day is in the days list
                if day not in days:
                    logger.critical("This day {day} is not present on "
                                    "list".format(day=day))
        # the length of the list of days and delta are equal
        else:
            logger.info("debugDays() : getListDays() and self.delta are same "
                        "length")

    def debugMaps(self):
        """Prints the files to download to the debug stream"""
        logger = self.debugLog()
        days = self.getListDays()
        for day in days:
            listAllFiles = self.getFilesList(day)
            string = day + ": " + str(len(listAllFiles)) + "\n"
            logger.debug(string)