Coverage for aisdb/webdata/bathymetry.py: 100%

48 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-30 04:22 +0000

1''' load bathymetry data from GEBCO raster files ''' 

2 

3import hashlib 

4import os 

5import zipfile 

6from functools import reduce 

7 

8import numpy as np 

9import requests 

10import warnings 

11from tqdm import tqdm 

12 

13from aisdb.gis import shiftcoord 

14from aisdb.webdata.load_raster import RasterFile 

15 

16url = 'https://www.bodc.ac.uk/data/open_download/gebco/gebco_2022/geotiff/' 

17 

18 

19def _filebounds(fpath): 

20 return { 

21 f[0]: float(f[1:]) 

22 for f in fpath.split('gebco_2022_', 1)[1].rsplit('.tif', 1)[0].split( 

23 '_') 

24 } 

25 

26 

27class Gebco(): 

28 

29 def __init__(self, data_dir): 

30 ''' 

31 args: 

32 data_dir (string) 

33 folder where rasters should be stored 

34 ''' 

35 self.data_dir = data_dir 

36 assert os.path.isdir(data_dir) 

37 self.__enter__() 

38 

39 def __enter__(self): 

40 self.rasterfiles = { 

41 f: _filebounds(f) 

42 for f in { 

43 k: None 

44 for k in sorted([ 

45 f for f in os.listdir(self.data_dir) 

46 if f[-4:] == '.tif' and 'gebco_2022' in f 

47 ]) 

48 } 

49 } 

50 

51 # download bathymetry rasters if missing 

52 self.fetch_bathymetry_grid() 

53 

54 return self 

55 

56 def __exit__(self, exc_type, exc_val, exc_tb): 

57 self._close_all() 

58 

59 def fetch_bathymetry_grid(self): # pragma: no cover 

60 """ download geotiff zip archive and extract it """ 

61 

62 zipf = os.path.join(self.data_dir, "gebco_2022_geotiff.zip") 

63 try: 

64 

65 # download the file if necessary 

66 if not os.path.isfile(zipf): 

67 print('downloading gebco bathymetry...') 

68 with requests.get(url, stream=True) as payload: 

69 assert payload.status_code == 200, 'error fetching file' 

70 with open(zipf, 'wb') as f: 

71 with tqdm(total=4011413504, 

72 desc=zipf, 

73 unit='B', 

74 unit_scale=True) as t: 

75 for chunk in payload.iter_content(chunk_size=8192): 

76 _ = t.update(f.write(chunk)) 

77 

78 with open(zipf, 'rb') as z: 

79 sha256sum = hashlib.sha256(z.read()).hexdigest() 

80 print('verifying checksum...') 

81 assert sha256sum == '5ade15083909fcd6003409df678bdc6537b8691df996f8d806b48de962470cc3',\ 

82 'checksum failed!' 

83 with zipfile.ZipFile(zipf, 'r') as zip_ref: 

84 members = list( 

85 set(zip_ref.namelist()) - 

86 set(sorted(os.listdir(self.data_dir)))) 

87 print('extracting bathymetry data...') 

88 zip_ref.extractall(path=self.data_dir, members=members) 

89 except (Exception, KeyboardInterrupt) as err: 

90 os.remove(os.path.join(self.data_dir, "gebco_2022_geotiff.zip")) 

91 raise (err) 

92 return 

93 

94 def _load_raster(self, key): 

95 self.rasterfiles[key]['raster'] = RasterFile( 

96 imgpath=os.path.join(self.data_dir, key)) 

97 

98 def _check_in_bounds(self, track): 

99 for lon, lat in zip(track['lon'], track['lat']): 

100 if not (-180 <= lon <= 180) or not (-90 <= lat <= 

101 90): # pragma: no cover 

102 warnings.warn('coordinates out of range!') 

103 lon = shiftcoord([lon])[0] 

104 lat = shiftcoord([lat], rng=90)[0] 

105 

106 if os.environ.get('DEBUG'): 

107 tracer = False 

108 for key, bounds in self.rasterfiles.items(): 

109 if (bounds['w'] <= lon <= bounds['e'] 

110 and bounds['s'] <= lat <= bounds['n']): 

111 tracer = True 

112 if 'raster' not in bounds.keys(): 

113 self._load_raster(key) 

114 yield key 

115 break 

116 if os.environ.get('DEBUG') and not tracer: 

117 print(f'{lon = } {lat = }') 

118 assert tracer 

119 return 

120 

121 def _close_all(self): 

122 for filepath, bounds in self.rasterfiles.items(): 

123 if 'raster' in bounds.keys(): 

124 bounds['raster'].img.close() 

125 

126 def merge_tracks(self, tracks): 

127 ''' append `depth_metres` column to track dictionaries ''' 

128 for track in tracks: 

129 # mapping of filepaths to the corresponding boundary region 

130 raster_keys = np.array(list(self._check_in_bounds(track)), 

131 dtype=object) 

132 

133 # ensure that each vector time slice has a value 

134 if len(raster_keys) != len(track['time']): 

135 raise ValueError('no rasters found for track') 

136 bathy_segments = np.append( 

137 np.append([0], 

138 np.where(raster_keys[:-1] != raster_keys[:1])[0]), 

139 [len(raster_keys)], 

140 ) 

141 track['depth_metres'] = reduce(np.append, [ 

142 self.rasterfiles[raster_keys[i]] 

143 ['raster']._track_coordinate_values( 

144 track, rng=range(bathy_segments[i], bathy_segments[i + 1])) 

145 for i in range(len(bathy_segments) - 1) 

146 ]) * -1 

147 

148 track['dynamic'] = set(track['dynamic']).union( 

149 set(['depth_metres'])) 

150 yield track