Coverage for aisdb/denoising_encoder.py: 99%

56 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-30 04:14 +0000

1import warnings 

2from functools import reduce 

3 

4import numpy as np 

5 

6from aisdb.aisdb import encoder_score_fcn 

7from aisdb.gis import delta_knots, delta_meters 

8 

9 

10def _score_idx(scores): 

11 ''' Returns indices of score array where value at index is equal to the 

12 highest score. In tie cases, the last index will be selected 

13 ''' 

14 assert len(scores) > 0 

15 return np.where(scores == np.max(scores))[0][-1] 

16 

17 

def _segments_idx(track, distance_threshold, speed_threshold, **_):
    ''' Compute the boundary indices that cut a track into segments.

        A boundary is placed one position after every index where
        delta_knots(track) exceeds speed_threshold, and one position after
        every index where delta_meters(track) exceeds distance_threshold.
        Index 0 and the number of time samples are always included, so
        consecutive pairs of the result delimit the segments.

        NOTE(review): delta_knots / delta_meters are presumably per-step
        deltas between consecutive positions -- confirm in aisdb.gis.

        Extra keyword arguments are accepted and ignored so callers can pass
        a shared parameter dict.

        returns:
            sorted array of unique boundary indices
    '''

    def boundaries(exceeded):
        # shift by one so the cut lands after the offending step, then
        # frame with the start and end of the track
        return np.concatenate(
            ([0], np.where(exceeded)[0] + 1, [track['time'].size]))

    speed_cuts = boundaries(delta_knots(track) > speed_threshold)
    distance_cuts = boundaries(delta_meters(track) > distance_threshold)

    return np.union1d(speed_cuts, distance_cuts)

28 

29 

30def _scoresarray(track, *, pathways, i, segments_idx, distance_threshold, 

31 speed_threshold, minscore): 

32 scores = np.array( 

33 [ 

34 encoder_score_fcn( 

35 x1=pathway['lon'][-1], 

36 y1=pathway['lat'][-1], 

37 t1=pathway['time'][-1], 

38 x2=track['lon'][segments_idx[i]], 

39 y2=track['lat'][segments_idx[i]], 

40 t2=track['time'][segments_idx[i]], 

41 dist_thresh=distance_threshold, 

42 speed_thresh=speed_threshold, 

43 ) for pathway in pathways 

44 ], 

45 dtype=np.float32, 

46 ) 

47 highscore = (scores[np.where( 

48 scores == np.max(scores))[0][0]] if scores.size > 0 else minscore) 

49 return scores, highscore 

50 

51 

52def _append_highscore(track, *, highscoreidx, pathways, i, segments_idx): 

53 return dict( 

54 **{k: track[k] 

55 for k in track['static']}, 

56 **{ 

57 k: 

58 np.append(pathways[highscoreidx][k], 

59 track[k][segments_idx[i]:segments_idx[i + 1]]) 

60 for k in track['dynamic'] 

61 }, 

62 static=track['static'], 

63 dynamic=track['dynamic'], 

64 ) 

65 

66 

67def _split_pathway(track, *, i, segments_idx): 

68 path = dict( 

69 **{k: track[k] 

70 for k in track['static']}, 

71 **{ 

72 k: track[k][segments_idx[i]:segments_idx[i + 1]] 

73 for k in track['dynamic'] 

74 }, 

75 static=track['static'], 

76 dynamic=track['dynamic'], 

77 ) 

78 return path 

79 

80 

def encode_score(track, distance_threshold, speed_threshold, minscore):
    ''' Encodes likelihood of persistent track membership when given
        distance, speed, and score thresholds, using track speed deltas
        computed from haversine distance divided by elapsed time.

        A higher distance threshold increases the maximum distance in
        meters allowed between pings for membership in the same trajectory.
        A higher speed threshold allows vessels travelling up to this value
        in knots to be considered for persistent track membership.
        The minscore sets the minimum score needed to be considered for
        membership, typically 0 or very close to 0 such as 1e-5.

        For example: a vessel travelling at a lower speed with short
        intervals between pings will have a higher likelihood of
        persistence. A trajectory with higher average speed or long
        intervals between pings may indicate two separate trajectories, and
        will be segmented into alternate trajectories according to the
        highest likelihood of membership.

        The track is cut into segments; each segment is either appended to
        the existing pathway it scores highest against (when that score is
        at least minscore) or starts a new pathway.

        yields:
            one track dict per resulting pathway, with an integer 'label'
            added and 'label' included in its 'static' key set
    '''
    assert 'time' in track.keys()
    assert len(track['time']) > 0
    thresholds = dict(distance_threshold=distance_threshold,
                      speed_threshold=speed_threshold,
                      minscore=minscore)
    segments_idx = _segments_idx(track, **thresholds)
    pathways = []
    warned = False
    n_segments = segments_idx.size - 1
    for i in range(n_segments):
        if not pathways:
            # nothing to compare against yet: the segment opens a pathway
            first = _split_pathway(track, i=i, segments_idx=segments_idx)
            assert first is not None
            pathways.append(first)
            continue

        if len(pathways) > 100 and not warned:
            # warn only once per track
            warnings.warn(
                f'excessive number of pathways! mmsi={track["mmsi"]}')
            warned = True
        assert len(track['time']) > 0, f'{track=}'

        scores, highscore = _scoresarray(track,
                                         pathways=pathways,
                                         i=i,
                                         segments_idx=segments_idx,
                                         **thresholds)
        assert len(scores) > 0, f'{track}'

        if not (highscore >= minscore):
            # no pathway is a good enough match: start a new one
            fresh = _split_pathway(track, i=i, segments_idx=segments_idx)
            assert fresh is not None
            pathways.append(fresh.copy())
            continue

        best = _score_idx(scores)
        pathways[best] = _append_highscore(track,
                                           highscoreidx=best,
                                           pathways=pathways,
                                           i=i,
                                           segments_idx=segments_idx)

    for label, pathway in enumerate(pathways):
        pathway['label'] = label
        pathway['static'] = set(pathway['static']).union({'label'})
        assert 'label' in pathway.keys()
        assert 'time' in pathway.keys(), f'{pathway=}'
        yield pathway

145 

146 

def encode_greatcircledistance(
    tracks,
    *,
    distance_threshold,
    speed_threshold=50,
    minscore=1e-6,
):
    ''' Partitions tracks wherever delta speeds exceed speed_threshold or
        delta_meters exceeds distance_threshold, then concatenates the
        track segments with the highest likelihood of being sequential, as
        encoded by the encode_score function.

        args:
            tracks (aisdb.track_gen.TrackGen)
                track vectors generator
            distance_threshold (int)
                distance in meters that will be used as a
                speed score numerator
            speed_threshold (float)
                maximum speed in knots that should be considered a
                continuous trajectory
            minscore (float)
                minimum score threshold at which to allow track
                segments to be linked. Value range: (0, 1).
                A minscore closer to 0 will be less restrictive towards
                trajectory grouping. A reasonable value for this is 1e-6.
                This score is computed by the function
                :func:`aisdb.denoising_encoder.encode_score`

        yields:
            track dicts, one per pathway produced by encode_score for each
            input track

        >>> import os
        >>> from datetime import datetime, timedelta
        >>> from aisdb import SQLiteDBConn, DBQuery, TrackGen
        >>> from aisdb import decode_msgs, encode_greatcircledistance, sqlfcn_callbacks

        >>> # create example database file
        >>> dbpath = 'encoder_test.db'
        >>> filepaths = ['aisdb/tests/testdata/test_data_20210701.csv',
        ...              'aisdb/tests/testdata/test_data_20211101.nm4']

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     decode_msgs(filepaths=filepaths, dbconn=dbconn,
        ...                 source='TESTING', verbose=False)

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     q = DBQuery(callback=sqlfcn_callbacks.in_timerange_validmmsi,
        ...                 dbconn=dbconn,
        ...                 start=datetime(2021, 7, 1),
        ...                 end=datetime(2021, 7, 7))
        ...     tracks = TrackGen(q.gen_qry(), decimate=True)
        ...     for track in encode_greatcircledistance(
        ...             tracks,
        ...             distance_threshold=250000,  # metres
        ...             speed_threshold=50,  # knots
        ...             minscore=0,
        ...     ):
        ...         print(track['mmsi'])
        ...         print(track['lon'], track['lat'])
        ...         break
        204242000
        [-8.931666] [41.45]

    '''
    # doctest cleanup step, deliberately kept out of the docstring:
    #   >>> os.remove(dbpath)
    for track in tracks:
        assert isinstance(track, dict), f'got {type(track)} {track}'
        yield from encode_score(track, distance_threshold, speed_threshold,
                                minscore)