Coverage for aisdb/denoising_encoder.py: 99%
56 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 04:14 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 04:14 +0000
1import warnings
2from functools import reduce
4import numpy as np
6from aisdb.aisdb import encoder_score_fcn
7from aisdb.gis import delta_knots, delta_meters
10def _score_idx(scores):
11 ''' Returns indices of score array where value at index is equal to the
12 highest score. In tie cases, the last index will be selected
13 '''
14 assert len(scores) > 0
15 return np.where(scores == np.max(scores))[0][-1]
def _segments_idx(track, distance_threshold, speed_threshold, **_):
    ''' Compute the boundary indices that partition a track into segments.

        A boundary is placed at index ``j + 1`` for every ``j`` where
        ``delta_knots(track)[j]`` exceeds ``speed_threshold`` or
        ``delta_meters(track)[j]`` exceeds ``distance_threshold``.
        Index 0 and ``track['time'].size`` are always included.

        args:
            track (dict)
                track vectors; ``track['time']`` must expose ``.size``
            distance_threshold
                compared against the output of ``delta_meters``
            speed_threshold
                compared against the output of ``delta_knots``
            **_
                any additional keyword arguments are accepted and ignored

        returns:
            sorted array of unique boundary indices
    '''
    speed_breaks = np.where(delta_knots(track) > speed_threshold)[0] + 1
    by_speed = np.append(np.append([0], speed_breaks), [track['time'].size])

    distance_breaks = np.where(
        delta_meters(track) > distance_threshold)[0] + 1
    by_distance = np.append(np.append([0], distance_breaks),
                            [track['time'].size])

    # union1d merges both boundary sets into one sorted, de-duplicated array
    return np.union1d(by_speed, by_distance)
30def _scoresarray(track, *, pathways, i, segments_idx, distance_threshold,
31 speed_threshold, minscore):
32 scores = np.array(
33 [
34 encoder_score_fcn(
35 x1=pathway['lon'][-1],
36 y1=pathway['lat'][-1],
37 t1=pathway['time'][-1],
38 x2=track['lon'][segments_idx[i]],
39 y2=track['lat'][segments_idx[i]],
40 t2=track['time'][segments_idx[i]],
41 dist_thresh=distance_threshold,
42 speed_thresh=speed_threshold,
43 ) for pathway in pathways
44 ],
45 dtype=np.float32,
46 )
47 highscore = (scores[np.where(
48 scores == np.max(scores))[0][0]] if scores.size > 0 else minscore)
49 return scores, highscore
52def _append_highscore(track, *, highscoreidx, pathways, i, segments_idx):
53 return dict(
54 **{k: track[k]
55 for k in track['static']},
56 **{
57 k:
58 np.append(pathways[highscoreidx][k],
59 track[k][segments_idx[i]:segments_idx[i + 1]])
60 for k in track['dynamic']
61 },
62 static=track['static'],
63 dynamic=track['dynamic'],
64 )
67def _split_pathway(track, *, i, segments_idx):
68 path = dict(
69 **{k: track[k]
70 for k in track['static']},
71 **{
72 k: track[k][segments_idx[i]:segments_idx[i + 1]]
73 for k in track['dynamic']
74 },
75 static=track['static'],
76 dynamic=track['dynamic'],
77 )
78 return path
def encode_score(track, distance_threshold, speed_threshold, minscore):
    ''' Encodes likelihood of persistent track membership when given distance,
        speed, and score thresholds, using track speed deltas computed using
        distance computed by haversine function divided by elapsed time

        A higher distance threshold will increase the maximum distance in
        meters allowed between pings for same trajectory membership. A higher
        speed threshold will allow vessels travelling up to this value in knots
        to be considered for persistent track membership.
        The minscore assigns a minimum score needed to be considered for
        membership, typically 0 or very close to 0 such as 1e-5.

        For example: a vessel travelling at a lower speed with short intervals
        between pings will have a higher likelihood of persistence.
        A trajectory with higher average speed or long intervals between
        pings may indicate two separate trajectories and will be segmented
        forming alternate trajectories according to highest likelihood of
        membership.

        args:
            track (dict)
                track vectors dictionary; must contain a non-empty 'time'
                vector as well as 'static' and 'dynamic' key collections
            distance_threshold
                distance threshold used for segmentation and scoring
            speed_threshold
                speed threshold used for segmentation and scoring
            minscore
                minimum score at which a segment is joined to an existing
                pathway instead of starting a new one

        yields:
            pathway dictionaries, each with an integer 'label' added and
            'label' included in its 'static' key set
    '''
    assert 'time' in track
    assert len(track['time']) > 0
    params = dict(distance_threshold=distance_threshold,
                  speed_threshold=speed_threshold,
                  minscore=minscore)
    segments_idx = _segments_idx(track, **params)
    pathways = []
    warned = False
    for i in range(segments_idx.size - 1):
        if not pathways:
            # the first segment always seeds the first pathway
            pathways.append(
                _split_pathway(track, i=i, segments_idx=segments_idx))
            continue

        if not warned and len(pathways) > 100:
            # warn at most once per track
            warnings.warn(
                f'excessive number of pathways! mmsi={track["mmsi"]}')
            warned = True

        scores, highscore = _scoresarray(track,
                                         pathways=pathways,
                                         i=i,
                                         segments_idx=segments_idx,
                                         **params)
        assert len(scores) > 0, f'{track}'
        if highscore >= minscore:
            # extend the best-scoring pathway with this segment
            highscoreidx = _score_idx(scores)
            pathways[highscoreidx] = _append_highscore(
                track,
                highscoreidx=highscoreidx,
                pathways=pathways,
                i=i,
                segments_idx=segments_idx)
        else:
            # no pathway scored high enough: start a new one
            pathways.append(
                _split_pathway(track, i=i, segments_idx=segments_idx))

    for label, pathway in enumerate(pathways):
        pathway['label'] = label
        pathway['static'] = set(pathway['static']).union({'label'})
        assert 'time' in pathway, f'{pathway=}'
        yield pathway
def encode_greatcircledistance(
    tracks,
    *,
    distance_threshold,
    speed_threshold=50,
    minscore=1e-6,
):
    ''' Partitions tracks where delta speeds exceed speed_threshold or
        delta_meters exceeds distance_threshold.
        concatenates track segments with the highest likelihood of being
        sequential, as encoded by the encode_score function

        args:
            tracks (aisdb.track_gen.TrackGen)
                track vectors generator
            distance_threshold (int)
                distance in meters that will be used as a
                speed score numerator
            speed_threshold (float)
                maximum speed in knots that should be considered a continuous
                trajectory
            minscore (float)
                minimum score threshold at which to allow track
                segments to be linked. Value range: (0, 1).
                A minscore closer to 0 will be less restrictive towards
                trajectory grouping. A reasonable value for this is 1e-6.
                This score is computed by the function
                :func:`aisdb.denoising_encoder.encode_score`

        yields:
            pathway dictionaries produced by
            :func:`aisdb.denoising_encoder.encode_score` for each input
            track, in input order

        >>> import os
        >>> from datetime import datetime, timedelta
        >>> from aisdb import SQLiteDBConn, DBQuery, TrackGen
        >>> from aisdb import decode_msgs, encode_greatcircledistance, sqlfcn_callbacks

        >>> # create example database file
        >>> dbpath = 'encoder_test.db'
        >>> filepaths = ['aisdb/tests/testdata/test_data_20210701.csv',
        ...              'aisdb/tests/testdata/test_data_20211101.nm4']

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     decode_msgs(filepaths=filepaths, dbconn=dbconn,
        ...     source='TESTING', verbose=False)

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     q = DBQuery(callback=sqlfcn_callbacks.in_timerange_validmmsi,
        ...             dbconn=dbconn,
        ...             start=datetime(2021, 7, 1),
        ...             end=datetime(2021, 7, 7))
        ...     tracks = TrackGen(q.gen_qry(), decimate=True)
        ...     for track in encode_greatcircledistance(
        ...             tracks,
        ...             distance_threshold=250000,  # metres
        ...             speed_threshold=50,  # knots
        ...             minscore=0,
        ...             ):
        ...         print(track['mmsi'])
        ...         print(track['lon'], track['lat'])
        ...         break
        204242000
        [-8.931666] [41.45]

    '''
    # NOTE(review): the example above leaves 'encoder_test.db' on disk. The
    # original kept the cleanup step in a separate, unused string literal
    # (presumably to hide it from the rendered docs -- confirm):
    #   >>> os.remove(dbpath)
    for track in tracks:
        assert isinstance(track, dict), f'got {type(track)} {track}'
        yield from encode_score(track, distance_threshold, speed_threshold,
                                minscore)