Source code for aisdb.denoising_encoder

import warnings
from functools import reduce

import numpy as np

from aisdb.aisdb import encoder_score_fcn
from aisdb.gis import delta_knots, delta_meters


def _score_idx(scores):
    ''' Find the position of the best score.

        args:
            scores
                non-empty score array (the caller passes a 1-D numpy array)

        returns:
            index at which ``scores`` equals its maximum; when the maximum
            occurs more than once, the last such index is returned
    '''
    assert len(scores) > 0
    is_best = scores == np.max(scores)
    best_positions = np.nonzero(is_best)[0]
    # ties are resolved in favour of the most recent entry
    return best_positions[-1]


def _segments_idx(track, distance_threshold, speed_threshold, **_):
    ''' Compute the boundary indices that partition a track into segments.

        A boundary is placed after every position where the value returned
        by ``delta_knots`` exceeds ``speed_threshold`` or the value returned
        by ``delta_meters`` exceeds ``distance_threshold``.

        args:
            track (dict)
                track vectors; ``track['time']`` must expose ``.size``
            distance_threshold
                upper bound compared against ``delta_meters(track)``
            speed_threshold
                upper bound compared against ``delta_knots(track)``
            **_
                extra keyword arguments are accepted and ignored

        returns:
            sorted array of unique indices, always containing 0 and
            ``track['time'].size``
    '''
    # NOTE(review): the +1 presumably converts a delta index into the index
    # of the first point following the jump -- confirm against aisdb.gis
    speed_breaks = np.where(delta_knots(track) > speed_threshold)[0] + 1
    n_points = track['time'].size
    by_speed = np.concatenate(([0], speed_breaks, [n_points]))

    distance_breaks = np.where(
        delta_meters(track) > distance_threshold)[0] + 1
    by_distance = np.concatenate(([0], distance_breaks, [n_points]))

    # union1d returns the sorted, de-duplicated combination of both sets
    return np.union1d(by_speed, by_distance)


def _scoresarray(track, *, pathways, i, segments_idx, distance_threshold,
                 speed_threshold, minscore):
    ''' Score the start of segment ``i`` against the end of every pathway.

        args:
            track (dict)
                track vectors with 'lon', 'lat' and 'time' arrays
            pathways (list)
                pathways built so far; the last 'lon', 'lat', 'time' entry
                of each is compared to the first point of the segment
            i (int)
                position of the segment within ``segments_idx``
            segments_idx
                segment boundary indices into the track arrays
            distance_threshold, speed_threshold
                forwarded to ``encoder_score_fcn`` as ``dist_thresh`` and
                ``speed_thresh``
            minscore
                value returned as the high score when there are no pathways

        returns:
            tuple ``(scores, highscore)`` where ``scores`` is a float32
            array holding one score per pathway, and ``highscore`` is the
            maximum of ``scores`` (or ``minscore`` if ``scores`` is empty)
    '''

    def _score_against(pathway):
        # end of the candidate pathway vs. first point of the new segment
        return encoder_score_fcn(
            x1=pathway['lon'][-1],
            y1=pathway['lat'][-1],
            t1=pathway['time'][-1],
            x2=track['lon'][segments_idx[i]],
            y2=track['lat'][segments_idx[i]],
            t2=track['time'][segments_idx[i]],
            dist_thresh=distance_threshold,
            speed_thresh=speed_threshold,
        )

    scores = np.array(list(map(_score_against, pathways)), dtype=np.float32)

    if not scores.size > 0:
        return scores, minscore

    first_best = np.nonzero(scores == np.max(scores))[0][0]
    return scores, scores[first_best]


def _append_highscore(track, *, highscoreidx, pathways, i, segments_idx):
    ''' Build a new pathway dict by extending the selected pathway with
        segment ``i`` of the track.

        args:
            track (dict)
                track vectors; ``track['static']`` and ``track['dynamic']``
                list the static and dynamic keys
            highscoreidx (int)
                index into ``pathways`` of the pathway to extend
            pathways (list)
                pathways built so far (not modified here)
            i (int)
                position of the segment within ``segments_idx``
            segments_idx
                segment boundary indices; the segment spans
                ``segments_idx[i]`` up to (excluding) ``segments_idx[i + 1]``

        returns:
            dict holding the track's static values, the concatenated dynamic
            arrays, and the 'static' / 'dynamic' key collections
    '''
    static_fields = {name: track[name] for name in track['static']}

    target = pathways[highscoreidx]
    window = slice(segments_idx[i], segments_idx[i + 1])
    dynamic_fields = {
        name: np.append(target[name], track[name][window])
        for name in track['dynamic']
    }

    return dict(
        **static_fields,
        **dynamic_fields,
        static=track['static'],
        dynamic=track['dynamic'],
    )


def _split_pathway(track, *, i, segments_idx):
    ''' Extract segment ``i`` of the track as a standalone pathway dict.

        args:
            track (dict)
                track vectors; ``track['static']`` and ``track['dynamic']``
                list the static and dynamic keys
            i (int)
                position of the segment within ``segments_idx``
            segments_idx
                segment boundary indices; the segment spans
                ``segments_idx[i]`` up to (excluding) ``segments_idx[i + 1]``

        returns:
            dict holding the track's static values, the dynamic arrays
            sliced to the segment, and the 'static' / 'dynamic' key
            collections
    '''
    static_fields = {name: track[name] for name in track['static']}

    window = slice(segments_idx[i], segments_idx[i + 1])
    dynamic_fields = {
        name: track[name][window]
        for name in track['dynamic']
    }

    return dict(
        **static_fields,
        **dynamic_fields,
        static=track['static'],
        dynamic=track['dynamic'],
    )


def encode_score(track, distance_threshold, speed_threshold, minscore):
    ''' Encodes likelihood of persistent track membership when given
        distance, speed, and score thresholds, using track speed deltas
        computed using distance computed by haversine function divided by
        elapsed time.

        A higher distance threshold will increase the maximum distance in
        meters allowed between pings for same trajectory membership. A
        higher speed threshold will allow vessels travelling up to this
        value in knots to be considered for persistent track membership.
        The minscore assigns a minimum score needed to be considered for
        membership, typically 0 or very close to 0 such as 1e-5.

        For example: a vessel travelling at a lower speed with short
        intervals between pings will have a higher likelihood of
        persistence. A trajectory with higher average speed or long
        intervals between pings may indicate two separate trajectories and
        will be segmented forming alternate trajectories according to
        highest likelihood of membership.

        This is a generator: each resulting pathway dict is yielded with an
        integer 'label' added, and 'label' included in its 'static' set.
    '''
    assert 'time' in track.keys()
    assert len(track['time']) > 0

    thresholds = {
        'distance_threshold': distance_threshold,
        'speed_threshold': speed_threshold,
        'minscore': minscore,
    }
    segments_idx = _segments_idx(track, **thresholds)

    pathways = []
    warned = False
    for i in range(segments_idx.size - 1):
        if not pathways:
            # nothing to compare against yet: the segment seeds a pathway
            seed = _split_pathway(track, i=i, segments_idx=segments_idx)
            assert seed is not None
            pathways.append(seed)
            continue

        if not warned and len(pathways) > 100:
            # emitted at most once per track
            warnings.warn(
                f'excessive number of pathways! mmsi={track["mmsi"]}')
            warned = True

        assert len(track['time']) > 0, f'{track=}'
        scores, highscore = _scoresarray(
            track,
            pathways=pathways,
            i=i,
            segments_idx=segments_idx,
            **thresholds,
        )
        assert len(scores) > 0, f'{track}'

        if highscore >= minscore:
            # good enough match: extend the best-scoring pathway
            highscoreidx = _score_idx(scores)
            pathways[highscoreidx] = _append_highscore(
                track,
                highscoreidx=highscoreidx,
                pathways=pathways,
                i=i,
                segments_idx=segments_idx,
            )
        else:
            # no pathway is a plausible predecessor: start a new one
            fresh = _split_pathway(track, i=i, segments_idx=segments_idx)
            assert fresh is not None
            pathways.append(fresh.copy())

    for label, pathway in enumerate(pathways):
        pathway['label'] = label
        pathway['static'] = set(pathway['static']).union({'label'})
        assert 'label' in pathway.keys()
        assert 'time' in pathway.keys(), f'{pathway=}'
        yield pathway


def encode_greatcircledistance(
    tracks,
    *,
    distance_threshold,
    speed_threshold=50,
    minscore=1e-6,
):
    ''' Partitions tracks where delta speeds exceed speed_threshold or
        delta_meters exceeds distance_threshold.
        Concatenates track segments with the highest likelihood of being
        sequential, as encoded by the encode_score function.

        args:
            tracks (aisdb.track_gen.TrackGen)
                track vectors generator
            distance_threshold (int)
                distance in meters that will be used as a
                speed score numerator
            speed_threshold (float)
                maximum speed in knots that should be considered a
                continuous trajectory
            minscore (float)
                minimum score threshold at which to allow track
                segments to be linked. Value range: (0, 1).
                A minscore closer to 0 will be less restrictive towards
                trajectory grouping. A reasonable value for this is 1e-6.
                This score is computed by the function
                :func:`aisdb.denoising_encoder.encode_score`

        >>> import os
        >>> from datetime import datetime, timedelta
        >>> from aisdb import SQLiteDBConn, DBQuery, TrackGen
        >>> from aisdb import decode_msgs, encode_greatcircledistance, sqlfcn_callbacks

        >>> # create example database file
        >>> dbpath = 'encoder_test.db'
        >>> filepaths = ['aisdb/tests/testdata/test_data_20210701.csv',
        ...              'aisdb/tests/testdata/test_data_20211101.nm4']

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     decode_msgs(filepaths=filepaths, dbconn=dbconn,
        ...                 source='TESTING', verbose=False)

        >>> with SQLiteDBConn(dbpath) as dbconn:
        ...     q = DBQuery(callback=sqlfcn_callbacks.in_timerange_validmmsi,
        ...                 dbconn=dbconn,
        ...                 start=datetime(2021, 7, 1),
        ...                 end=datetime(2021, 7, 7))
        ...     tracks = TrackGen(q.gen_qry(), decimate=True)
        ...     for track in encode_greatcircledistance(
        ...             tracks,
        ...             distance_threshold=250000,  # metres
        ...             speed_threshold=50,         # knots
        ...             minscore=0,
        ...     ):
        ...         print(track['mmsi'])
        ...         print(track['lon'], track['lat'])
        ...         break
        204242000
        [-8.931666] [41.45]
    '''
    # Doctest cleanup step, deliberately kept out of the rendered docstring:
    #     >>> os.remove(dbpath)
    for track in tracks:
        assert isinstance(track, dict), f'got {type(track)} {track}'
        yield from encode_score(track, distance_threshold, speed_threshold,
                                minscore)