The positions are supposed to arrive at two-minute intervals. Below is an
extract of the data as it reaches me, and I have attached the principal
modules of my code. The rest of it is the same as the Geospatial Tracking example.
MMSI, LAT, LON, SPEED, COURSE, STATUS, TIMESTAMP
210047000,43.468670,-9.770435,82,29,0,2016-02-22T17:18:24
212376000,43.243820,-10.084700,92,191,0,2016-02-22T17:20:11
219023000,43.146660,-9.937616,105,349,0,2016-02-22T17:18:56
224013910,43.066790,-9.612607,9,0,15,2016-02-22T17:19:18
224123730,43.101720,-9.610230,21,226,7,2016-02-22T17:16:03
235084298,43.426110,-9.640910,192,17,0,2016-02-22T17:20:47
235096368,43.040520,-9.771927,120,358,7,2016-02-22T17:21:17
244650165,42.986370,-9.797475,89,357,0,2016-02-22T17:20:28
245947000,43.236970,-9.724459,94,27,0,2016-02-22T17:20:35
247325500,43.293460,-9.927738,123,28,0,2016-02-22T17:20:13
256612000,43.125930,-10.072610,116,185,0,2016-02-22T17:18:56
257833000,43.380730,-9.852883,108,12,0,2016-02-22T17:21:24
258649000,43.369920,-9.643563,168,30,0,2016-02-22T17:20:36
304031000,43.204720,-10.103680,115,179,0,2016-02-22T17:19:33
304050982,43.399410,-10.119990,139,207,0,2016-02-22T17:22:01
351675000,43.376810,-10.049390,164,205,0,2016-02-22T17:16:14
355289000,43.149670,-9.784833,180,7,0,2016-02-22T17:21:37
428044000,42.999350,-9.777610,116,357,3,2016-02-22T17:19:22
566577000,42.976810,-9.956157,122,1,0,2016-02-22T17:20:20
636015262,43.199380,-9.751516,94,27,0,2016-02-22T17:19:09
636015529,43.194890,-9.781404,137,1,0,2016-02-22T17:16:14
#!/usr/bin/env python
import csv
import datetime
import sys
def preprocess(dataPath, outPath, verbose=False):
    """Deduplicate converted AIS rows before they are fed to the model.

    Reads the converted CSV (track, epoch-seconds, lon, lat, ...) from
    dataPath and writes to outPath only the rows where the vessel actually
    moved: a row is dropped when the position changed by no more than
    0.0005 degrees in both axes, or when it was reported within 30 seconds
    of the previously kept row of the same track.  Rows are grouped by
    track, in the order each track is first seen.

    :param dataPath: path of the converted input CSV
    :param outPath: path of the deduplicated output CSV
    :param verbose: print every row that is kept
    """
    with open(dataPath) as csvfile:
        # Materialise the file once; each track's rows are re-scanned below.
        rows = [[str(item) for item in row] for row in csv.reader(csvfile)]
    # BUG FIX: the output handle was never closed.  Mode differs by Python
    # version because the csv module wants binary files on 2 and text on 3.
    if sys.version_info[0] < 3:
        outfile = open(outPath, "wb")
    else:
        outfile = open(outPath, "w", newline="")
    try:
        writer = csv.writer(outfile)
        # A set gives O(1) "seen this track already?" instead of the old
        # linear scan per row.
        usedtracks = set()
        for row in rows:
            trackname = str(row[0])
            if trackname in usedtracks:
                continue
            lastlat = 0
            lastlon = 0
            lasttimestamp = 0
            for i in rows:
                if i[0] != trackname:
                    continue
                keep = True
                # Columns: i[1] = epoch seconds, i[2] = lon, i[3] = lat.
                if abs(float(i[2]) - lastlon) <= 0.0005 and abs(float(i[3]) - lastlat) <= 0.0005:
                    keep = False
                if abs(int(i[1]) - lasttimestamp) <= 30:
                    keep = False
                if keep:
                    if verbose:
                        # BUG FIX: report the row actually being kept (i),
                        # not the track's first row (row).
                        print("Keeping row:\t{0}".format(i))
                    writer.writerow(i)
                    lastlon = float(i[2])
                    lastlat = float(i[3])
                    lasttimestamp = int(i[1])
            usedtracks.add(trackname)
    finally:
        outfile.close()
    # Report how many distinct tracks were processed.
    print(len(usedtracks))
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
preprocess(dataPath, outPath)
#!/usr/bin/env python
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2014, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
"""
A simple client to create a CLA anomaly detection model for geospatial data.
"""
import csv
import datetime
import sys
from nupic.frameworks.opf.modelfactory import ModelFactory
import model_params
# Default input used when no command-line argument is supplied.
DEFAULT_DATA_PATH = "data/commute.csv"
# Default destination for the per-row anomaly scores CSV.
DEFAULT_OUTPUT_PATH = "anomaly_scores.csv"
# Rows whose reported GPS accuracy exceeds this value are skipped.
ACCURACY_THRESHOLD = 80 # meters
# A gap larger than this between consecutive rows starts a new sequence.
INTERVAL_THRESHOLD = 30 # seconds
def addTimeEncoders(params):
    """Add a time-of-day date encoder for the timestamp field and return params.

    NOTE: the passed-in params dict is mutated in place.
    """
    encoders = params["modelParams"]["sensorParams"]["encoders"]
    encoders["timestamp_timeOfDay"] = {
        "fieldname": u"timestamp",
        "name": u"timestamp_timeOfDay",
        "timeOfDay": (51, 9.5),
        "type": "DateEncoder"
    }
    return params
def setEncoderScale(params, scale):
    """Set the vector encoder's meter resolution to int(scale) and return params.

    NOTE: the passed-in params dict is mutated in place.
    """
    vectorEncoder = params["modelParams"]["sensorParams"]["encoders"]["vector"]
    vectorEncoder["scale"] = int(scale)
    return params
def createModel(useTimeEncoders, scale, verbose):
params = model_params.MODEL_PARAMS
if useTimeEncoders:
params = addTimeEncoders(params)
if scale:
params = setEncoderScale(params, scale)
if verbose:
print "Model parameters:"
print params
model = ModelFactory.create(params)
model.enableInference({"predictedField": "vector"})
return model
def runGeospatialAnomaly(dataPath, outputPath,
                         scale=False,
                         autoSequence=True,
                         useTimeEncoders=False,
                         verbose=False):
    """Feed each position row to the CLA model and write anomaly scores.

    Reads rows of (track, epoch-seconds, lon, lat, altitude, speed, ?, accuracy)
    from dataPath and writes one scored row per kept input row to outputPath.

    :param dataPath: input CSV (output of the preprocessing step)
    :param outputPath: destination CSV for anomaly scores
    :param scale: meter resolution for the coordinate encoder (False = default)
    :param autoSequence: True = new sequence after a time gap; False = new
        sequence whenever the track name changes
    :param useTimeEncoders: also feed the timestamp to a time-of-day encoder
    :param verbose: print per-row progress
    """
    model = createModel(useTimeEncoders, scale, verbose)
    with open (dataPath) as fin:
        reader = csv.reader(fin)
        # NOTE(review): this output handle is never explicitly closed.
        csvWriter = csv.writer(open(outputPath,"wb"))
        csvWriter.writerow(["trackName",
                            "timestamp",
                            "longitude",
                            "latitude",
                            "speed",
                            "anomaly_score",
                            "new_sequence"])
        # NOTE(review): this skips the first three input rows.  The file
        # produced by preprocess() has no header, so three real data rows
        # appear to be discarded here -- confirm this is intentional.
        reader.next()
        reader.next()
        reader.next()
        lastTimestamp = None
        lastTrackName = None
        outputFormat = "%Y-%m-%dT%H:%M:%S"
        # The enumerate index is unused; the loop simply walks the rows.
        for _, record in enumerate(reader, start=1):
            trackName = record[0]
            # record[1] is a Unix epoch in seconds (see the AIS conversion).
            timestamp = datetime.datetime.fromtimestamp(int(record[1]))
            longitude = float(record[2])
            latitude = float(record[3])
            speed = float(record[5])
            # NOTE(review): the AIS conversion writes a constant 1 in
            # column 7, so this accuracy filter never triggers for converted
            # AIS data; presumably it matters only for raw GPS exports.
            accuracy = float(record[7])
            altitude = float(record[4]) if record[4] != "" else None
            if accuracy > ACCURACY_THRESHOLD:
                continue
            newSequence = False
            # Handle the automatic sequence creation
            if autoSequence:
                if lastTimestamp and (
                    (timestamp - lastTimestamp).total_seconds() > INTERVAL_THRESHOLD):
                    newSequence = True
            # Manual sequence resets depend on the track name
            else:
                if trackName != lastTrackName:
                    newSequence = True
            lastTimestamp = timestamp
            lastTrackName = trackName
            if newSequence:
                if verbose:
                    print "Starting new sequence..."
                # Reset the temporal memory so sequences are not chained
                # across gaps/tracks.
                model.resetSequenceStates()
            modelInput = {
                "vector": (speed, longitude, latitude, altitude)
            }
            if useTimeEncoders:
                modelInput["timestamp"] = timestamp
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]
            csvWriter.writerow([trackName,
                                timestamp.strftime(outputFormat),
                                longitude,
                                latitude,
                                speed,
                                anomalyScore,
                                1 if newSequence else 0])
            if verbose:
                print "[{0} - {1}] - Anomaly score: {2}.".format(trackName, timestamp, anomalyScore)
    print "Anomaly scores have been written to {0}".format(outputPath)
if __name__ == "__main__":
    # Optional positional arguments override the default input/output paths.
    dataPath = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DATA_PATH
    outputPath = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT_PATH
    runGeospatialAnomaly(dataPath, outputPath)
#!/usr/bin/env python
import os
import sys
from tools.preprocess_data import preprocess
from tools.anomaly_to_js_data import postprocess
from model.geospatial_anomaly import runGeospatialAnomaly
# Module-level verbosity flag; reassigned from the parsed CLI options in __main__.
verbose = False
# Absolute directory of this script, used to locate the static/ output assets.
scriptDir = os.path.dirname(os.path.realpath(__file__))
def run(inputPath, outputPath, useTimeEncoders, scale, autoSequence, verbose):
preProcessedOutputPath = os.path.join(outputPath, "preprocessed_data.csv")
if verbose: print "Pre-processing %s..." % inputPath
preprocess(inputPath, preProcessedOutputPath, verbose=verbose)
anomalyOutputPath = os.path.join(outputPath, "anomaly_scores.csv")
if verbose: print "Running NuPIC on %s..." % preProcessedOutputPath
runGeospatialAnomaly(preProcessedOutputPath,
anomalyOutputPath,
scale=scale,
autoSequence=autoSequence,
useTimeEncoders=useTimeEncoders,
verbose=verbose)
visualizationOutputPath = os.path.join(scriptDir, "static/js/data.js")
if verbose: print "Creating visualization at %s..." % visualizationOutputPath
postprocess(anomalyOutputPath, visualizationOutputPath)
if __name__ == "__main__":
    # NOTE(review): `parser` is not defined anywhere in this script -- it is
    # presumably supposed to be built here (as in the maritime runner) or
    # imported; as written this block raises NameError.  Confirm against the
    # upstream run.py.
    (options, args) = parser.parse_args(sys.argv[1:])
    try:
        input_path = args.pop(0)
    except IndexError:
        parser.print_help(sys.stderr)
        sys.exit()
    verbose = options.verbose
    # NOTE(review): outputDir, useTimeEncoders, scale and manualSequence are
    # also undefined here; they look like they should come from `options`
    # (and autoSequence should likely be `not options.manualSequence`).
    run(input_path, outputDir, useTimeEncoders, scale, manualSequence, verbose)
#!/usr/bin/env python
import os
import sys
from optparse import OptionParser
from run import run
from tools.convertion import convertion
from tools.anomaly_to_kml import anomalyrepresentation as KML
# Directory used for all generated artifacts unless -o overrides it.
DEFAULT_OUTPUT_DIR = "output"
# Module-level verbosity flag; reassigned from the parsed options in __main__.
verbose = False
# Absolute directory of this script.
scriptDir = os.path.dirname(os.path.realpath(__file__))
parser = OptionParser(
    usage="%prog <path/to/input/file> [options]\n\nRun NuPIC on specified "
          "location file, which should already be in the proper format "
          "(downloaded from the simulator)."
)
parser.add_option(
    "-m",
    "--manual-sequence",
    action="store_true",
    default=False,
    dest="manualSequence",
    # BUG FIX: the old help text described the default *automatic* behaviour;
    # this flag actually switches to manual, track-name-based sequencing.
    help="Break into sequences manually when the track name changes, instead "
         "of automatically based upon time gaps."
)
parser.add_option(
    "-t",
    "--time-encoders",
    action="store_true",
    default=False,
    dest="useTimeEncoders",
    help="Adds time of day encoder to model params."
)
parser.add_option(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Print debugging statements."
)
parser.add_option(
    "-o",
    "--output-dir",
    default=DEFAULT_OUTPUT_DIR,
    dest="outputDir",
    help="Where to write the output file."
)
parser.add_option(
    "-s",
    "--scale",
    default=False,
    dest="scale",
    help="Meter resolution for Geospatial Coordinate Encoder (default 5m)."
)
def maritimeanomalies(inputPath, outputDir, useTimeEncoders, scale, autoSequence):
outputPath = os.path.abspath(outputDir)
if not os.path.exists(outputPath):
os.makedirs(outputPath)
convertedOutputPath = os.path.join(outputPath, "converted_data.csv")
if verbose: print "Converting %s..." % inputPath
convertion(inputPath, convertedOutputPath)
run(convertedOutputPath, outputPath, useTimeEncoders=useTimeEncoders, scale=scale, autoSequence=autoSequence, verbose=verbose)
representationDataPath = os.path.join(outputPath, "anomaly_scores.csv")
representationOutputPath = os.path.join(outputPath, "anomaly_representation.kml")
if verbose: print "Creating visualization at %s..." % representationOutputPath
KML(representationDataPath,representationOutputPath)
if __name__ == "__main__":
    (options, args) = parser.parse_args(sys.argv[1:])
    # The input file is the one mandatory positional argument.
    if not args:
        parser.print_help(sys.stderr)
        sys.exit()
    input_path = args[0]
    verbose = options.verbose
    # -m selects manual sequencing, so automatic sequencing is its negation.
    maritimeanomalies(input_path,
                      options.outputDir,
                      options.useTimeEncoders,
                      options.scale,
                      not options.manualSequence)
import csv
import time
import calendar
def readais(dataPath):
    """Read the raw AIS CSV at *dataPath* and return its rows as a list of lists.

    :param dataPath: path of the original AIS export
    :return: every row (header included) as a list of strings
    """
    # `with` guarantees the handle is closed even if parsing raises (the
    # original leaked it on error).  Text mode keeps the csv module happy on
    # both Python 2 (Unix) and Python 3; the old 'rb' was Python-2-only.
    with open(dataPath, 'r') as my_file:
        return [list(row) for row in csv.reader(my_file)]
def aisconvertion(filelist):
    """Convert raw AIS rows into the column layout nupic.geospatial expects.

    Input columns:  MMSI, LAT, LON, SPEED, COURSE, STATUS, TIMESTAMP
    Output columns: track, epoch seconds, LON, LAT, '', speed, '', 1
    """
    fileconverted = []
    # filelist[0] is the header row; only the data rows are converted.
    for record in filelist[1:]:
        parsed = time.strptime(str(record[6]), "%Y-%m-%dT%H:%M:%S")
        epochSeconds = calendar.timegm(parsed)
        # 0.05144 presumably scales the knots value for the encoder -- note a
        # full knots -> m/s conversion would be 0.51444; confirm the intended
        # unit with the model configuration.
        fileconverted.append([
            record[0],
            int(epochSeconds),
            record[2],
            record[1],
            "",
            float(record[3]) * 0.05144,
            "",
            1,
        ])
    return fileconverted
def writeais(outPath, fileconverted):
    """Write the converted rows to *outPath* as unquoted CSV for nupic.geospatial.

    :param outPath: destination CSV path
    :param fileconverted: list of rows as returned by aisconvertion()
    """
    # `with` closes the handle even when a write raises; the original leaked
    # the file object on any csv error.
    with open(outPath, 'wb') as my_file:
        writer = csv.writer(my_file, delimiter=',', quotechar="", quoting=csv.QUOTE_NONE)
        # writerows() is the bulk form of the old per-row loop.
        writer.writerows(fileconverted)
def convertion(dataPath, outPath):
    """Read the raw AIS file, convert it, and write the nupic-ready CSV."""
    writeais(outPath, aisconvertion(readais(dataPath)))
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
convertion(dataPath, outPath)
#!/usr/bin/env python
import csv
import xml.dom.minidom
import sys
def extractCoordinates(row):
    """Return the 'longitude,latitude' KML coordinate string for *row*.

    Columns 2 and 3 of an anomaly-score row hold longitude and latitude.
    """
    return "{0},{1}".format(row[2], row[3])
def createPlacemark(kmlDoc, row, order):
    """Build and return a <Placemark> element for one anomaly-score row.

    The placemark carries every column as <ExtendedData>, a styleUrl picked
    from the anomaly score, and a <Point> with the row's coordinates.
    """
    placemarkElement = kmlDoc.createElement('Placemark')
    extElement = kmlDoc.createElement('ExtendedData')
    placemarkElement.appendChild(extElement)
    # One <Data name=...><value>...</value></Data> entry per column.
    for columnIndex, fieldName in enumerate(order):
        dataElement = kmlDoc.createElement('Data')
        dataElement.setAttribute('name', fieldName)
        valueElement = kmlDoc.createElement('value')
        dataElement.appendChild(valueElement)
        valueElement.appendChild(kmlDoc.createTextNode(row[columnIndex]))
        extElement.appendChild(dataElement)
    # Colour the pin by anomaly severity (row[5] is the anomaly score).
    score = float(row[5])
    if score <= 0.25:
        styleName = '#green'
    elif score <= 0.75:
        styleName = '#yellow'
    else:
        styleName = '#red'
    styleElement = kmlDoc.createElement('styleUrl')
    styleElement.appendChild(kmlDoc.createTextNode(styleName))
    placemarkElement.appendChild(styleElement)
    pointElement = kmlDoc.createElement('Point')
    placemarkElement.appendChild(pointElement)
    coorElement = kmlDoc.createElement('coordinates')
    coorElement.appendChild(kmlDoc.createTextNode(extractCoordinates(row)))
    pointElement.appendChild(coorElement)
    return placemarkElement
def _appendPinStyle(kmlDoc, documentElement, styleId, scale, href):
    # Append one pin <Style> (id, icon scale, icon image URL) to the document.
    styleElement = kmlDoc.createElement('Style')
    styleElement.setAttribute('id', styleId)
    iconstyleElement = kmlDoc.createElement('IconStyle')
    scaleElement = kmlDoc.createElement('scale')
    scaleElement.appendChild(kmlDoc.createTextNode(scale))
    iconstyleElement.appendChild(scaleElement)
    iconElement = kmlDoc.createElement('Icon')
    hrefElement = kmlDoc.createElement('href')
    hrefElement.appendChild(kmlDoc.createTextNode(href))
    iconElement.appendChild(hrefElement)
    iconstyleElement.appendChild(iconElement)
    styleElement.appendChild(iconstyleElement)
    documentElement.appendChild(styleElement)

def createKML(csvReader, csvreader1, fileName, order):
    """Construct the KML document from the anomaly-score CSV.

    :param csvReader: a csv.DictReader over the score file (header included)
    :param csvreader1: the same rows materialised as lists of strings
    :param fileName: destination path for the KML output
    :param order: column names, in file order
    """
    kmlDoc = xml.dom.minidom.Document()
    kmlElement = kmlDoc.createElementNS('http://earth.google.com/kml/2.2', 'kml')
    kmlElement.setAttribute('xmlns', 'http://earth.google.com/kml/2.2')
    kmlElement = kmlDoc.appendChild(kmlElement)
    documentElement = kmlDoc.createElement('Document')
    # Pin styles keyed by anomaly severity (thresholds in createPlacemark).
    _appendPinStyle(kmlDoc, documentElement, 'green', '0.5',
                    'http://maps.google.com/mapfiles/kml/paddle/grn-blank.png')
    _appendPinStyle(kmlDoc, documentElement, 'yellow', '0.6',
                    'http://maps.google.com/mapfiles/kml/paddle/ylw-blank.png')
    _appendPinStyle(kmlDoc, documentElement, 'red', '0.8',
                    'http://maps.google.com/mapfiles/kml/paddle/red-blank.png')
    documentElement = kmlElement.appendChild(documentElement)
    # Skip the header line of the DictReader view.
    csvReader.next()
    # BUG FIX: the original removed csvreader1[0] once per *new track*, which
    # dropped the header the first time but then threw away one real data row
    # for every subsequent track.  Drop the header exactly once, up front.
    if csvreader1:
        del csvreader1[0]
    usedtracks = ["0"]
    # One <Folder> per track, containing a placemark for each of its rows.
    for row in csvReader:
        trackName = str(row['trackName'])
        if trackName in usedtracks:
            continue
        folderElement = kmlDoc.createElement('Folder')
        nameElement = kmlDoc.createElement('name')
        nameElement.appendChild(kmlDoc.createTextNode(trackName))
        folderElement.appendChild(nameElement)
        for item in csvreader1:
            if item[0] == trackName:
                folderElement.appendChild(createPlacemark(kmlDoc, item, order))
        usedtracks.append(trackName)
        documentElement.appendChild(folderElement)
    # Close the output handle even if serialisation fails.
    with open(fileName, 'w') as kmlFile:
        kmlFile.write(kmlDoc.toprettyxml(' ', newl = '\n', encoding = 'utf-8'))
def anomalyrepresentation(dataPath, outPath):
    """Create a KML visualisation from an anomaly-score CSV.

    Reads the score file (e.g. 'anomaly_scores.csv') and writes a KML file
    (e.g. 'anomaly_representation.kml') with one folder per track.

    :param dataPath: path of the anomaly-score CSV
    :param outPath: destination path for the KML file
    """
    # Column order matching the header written by the anomaly runner.
    order = ['trackName','timestamp','longitude','latitude','speed','anomaly_score','new_sequence']
    # First pass: materialise every row (the header line is included because
    # explicit fieldnames make DictReader treat it as data).
    csvreader1 = []
    with open(dataPath) as fin:
        for row in csv.DictReader(fin, order):
            csvreader1.append([str(row[key]) for key in order])
    # Second pass: hand createKML a fresh reader positioned at the top of the
    # file.  BUG FIX: both handles are now closed deterministically -- the
    # original opened the file twice and never closed either handle.
    with open(dataPath) as fin:
        csvreader = csv.DictReader(fin, order)
        createKML(csvreader, csvreader1, outPath, order)
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
anomalyrepresentation(dataPath, outPath)