The positions are supposed to arrive at two-minute intervals. Below is an
extract of the data as it reaches me, and I have attached the principal
modules of my code. The rest of it is the same as the Geospatial Tracking example.
MMSI, LAT, LON, SPEED, COURSE, STATUS, TIMESTAMP
210047000,43.468670,-9.770435,82,29,0,2016-02-22T17:18:24
212376000,43.243820,-10.084700,92,191,0,2016-02-22T17:20:11
219023000,43.146660,-9.937616,105,349,0,2016-02-22T17:18:56
224013910,43.066790,-9.612607,9,0,15,2016-02-22T17:19:18
224123730,43.101720,-9.610230,21,226,7,2016-02-22T17:16:03
235084298,43.426110,-9.640910,192,17,0,2016-02-22T17:20:47
235096368,43.040520,-9.771927,120,358,7,2016-02-22T17:21:17
244650165,42.986370,-9.797475,89,357,0,2016-02-22T17:20:28
245947000,43.236970,-9.724459,94,27,0,2016-02-22T17:20:35
247325500,43.293460,-9.927738,123,28,0,2016-02-22T17:20:13
256612000,43.125930,-10.072610,116,185,0,2016-02-22T17:18:56
257833000,43.380730,-9.852883,108,12,0,2016-02-22T17:21:24
258649000,43.369920,-9.643563,168,30,0,2016-02-22T17:20:36
304031000,43.204720,-10.103680,115,179,0,2016-02-22T17:19:33
304050982,43.399410,-10.119990,139,207,0,2016-02-22T17:22:01
351675000,43.376810,-10.049390,164,205,0,2016-02-22T17:16:14
355289000,43.149670,-9.784833,180,7,0,2016-02-22T17:21:37
428044000,42.999350,-9.777610,116,357,3,2016-02-22T17:19:22
566577000,42.976810,-9.956157,122,1,0,2016-02-22T17:20:20
636015262,43.199380,-9.751516,94,27,0,2016-02-22T17:19:09
636015529,43.194890,-9.781404,137,1,0,2016-02-22T17:16:14
#!/usr/bin/env python
import csv
import datetime
import sys
def preprocess(dataPath, outPath, verbose=False):
    """Deduplicate converted AIS rows before they are fed to the model.

    Reads the converted CSV (track, epoch-seconds, lon, lat, ...) from
    dataPath and writes to outPath only the rows where the vessel actually
    moved: a row is dropped when the position changed by no more than
    0.0005 degrees in both axes, or when it was reported within 30 seconds
    of the previously kept row of the same track.  Rows are grouped by
    track, in the order each track is first seen.

    :param dataPath: path of the converted input CSV
    :param outPath: path of the deduplicated output CSV
    :param verbose: print every row that is kept
    """
    with open(dataPath) as csvfile:
        # Materialise the file once; each track's rows are re-scanned below.
        rows = [[str(item) for item in row] for row in csv.reader(csvfile)]
    # BUG FIX: the output handle was never closed.  Mode differs by Python
    # version because the csv module wants binary files on 2 and text on 3.
    if sys.version_info[0] < 3:
        outfile = open(outPath, "wb")
    else:
        outfile = open(outPath, "w", newline="")
    try:
        writer = csv.writer(outfile)
        # A set gives O(1) "seen this track already?" instead of the old
        # linear scan per row.
        usedtracks = set()
        for row in rows:
            trackname = str(row[0])
            if trackname in usedtracks:
                continue
            lastlat = 0
            lastlon = 0
            lasttimestamp = 0
            for i in rows:
                if i[0] != trackname:
                    continue
                keep = True
                # Columns: i[1] = epoch seconds, i[2] = lon, i[3] = lat.
                if abs(float(i[2]) - lastlon) <= 0.0005 and abs(float(i[3]) - lastlat) <= 0.0005:
                    keep = False
                if abs(int(i[1]) - lasttimestamp) <= 30:
                    keep = False
                if keep:
                    if verbose:
                        # BUG FIX: report the row actually being kept (i),
                        # not the track's first row (row).
                        print("Keeping row:\t{0}".format(i))
                    writer.writerow(i)
                    lastlon = float(i[2])
                    lastlat = float(i[3])
                    lasttimestamp = int(i[1])
            usedtracks.add(trackname)
    finally:
        outfile.close()
    # Report how many distinct tracks were processed.
    print(len(usedtracks))
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
preprocess(dataPath, outPath)
#!/usr/bin/env python
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2014, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
"""
A simple client to create a CLA anomaly detection model for geospatial data.
"""
import csv
import datetime
import sys
from nupic.frameworks.opf.modelfactory import ModelFactory
import model_params
# Default input used when no command-line argument is supplied.
DEFAULT_DATA_PATH = "data/commute.csv"
# Default destination for the per-row anomaly scores CSV.
DEFAULT_OUTPUT_PATH = "anomaly_scores.csv"
# Rows whose reported GPS accuracy exceeds this value are skipped.
ACCURACY_THRESHOLD = 80 # meters
# A gap larger than this between consecutive rows starts a new sequence.
INTERVAL_THRESHOLD = 30 # seconds
def addTimeEncoders(params):
    """Add a time-of-day date encoder for the timestamp field and return params.

    NOTE: the passed-in params dict is mutated in place.
    """
    encoders = params["modelParams"]["sensorParams"]["encoders"]
    encoders["timestamp_timeOfDay"] = {
        "fieldname": u"timestamp",
        "name": u"timestamp_timeOfDay",
        "timeOfDay": (51, 9.5),
        "type": "DateEncoder"
    }
    return params
def setEncoderScale(params, scale):
    """Set the vector encoder's meter resolution to int(scale) and return params.

    NOTE: the passed-in params dict is mutated in place.
    """
    vectorEncoder = params["modelParams"]["sensorParams"]["encoders"]["vector"]
    vectorEncoder["scale"] = int(scale)
    return params
def createModel(useTimeEncoders, scale, verbose):
params = model_params.MODEL_PARAMS
if useTimeEncoders:
params = addTimeEncoders(params)
if scale:
params = setEncoderScale(params, scale)
if verbose:
print "Model parameters:"
print params
model = ModelFactory.create(params)
model.enableInference({"predictedField": "vector"})
return model
def runGeospatialAnomaly(dataPath, outputPath,
                         scale=False,
                         autoSequence=True,
                         useTimeEncoders=False,
                         verbose=False):
    """Feed each position row to the CLA model and write anomaly scores.

    Reads rows of (track, epoch-seconds, lon, lat, altitude, speed, ?, accuracy)
    from dataPath and writes one scored row per kept input row to outputPath.

    :param dataPath: input CSV (output of the preprocessing step)
    :param outputPath: destination CSV for anomaly scores
    :param scale: meter resolution for the coordinate encoder (False = default)
    :param autoSequence: True = new sequence after a time gap; False = new
        sequence whenever the track name changes
    :param useTimeEncoders: also feed the timestamp to a time-of-day encoder
    :param verbose: print per-row progress
    """
    model = createModel(useTimeEncoders, scale, verbose)
    with open (dataPath) as fin:
        reader = csv.reader(fin)
        # NOTE(review): this output handle is never explicitly closed.
        csvWriter = csv.writer(open(outputPath,"wb"))
        csvWriter.writerow(["trackName",
                            "timestamp",
                            "longitude",
                            "latitude",
                            "speed",
                            "anomaly_score",
                            "new_sequence"])
        # NOTE(review): this skips the first three input rows.  The file
        # produced by preprocess() has no header, so three real data rows
        # appear to be discarded here -- confirm this is intentional.
        reader.next()
        reader.next()
        reader.next()
        lastTimestamp = None
        lastTrackName = None
        outputFormat = "%Y-%m-%dT%H:%M:%S"
        # The enumerate index is unused; the loop simply walks the rows.
        for _, record in enumerate(reader, start=1):
            trackName = record[0]
            # record[1] is a Unix epoch in seconds (see the AIS conversion).
            timestamp = datetime.datetime.fromtimestamp(int(record[1]))
            longitude = float(record[2])
            latitude = float(record[3])
            speed = float(record[5])
            # NOTE(review): the AIS conversion writes a constant 1 in
            # column 7, so this accuracy filter never triggers for converted
            # AIS data; presumably it matters only for raw GPS exports.
            accuracy = float(record[7])
            altitude = float(record[4]) if record[4] != "" else None
            if accuracy > ACCURACY_THRESHOLD:
                continue
            newSequence = False
            # Handle the automatic sequence creation
            if autoSequence:
                if lastTimestamp and (
                    (timestamp - lastTimestamp).total_seconds() > INTERVAL_THRESHOLD):
                    newSequence = True
            # Manual sequence resets depend on the track name
            else:
                if trackName != lastTrackName:
                    newSequence = True
            lastTimestamp = timestamp
            lastTrackName = trackName
            if newSequence:
                if verbose:
                    print "Starting new sequence..."
                # Reset the temporal memory so sequences are not chained
                # across gaps/tracks.
                model.resetSequenceStates()
            modelInput = {
                "vector": (speed, longitude, latitude, altitude)
            }
            if useTimeEncoders:
                modelInput["timestamp"] = timestamp
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]
            csvWriter.writerow([trackName,
                                timestamp.strftime(outputFormat),
                                longitude,
                                latitude,
                                speed,
                                anomalyScore,
                                1 if newSequence else 0])
            if verbose:
                print "[{0} - {1}] - Anomaly score: {2}.".format(trackName, timestamp, anomalyScore)
    print "Anomaly scores have been written to {0}".format(outputPath)
if __name__ == "__main__":
    # Optional positional arguments override the default input/output paths.
    dataPath = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DATA_PATH
    outputPath = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT_PATH
    runGeospatialAnomaly(dataPath, outputPath)
#!/usr/bin/env python
import os
import sys
from tools.preprocess_data import preprocess
from tools.anomaly_to_js_data import postprocess
from model.geospatial_anomaly import runGeospatialAnomaly
# Module-level verbosity flag; reassigned from the parsed CLI options in __main__.
verbose = False
# Absolute directory of this script, used to locate the static/ output assets.
scriptDir = os.path.dirname(os.path.realpath(__file__))
def run(inputPath, outputPath, useTimeEncoders, scale, autoSequence, verbose):
preProcessedOutputPath = os.path.join(outputPath, "preprocessed_data.csv")
if verbose: print "Pre-processing %s..." % inputPath
preprocess(inputPath, preProcessedOutputPath, verbose=verbose)
anomalyOutputPath = os.path.join(outputPath, "anomaly_scores.csv")
if verbose: print "Running NuPIC on %s..." % preProcessedOutputPath
runGeospatialAnomaly(preProcessedOutputPath,
anomalyOutputPath,
scale=scale,
autoSequence=autoSequence,
useTimeEncoders=useTimeEncoders,
verbose=verbose)
visualizationOutputPath = os.path.join(scriptDir, "static/js/data.js")
if verbose: print "Creating visualization at %s..." % visualizationOutputPath
postprocess(anomalyOutputPath, visualizationOutputPath)
if __name__ == "__main__":
    # NOTE(review): `parser` is not defined anywhere in this script -- it is
    # presumably supposed to be built here (as in the maritime runner) or
    # imported; as written this block raises NameError.  Confirm against the
    # upstream run.py.
    (options, args) = parser.parse_args(sys.argv[1:])
    try:
        input_path = args.pop(0)
    except IndexError:
        parser.print_help(sys.stderr)
        sys.exit()
    verbose = options.verbose
    # NOTE(review): outputDir, useTimeEncoders, scale and manualSequence are
    # also undefined here; they look like they should come from `options`
    # (and autoSequence should likely be `not options.manualSequence`).
    run(input_path, outputDir, useTimeEncoders, scale, manualSequence, verbose)
#!/usr/bin/env python
import os
import sys
from optparse import OptionParser
from run import run
from tools.convertion import convertion
from tools.anomaly_to_kml import anomalyrepresentation as KML
# Directory used for all generated artifacts unless -o overrides it.
DEFAULT_OUTPUT_DIR = "output"
# Module-level verbosity flag; reassigned from the parsed options in __main__.
verbose = False
# Absolute directory of this script.
scriptDir = os.path.dirname(os.path.realpath(__file__))
parser = OptionParser(
    usage="%prog <path/to/input/file> [options]\n\nRun NuPIC on specified "
          "location file, which should already be in the proper format "
          "(downloaded from the simulator)."
)
parser.add_option(
    "-m",
    "--manual-sequence",
    action="store_true",
    default=False,
    dest="manualSequence",
    # BUG FIX: the old help text described the default *automatic* behaviour;
    # this flag actually switches to manual, track-name-based sequencing.
    help="Break into sequences manually when the track name changes, instead "
         "of automatically based upon time gaps."
)
parser.add_option(
    "-t",
    "--time-encoders",
    action="store_true",
    default=False,
    dest="useTimeEncoders",
    help="Adds time of day encoder to model params."
)
parser.add_option(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Print debugging statements."
)
parser.add_option(
    "-o",
    "--output-dir",
    default=DEFAULT_OUTPUT_DIR,
    dest="outputDir",
    help="Where to write the output file."
)
parser.add_option(
    "-s",
    "--scale",
    default=False,
    dest="scale",
    help="Meter resolution for Geospatial Coordinate Encoder (default 5m)."
)
def maritimeanomalies(inputPath, outputDir, useTimeEncoders, scale, autoSequence):
outputPath = os.path.abspath(outputDir)
if not os.path.exists(outputPath):
os.makedirs(outputPath)
convertedOutputPath = os.path.join(outputPath, "converted_data.csv")
if verbose: print "Converting %s..." % inputPath
convertion(inputPath, convertedOutputPath)
run(convertedOutputPath, outputPath, useTimeEncoders=useTimeEncoders, scale=scale, autoSequence=autoSequence, verbose=verbose)
representationDataPath = os.path.join(outputPath, "anomaly_scores.csv")
representationOutputPath = os.path.join(outputPath, "anomaly_representation.kml")
if verbose: print "Creating visualization at %s..." % representationOutputPath
KML(representationDataPath,representationOutputPath)
if __name__ == "__main__":
    (options, args) = parser.parse_args(sys.argv[1:])
    # The input file is the one mandatory positional argument.
    if not args:
        parser.print_help(sys.stderr)
        sys.exit()
    input_path = args[0]
    verbose = options.verbose
    # -m selects manual sequencing, so automatic sequencing is its negation.
    maritimeanomalies(input_path,
                      options.outputDir,
                      options.useTimeEncoders,
                      options.scale,
                      not options.manualSequence)
import csv
import time
import calendar
def readais(dataPath):
    """Read the raw AIS CSV at *dataPath* and return its rows as a list of lists.

    :param dataPath: path of the original AIS export
    :return: every row (header included) as a list of strings
    """
    # `with` guarantees the handle is closed even if parsing raises (the
    # original leaked it on error).  Text mode keeps the csv module happy on
    # both Python 2 (Unix) and Python 3; the old 'rb' was Python-2-only.
    with open(dataPath, 'r') as my_file:
        return [list(row) for row in csv.reader(my_file)]
def aisconvertion(filelist):
    """Convert raw AIS rows into the column layout nupic.geospatial expects.

    Input columns:  MMSI, LAT, LON, SPEED, COURSE, STATUS, TIMESTAMP
    Output columns: track, epoch seconds, LON, LAT, '', speed, '', 1
    """
    fileconverted = []
    # filelist[0] is the header row; only the data rows are converted.
    for record in filelist[1:]:
        parsed = time.strptime(str(record[6]), "%Y-%m-%dT%H:%M:%S")
        epochSeconds = calendar.timegm(parsed)
        # 0.05144 presumably scales the knots value for the encoder -- note a
        # full knots -> m/s conversion would be 0.51444; confirm the intended
        # unit with the model configuration.
        fileconverted.append([
            record[0],
            int(epochSeconds),
            record[2],
            record[1],
            "",
            float(record[3]) * 0.05144,
            "",
            1,
        ])
    return fileconverted
def writeais(outPath, fileconverted):
    """Write the converted rows to *outPath* as unquoted CSV for nupic.geospatial.

    :param outPath: destination CSV path
    :param fileconverted: list of rows as returned by aisconvertion()
    """
    # `with` closes the handle even when a write raises; the original leaked
    # the file object on any csv error.
    with open(outPath, 'wb') as my_file:
        writer = csv.writer(my_file, delimiter=',', quotechar="", quoting=csv.QUOTE_NONE)
        # writerows() is the bulk form of the old per-row loop.
        writer.writerows(fileconverted)
def convertion(dataPath, outPath):
    """Read the raw AIS file, convert it, and write the nupic-ready CSV."""
    writeais(outPath, aisconvertion(readais(dataPath)))
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
convertion(dataPath, outPath)
#!/usr/bin/env python
import csv
import xml.dom.minidom
import sys
def extractCoordinates(row):
    """Return the 'longitude,latitude' KML coordinate string for *row*.

    Columns 2 and 3 of an anomaly-score row hold longitude and latitude.
    """
    return "{0},{1}".format(row[2], row[3])
def createPlacemark(kmlDoc, row, order):
    """Build and return a <Placemark> element for one anomaly-score row.

    The placemark carries every column as <ExtendedData>, a styleUrl picked
    from the anomaly score, and a <Point> with the row's coordinates.
    """
    placemarkElement = kmlDoc.createElement('Placemark')
    extElement = kmlDoc.createElement('ExtendedData')
    placemarkElement.appendChild(extElement)
    # One <Data name=...><value>...</value></Data> entry per column.
    for columnIndex, fieldName in enumerate(order):
        dataElement = kmlDoc.createElement('Data')
        dataElement.setAttribute('name', fieldName)
        valueElement = kmlDoc.createElement('value')
        dataElement.appendChild(valueElement)
        valueElement.appendChild(kmlDoc.createTextNode(row[columnIndex]))
        extElement.appendChild(dataElement)
    # Colour the pin by anomaly severity (row[5] is the anomaly score).
    score = float(row[5])
    if score <= 0.25:
        styleName = '#green'
    elif score <= 0.75:
        styleName = '#yellow'
    else:
        styleName = '#red'
    styleElement = kmlDoc.createElement('styleUrl')
    styleElement.appendChild(kmlDoc.createTextNode(styleName))
    placemarkElement.appendChild(styleElement)
    pointElement = kmlDoc.createElement('Point')
    placemarkElement.appendChild(pointElement)
    coorElement = kmlDoc.createElement('coordinates')
    coorElement.appendChild(kmlDoc.createTextNode(extractCoordinates(row)))
    pointElement.appendChild(coorElement)
    return placemarkElement
def _appendPinStyle(kmlDoc, documentElement, styleId, scale, href):
    # Append one pin <Style> (id, icon scale, icon image URL) to the document.
    styleElement = kmlDoc.createElement('Style')
    styleElement.setAttribute('id', styleId)
    iconstyleElement = kmlDoc.createElement('IconStyle')
    scaleElement = kmlDoc.createElement('scale')
    scaleElement.appendChild(kmlDoc.createTextNode(scale))
    iconstyleElement.appendChild(scaleElement)
    iconElement = kmlDoc.createElement('Icon')
    hrefElement = kmlDoc.createElement('href')
    hrefElement.appendChild(kmlDoc.createTextNode(href))
    iconElement.appendChild(hrefElement)
    iconstyleElement.appendChild(iconElement)
    styleElement.appendChild(iconstyleElement)
    documentElement.appendChild(styleElement)

def createKML(csvReader, csvreader1, fileName, order):
    """Construct the KML document from the anomaly-score CSV.

    :param csvReader: a csv.DictReader over the score file (header included)
    :param csvreader1: the same rows materialised as lists of strings
    :param fileName: destination path for the KML output
    :param order: column names, in file order
    """
    kmlDoc = xml.dom.minidom.Document()
    kmlElement = kmlDoc.createElementNS('http://earth.google.com/kml/2.2', 'kml')
    kmlElement.setAttribute('xmlns', 'http://earth.google.com/kml/2.2')
    kmlElement = kmlDoc.appendChild(kmlElement)
    documentElement = kmlDoc.createElement('Document')
    # Pin styles keyed by anomaly severity (thresholds in createPlacemark).
    _appendPinStyle(kmlDoc, documentElement, 'green', '0.5',
                    'http://maps.google.com/mapfiles/kml/paddle/grn-blank.png')
    _appendPinStyle(kmlDoc, documentElement, 'yellow', '0.6',
                    'http://maps.google.com/mapfiles/kml/paddle/ylw-blank.png')
    _appendPinStyle(kmlDoc, documentElement, 'red', '0.8',
                    'http://maps.google.com/mapfiles/kml/paddle/red-blank.png')
    documentElement = kmlElement.appendChild(documentElement)
    # Skip the header line of the DictReader view.
    csvReader.next()
    # BUG FIX: the original removed csvreader1[0] once per *new track*, which
    # dropped the header the first time but then threw away one real data row
    # for every subsequent track.  Drop the header exactly once, up front.
    if csvreader1:
        del csvreader1[0]
    usedtracks = ["0"]
    # One <Folder> per track, containing a placemark for each of its rows.
    for row in csvReader:
        trackName = str(row['trackName'])
        if trackName in usedtracks:
            continue
        folderElement = kmlDoc.createElement('Folder')
        nameElement = kmlDoc.createElement('name')
        nameElement.appendChild(kmlDoc.createTextNode(trackName))
        folderElement.appendChild(nameElement)
        for item in csvreader1:
            if item[0] == trackName:
                folderElement.appendChild(createPlacemark(kmlDoc, item, order))
        usedtracks.append(trackName)
        documentElement.appendChild(folderElement)
    # Close the output handle even if serialisation fails.
    with open(fileName, 'w') as kmlFile:
        kmlFile.write(kmlDoc.toprettyxml(' ', newl = '\n', encoding = 'utf-8'))
def anomalyrepresentation(dataPath, outPath):
    """Create a KML visualisation from an anomaly-score CSV.

    Reads the score file (e.g. 'anomaly_scores.csv') and writes a KML file
    (e.g. 'anomaly_representation.kml') with one folder per track.

    :param dataPath: path of the anomaly-score CSV
    :param outPath: destination path for the KML file
    """
    # Column order matching the header written by the anomaly runner.
    order = ['trackName','timestamp','longitude','latitude','speed','anomaly_score','new_sequence']
    # First pass: materialise every row (the header line is included because
    # explicit fieldnames make DictReader treat it as data).
    csvreader1 = []
    with open(dataPath) as fin:
        for row in csv.DictReader(fin, order):
            csvreader1.append([str(row[key]) for key in order])
    # Second pass: hand createKML a fresh reader positioned at the top of the
    # file.  BUG FIX: both handles are now closed deterministically -- the
    # original opened the file twice and never closed either handle.
    with open(dataPath) as fin:
        csvreader = csv.DictReader(fin, order)
        createKML(csvreader, csvreader1, outPath, order)
if __name__ == "__main__":
if len(sys.argv) < 3:
print ("Usage: {0} "
"/path/to/data.csv /path/to/outfile.csv").format(sys.argv[0])
sys.exit(0)
dataPath = sys.argv[1]
outPath = sys.argv[2]
anomalyrepresentation(dataPath, outPath)