[ 
https://issues.apache.org/jira/browse/METRON-562?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15678079#comment-15678079
 ] 

ASF GitHub Bot commented on METRON-562:
---------------------------------------

Github user james-sirota commented on a diff in the pull request:

    https://github.com/apache/incubator-metron/pull/352#discussion_r88757736
  
    --- Diff: 
metron-analytics/metron-statistics/src/main/java/org/apache/metron/statistics/outlier/MedianAbsoluteDeviationFunctions.java
 ---
    @@ -0,0 +1,193 @@
    +/*
    + *
    + *  Licensed to the Apache Software Foundation (ASF) under one
    + *  or more contributor license agreements.  See the NOTICE file
    + *  distributed with this work for additional information
    + *  regarding copyright ownership.  The ASF licenses this file
    + *  to you under the Apache License, Version 2.0 (the
    + *  "License"); you may not use this file except in compliance
    + *  with the License.  You may obtain a copy of the License at
    + *
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + *
    + *  Unless required by applicable law or agreed to in writing, software
    + *  distributed under the License is distributed on an "AS IS" BASIS,
    + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
    + *  See the License for the specific language governing permissions and
    + *  limitations under the License.
    + *
    + */
    +package org.apache.metron.statistics.outlier;
    +
    +import org.apache.metron.common.dsl.Context;
    +import org.apache.metron.common.dsl.ParseException;
    +import org.apache.metron.common.dsl.Stellar;
    +import org.apache.metron.common.dsl.StellarFunction;
    +import org.apache.metron.statistics.OnlineStatisticsProvider;
    +
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Optional;
    +
    +public class MedianAbsoluteDeviationFunctions {
    +  public static class State {
    +    OnlineStatisticsProvider tickMedianProvider;
    +    OnlineStatisticsProvider tickMADProvider;
    +    OnlineStatisticsProvider windowMedianProvider;
    +    OnlineStatisticsProvider windowMADProvider;
    +
    +    public State() {
    +      tickMedianProvider = new OnlineStatisticsProvider();
    +      tickMADProvider = new OnlineStatisticsProvider();
    +      windowMedianProvider = new OnlineStatisticsProvider();
    +      windowMADProvider = new OnlineStatisticsProvider();
    +    }
    +
    +    public State(Optional<List<State>> previousStates, Optional<State> 
currentState)
    +    {
    +      tickMedianProvider = new OnlineStatisticsProvider();
    +      tickMADProvider = new OnlineStatisticsProvider();
    +      windowMedianProvider = 
currentState.isPresent()?currentState.get().tickMedianProvider:new 
OnlineStatisticsProvider();
    +      windowMADProvider = 
currentState.isPresent()?currentState.get().tickMADProvider:new 
OnlineStatisticsProvider();
    +      for(State s : previousStates.orElse(new ArrayList<>())) {
    +        windowMedianProvider = (OnlineStatisticsProvider) 
windowMedianProvider.merge(s.tickMedianProvider);
    +        windowMADProvider = (OnlineStatisticsProvider) 
windowMADProvider.merge(s.tickMADProvider);
    +      }
    +    }
    +
    +    public void add(Double d) {
    +      if(!Double.isNaN(d)) {
    +        tickMedianProvider.addValue(d);
    +        double deviation = Math.abs(d - 
windowMedianProvider.getPercentile(50));
    +        windowMedianProvider.addValue(d);
    +        if(!Double.isNaN(deviation)) {
    +          windowMADProvider.addValue(deviation);
    +          tickMADProvider.addValue(deviation);
    +        }
    +      }
    +    }
    +  }
    +
    +  @Stellar(namespace="OUTLIER"
    +          ,name="MAD_STATE_MERGE"
    +          ,description="Update the statistical state required to compute 
the Median Absolute Deviation"
    +          ,params= {
    +            "[state] - A list of Median Absolute Deviation States to 
merge.  Generally these are states across time."
    +           ,"currentState? - The current state (optional)"
    +          }
    +          ,returns="The Median Absolute Deviation state."
    +  )
    +  public static class StateUpdate implements StellarFunction{
    +
    +    @Override
    +    public Object apply(List<Object> args, Context context) throws 
ParseException {
    +      State state = null;
    +      List<State> states = (List<State>) args.get(0);
    +      State currentState = null;
    +      if(args.size() > 1) {
    +        currentState = (State) args.get(1);
    +      }
    +      state = new State(Optional.ofNullable(states), 
Optional.ofNullable(currentState));
    +      return state;
    +    }
    +
    +    @Override
    +    public void initialize(Context context) {
    +
    +    }
    +
    +    @Override
    +    public boolean isInitialized() {
    +      return true;
    +    }
    +  }
    +
    +  @Stellar(namespace="OUTLIER"
    +          ,name="MAD_ADD"
    +          ,params= {
    +            "state - The MAD state"
    +          , "value - The numeric value to add"
    +                   }
    +          ,description="Add a piece of data to the state."
    +          ,returns="The MAD state."
    +  )
    +  public static class PointUpdate implements StellarFunction{
    +
    +    @Override
    +    public Object apply(List<Object> args, Context context) throws 
ParseException {
    +      State state = (State) args.get(0);
    +      Object o = args.get(1);
    +      List<Double> data = new ArrayList<>();
    +      if(o != null) {
    +        if (o instanceof List) {
    +          for (Object datum : (List<Object>) o) {
    +            Number n = (Number) datum;
    +            data.add(n.doubleValue());
    +          }
    +        } else {
    +          Number n = (Number)o;
    +          data.add(n.doubleValue());
    +        }
    +      }
    +      if(state != null) {
    +        for(Double d : data) {
    +          state.add(d);
    +        }
    +      }
    +      return state;
    +    }
    +
    +    @Override
    +    public void initialize(Context context) {
    +
    +    }
    +
    +    @Override
    +    public boolean isInitialized() {
    +      return true;
    +    }
    +  }
    +
    +  @Stellar(namespace="OUTLIER"
    +          ,name="MAD_SCORE"
    +          ,params = {
    +            "state - The MAD state"
    +           ,"value - The value to score"
    +           ,"scale? - Optionally the scale to use when computing the 
modified z-score.  Default is 0.6745, see the first page of 
http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/BetterThanMAD.pdf"
    +            }
    +          ,description="Get the modified z-score normalized by the MAD: 
scale * | x_i - median(X) | / MAD.  See the first page of 
http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/BetterThanMAD.pdf"
    +          ,returns="The modified z-score."
    +  )
    +  public static class Score implements StellarFunction{
    +
    +    @Override
    +    public Object apply(List<Object> args, Context context) throws 
ParseException {
    +      double scale = 0.6745;
    --- End diff --
    
    This is 1/b from the Caltech paper (b ≈ 1.4826 is the MAD consistency constant for normally distributed data, so 1/b ≈ 0.6745).


> Add rudimentary statistical outlier detection
> ---------------------------------------------
>
>                 Key: METRON-562
>                 URL: https://issues.apache.org/jira/browse/METRON-562
>             Project: Metron
>          Issue Type: New Feature
>            Reporter: Casey Stella
>            Assignee: Casey Stella
>   Original Estimate: 48h
>  Remaining Estimate: 48h
>
> With the advent of the profiler, we can now capture state.  Furthermore, with 
> Stellar, we can capture statistical summaries.  We should provide rudimentary 
> outlier detection functionality in the form of Stellar functions that can 
> operate on captured state from the profiler.
> To begin, we should enable simple outlier tests using distance from a central 
> measure such as Median Absolute Deviation (see 
> http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to