Here is my schema,
and here is the algorithm:
CREATE QUERY FloodingDetection(FLOAT n_sigma = 3.0) FOR GRAPH mygraph {
  /*
   * Detects hosts that flood one URL with an unusually large number of events.
   *
   * Traversal performed by this query:
   *   [host] -(has)- [event] -(has)- [URL]
   *
   * For every URL reached, the query builds a map {host -> number of events
   * that lead from that host to the URL}. For URLs contacted by more than one
   * host it computes the mean and the sample standard deviation of those
   * counts, and reports every host whose count exceeds the mean by more than
   * n_sigma standard deviations.
   *
   * Parameters:
   *   n_sigma - outlier threshold expressed in standard deviations
   *             (defaults to 3, the value that was previously hard-coded).
   *
   * Output:
   *   @@result_list - one result_tuple (host, URL, cnt, mean, std) per outlier.
   *   services      - the set of URL vertices reached by the traversal.
   */

  // mean/std are numeric so that they can hold the AvgAccum value and the
  // sqrt() result; a STRING field cannot take part in the arithmetic below.
  TYPEDEF TUPLE<VERTEX entity, VERTEX URL, INT cnt, DOUBLE mean, DOUBLE std> result_tuple;

  AvgAccum @mean;                               // mean of the per-host counts of a URL
  SumAccum<DOUBLE> @std;                        // sum of squared deviations, then the std itself
  MapAccum<VERTEX, SumAccum<INT>> @countMap;    // host -> event count
  ListAccum<result_tuple> @@result_list;        // detected outliers

  IPs = {host.*};

  // Hop 1: tag every event with the host it is attached to.
  // Accumulators must be addressed through the aliases (ip, t), not through
  // the vertex type names.
  login_events = SELECT t
                 FROM IPs:ip -(has)- event:t
                 ACCUM t.@countMap += (ip -> 1);

  // Hop 2: merge the per-event maps into each URL, then look for outliers.
  // NOTE(review): the statistics below rely on a vertex's own accumulator
  // updates being visible to later statements of the same POST-ACCUM clause
  // -- confirm against the GSQL version in use.
  services = SELECT u
             FROM login_events:ev -(has)- URL:u
             ACCUM u.@countMap += ev.@countMap
             POST-ACCUM
               // a sample standard deviation needs at least two observations
               CASE WHEN u.@countMap.size() > 1 THEN
                 // calculate the mean
                 FOREACH (src, cnt) IN u.@countMap DO
                   u.@mean += cnt
                 END,
                 // accumulate the squared deviations from the mean
                 FOREACH (src, cnt) IN u.@countMap DO
                   u.@std += pow(cnt - u.@mean, 2)
                 END,
                 // turn the sum of squares into the sample standard deviation
                 u.@std = sqrt(u.@std / (u.@countMap.size() - 1)),
                 // std == 0 means every host has the same count: no outlier
                 CASE WHEN u.@std != 0 THEN
                   // report hosts more than n_sigma deviations above the mean
                   FOREACH (src, cnt) IN u.@countMap DO
                     CASE WHEN cnt - u.@mean > n_sigma * u.@std THEN
                       @@result_list += result_tuple(src, u, cnt, u.@mean, u.@std)
                     END
                   END
                 END
               END;

  PRINT @@result_list;
  PRINT services;
}