This is a bit embarrassing but I was able to figure out the cause while assembling sample data. Turns out a very small minority of data samples we had been using had gotten some of their fields to be blank. If this happened to a source or group then it would cause the query to break and timeout. When I had my program emergency fill out any fields it found to be empty it works 100% of the time.
Apologies for wasting your time.
EDIT: For posterity
{
'eventIdentifier': 'identifierfortheevent',
'jSchema': 'NONE SPECIFIED',
'eventVersion': '0.0',
'timestamp': '2021-03-29T16:45:08.682',
'author': 'tester@testing.team.com',
'verdict': 'TEST SUCCESSFUL',
'groups': '[{"identifier":"testidentifierone" ,"name":"test-group.1"},{"identifier":"testidentifiertwo" ,"name":"test-group-2"},{"identifier":"testidentifierthree" ,"name":"test-group-3"}]',
'sources': '[{"identifier":"" ,"name":"sourceone","link":"https://source.one.com"},{"identifier":"" ,"name":"sourcetwo","link":"https://source.two.com"},{"identifier":"x0400000014C84108" ,"name":"sourcethree","link":"https://source.three.com"},{"identifier":"24123394" ,"name":"sourcefour","link":"https://source.four.com"},{"identifier":"22771634" ,"name":"sourcefive","link":"https://source.five.com"},{"identifier":"24128568" ,"name":"sourcesix","link":"https://source.six.com"},{"identifier":"e58514a13f0254b11ddaa7fbc6a1332cb3e43a9408ab88313b93901dd781773f" ,"name":"sourceseven","link":"https://source.seven.com"},{"identifier":"24126869" ,"name":"sourceight","link":"https://source.eight.com"},{"identifier":"24123309" ,"name":"sourcenine","link":"https://source.nine.com"},{"identifier":"22771641" ,"name":"sourceten","link":"https://source.ten.com"},{"identifier":"24128564" ,"name":"sourceeleven","link":"https://source.eleven.com"},{"identifier":"b4bfff95b74fbd3e217c8465c221b51e0a3f9b5e3b2848583c54a0d0ce43c23b" ,"name":"sourcetwelve","link":"https://source.twelve.com"},{"identifier":"24126884" ,"name":"sourcethirteen","link":"https://source.thirteen.com"}]',
'link': 'http://www.publisher.testing.link.com',
'publisherSources': '[{"identifier":"publisherSourceIdentifier","link":"http://www.publisher.source.testing.link.com"}]'
}
CREATE QUERY AddEvent(STRING eventIdentifier, STRING jSchema, STRING eventVersion, STRING timestamp, STRING author, STRING verdict, STRING groups, STRING sources, STRING link, STRING publisherSources) FOR GRAPH EventTraceability {
JSONARRAY groupJson;
JSONARRAY sourceJson;
JSONARRAY publisherSourceJson;
STRING groupName;
STRING groupIdentifier;
STRING sourceIdentifier;
STRING sourceName;
STRING sourceLink;
STRING publisherIdentifier;
groupJson = parse_json_array(groups);
sourceJson = parse_json_array(sources);
publisherSourceJson = parse_json_array(publisherSources);
INSERT INTO event VALUES (eventIdentifier, jSchema, eventVersion, timestamp, author, verdict);
FOREACH i IN range[0, groupJson.size()-1] DO
/*
[
{
"identifier": "id",
"name": "name"
}
]*/
groupIdentifier = groupJson.getJsonObject(i).getString("identifier");
groupName = groupJson.getJsonObject(i).getString("name");
INSERT INTO eventGroup VALUES (groupIdentifier, groupName);
INSERT INTO PART_OF VALUES (eventIdentifier,groupIdentifier);
END;
FOREACH j IN range[0, sourceJson.size()-1] DO
/*
[
{
"identifier": "id",
"name": "name",
"link": "link"
}
]*/
sourceIdentifier = sourceJson.getJsonObject(j).getString("identifier");
sourceName = sourceJson.getJsonObject(j).getString("name");
sourceLink = sourceJson.getJsonObject(j).getString("link");
INSERT INTO source VALUES (sourceIdentifier, sourceName, sourceLink);
INSERT INTO SOURCE VALUES (eventIdentifier event, sourceIdentifier source);
END;
INSERT INTO publisher VALUES (sourceLink);
INSERT INTO PUBLISHER VALUES (eventIdentifier event, sourceLink);
/*
[
{
"identifier": "id",
"link": "link"
}
]*/
publisherIdentifier = publisherSourceJson.getJsonObject(0).getString("identifier");
INSERT INTO source (PRIMARY_ID, link) VALUES (publisherIdentifier, publisherSourceJson.getJsonObject(0).getString("link"));
INSERT INTO SOURCE VALUES (link publisher, publisherSourceJson.getJsonObject(0).getString("identifier"));
}
CREATE GRAPH EventTraceability()
CREATE SCHEMA_CHANGE JOB schema_change_job_TevS4H FOR GRAPH EventTraceability {
ADD VERTEX event(PRIMARY_ID identifier STRING, schema STRING, eventVersion STRING, timestamp STRING, author STRING, verdict STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX publisher(PRIMARY_ID link STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX source(PRIMARY_ID identifier STRING, name STRING, link STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE";
ADD VERTEX eventGroup(PRIMARY_ID identifier STRING, name STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD DIRECTED EDGE PUBLISHER(FROM event, TO publisher) WITH REVERSE_EDGE="reverse_PUBLISHER";
ADD DIRECTED EDGE SOURCE(FROM publisher, TO source|FROM event, TO source) WITH REVERSE_EDGE="reverse_SOURCE";
ADD DIRECTED EDGE PART_OF(FROM event, TO eventGroup) WITH REVERSE_EDGE="reverse_PART_OF";
ADD DIRECTED EDGE CAUSED_BY(FROM event, TO event) WITH REVERSE_EDGE="reverse_CAUSED_BY";
}
Query does not work if the above data is used as-is. If the two empty identifiers are given values then it will work correctly.