feat: Add graphUtils for sub-workflow extraction (no-changelog) (#14781)

This commit is contained in:
Charlie Kolb
2025-04-28 10:51:15 +02:00
committed by GitHub
parent 614579026d
commit 9f2182568a
2 changed files with 531 additions and 0 deletions

View File

@@ -0,0 +1,208 @@
/** Reported when more than one root node of the selection is the target of an edge from outside it. */
type MultipleInputNodesError = {
errorCode: 'Multiple Input Nodes';
// Root nodes of the selection that each receive an edge from outside the selection.
nodes: Set<string>;
};
/** Reported when more than one leaf node of the selection is the source of an edge leaving it. */
type MultipleOutputNodesError = {
errorCode: 'Multiple Output Nodes';
// Leaf nodes of the selection that each have an edge to a node outside the selection.
nodes: Set<string>;
};
/** Reported for a selected node that receives an outside edge but is not a root of the selection. */
type InputEdgeToNonRootNode = {
errorCode: 'Input Edge To Non-Root Node';
// Id of the offending non-root node.
node: string;
};
/** Reported for a selected node that has an edge leaving the selection but is not a leaf of it. */
type OutputEdgeFromNonLeafNode = {
errorCode: 'Output Edge From Non-Leaf Node';
// Id of the offending non-leaf node.
node: string;
};
/** Reported when `hasPath` finds no path from the chosen input node to the chosen output node. */
type NoContinuousPathFromRootToLeaf = {
errorCode: 'No Continuous Path From Root To Leaf In Selection';
// Input node the path search started from.
start: string;
// Output node the path search tried to reach.
end: string;
};
/** Union of every validation error `parseExtractableSubgraphSelection` can report; discriminated by `errorCode`. */
export type ExtractableErrorResult =
| MultipleInputNodesError
| MultipleOutputNodesError
| InputEdgeToNonRootNode
| OutputEdgeFromNonLeafNode
| NoContinuousPathFromRootToLeaf;
/** Directed graph: maps a node id to the set of node ids it has an outgoing edge to. */
type AdjacencyList = Map<string, Set<string>>;
/**
 * Collect every edge that crosses into the subgraph described by `graphIds`:
 * its source lies outside the subgraph and its target lies inside it.
 *
 * @param graphIds - Ids of the nodes that make up the subgraph.
 * @param adjacencyList - Outgoing neighbours for each node id.
 * @returns `[from, to]` pairs, in adjacency-list iteration order.
 */
export function getInputEdges(
	graphIds: Set<string>,
	adjacencyList: AdjacencyList,
): Array<[string, string]> {
	const incoming: Array<[string, string]> = [];
	adjacencyList.forEach((targets, source) => {
		// Edges that start inside the subgraph can never be input edges.
		if (graphIds.has(source)) return;
		targets.forEach((target) => {
			if (graphIds.has(target)) incoming.push([source, target]);
		});
	});
	return incoming;
}
/**
 * Collect every edge that crosses out of the subgraph described by `graphIds`:
 * its source lies inside the subgraph and its target lies outside it.
 *
 * @param graphIds - Ids of the nodes that make up the subgraph.
 * @param adjacencyList - Outgoing neighbours for each node id.
 * @returns `[from, to]` pairs, in adjacency-list iteration order.
 */
export function getOutputEdges(
	graphIds: Set<string>,
	adjacencyList: AdjacencyList,
): Array<[string, string]> {
	const outgoing: Array<[string, string]> = [];
	for (const [source, targets] of adjacencyList) {
		if (graphIds.has(source)) {
			for (const target of targets) {
				// Targets that stay inside the subgraph are internal edges.
				if (graphIds.has(target)) continue;
				outgoing.push([source, target]);
			}
		}
	}
	return outgoing;
}
/**
 * Build a new set holding the elements that occur in both `a` and `b`.
 * Elements keep the iteration order of `a`; neither input is modified.
 */
function intersection<T>(a: Set<T>, b: Set<T>): Set<T> {
	return new Set<T>([...a].filter((item) => b.has(item)));
}
/**
 * Build a new set holding every element of `a` followed by the elements of
 * `b` that were not already present. Neither input is modified.
 */
function union<T>(a: Set<T>, b: Set<T>): Set<T> {
	return new Set<T>([...a, ...b]);
}
/**
 * Build a new set holding the elements of `minuend` that do not occur in
 * `subtrahend`. Elements keep the iteration order of `minuend`; neither input
 * is modified.
 */
function difference<T>(minuend: Set<T>, subtrahend: Set<T>): Set<T> {
	const remaining = new Set<T>();
	minuend.forEach((item) => {
		if (!subtrahend.has(item)) remaining.add(item);
	});
	return remaining;
}
/**
 * Find the root nodes of the subgraph described by `graphIds`: the selected
 * nodes that no selected node (including themselves) has an edge to.
 *
 * Edges coming from nodes outside the selection are ignored, so a root may
 * still have incoming edges from the rest of the graph.
 *
 * @param graphIds - Ids of the nodes that make up the subgraph.
 * @param adjacencyList - Outgoing neighbours for each node id.
 * @returns A new set of root node ids, in `graphIds` iteration order.
 */
export function getRootNodes(graphIds: Set<string>, adjacencyList: AdjacencyList): Set<string> {
	// Start from the full selection and strike out every node that some selected
	// node points at. This avoids rebuilding an accumulated union set per node.
	const roots = new Set<string>(graphIds);
	for (const nodeId of graphIds) {
		const successors = adjacencyList.get(nodeId);
		if (successors === undefined) continue;
		for (const successor of successors) {
			roots.delete(successor);
		}
	}
	return roots;
}
/**
 * Find the leaf nodes of the subgraph described by `graphIds`: the selected
 * nodes that have no edge to any selected node (including themselves).
 *
 * Edges to nodes outside the selection are ignored, so a leaf may still have
 * outgoing edges into the rest of the graph.
 *
 * @param graphIds - Ids of the nodes that make up the subgraph.
 * @param adjacencyList - Outgoing neighbours for each node id.
 * @returns A new set of leaf node ids, in `graphIds` iteration order.
 */
export function getLeafNodes(graphIds: Set<string>, adjacencyList: AdjacencyList): Set<string> {
	const leaves = new Set<string>();
	for (const nodeId of graphIds) {
		const successors = adjacencyList.get(nodeId);
		let hasSelectedSuccessor = false;
		if (successors !== undefined) {
			// Stop at the first successor inside the selection; no need to build
			// the full intersection just to test whether it is empty.
			for (const successor of successors) {
				if (graphIds.has(successor)) {
					hasSelectedSuccessor = true;
					break;
				}
			}
		}
		if (!hasSelectedSuccessor) leaves.add(nodeId);
	}
	return leaves;
}
/**
 * Check whether `end` can be reached from `start` by following directed edges
 * of `adjacencyList`.
 *
 * A node is always considered reachable from itself, so `start === end`
 * yields `true` even when the node has no self-loop or is absent from the
 * adjacency list.
 *
 * @param start - Id of the node the search starts from.
 * @param end - Id of the node to reach.
 * @param adjacencyList - Outgoing neighbours for each node id.
 * @returns `true` if a path exists, `false` otherwise.
 */
export function hasPath(start: string, end: string, adjacencyList: AdjacencyList): boolean {
	if (start === end) return true;

	// Nodes are marked when they are pushed, so each node is stacked and
	// expanded at most once even in dense or cyclic graphs.
	const seen = new Set<string>([start]);
	const stack: string[] = [start];

	for (let current = stack.pop(); current !== undefined; current = stack.pop()) {
		for (const neighbour of adjacencyList.get(current) ?? []) {
			if (neighbour === end) return true;
			if (seen.has(neighbour)) continue;
			seen.add(neighbour);
			stack.push(neighbour);
		}
	}
	return false;
}
/**
 * Successful result of `parseExtractableSubgraphSelection`.
 * Both fields are optional because a selection may have no connection to the
 * outside on either side.
 */
export type ExtractableSubgraphData = {
// Root node of the selection that receives an edge from outside it, if any.
start?: string;
// Leaf node of the selection that has an edge to outside it, if any.
end?: string;
};
/**
 * Validate that the selection `graphIds` can be extracted as a subgraph.
 *
 * A subgraph is considered extractable if the following properties hold:
 * - 0-1 input nodes: at most one selected node receives edges from outside
 *   the selection, and that node is a root of the selection
 * - 0-1 output nodes: at most one selected node has edges to outside the
 *   selection, and that node is a leaf of the selection
 * - if both an input and an output node exist, there is a continuous path
 *   from the input node to the output node
 *
 * This also covers the requirement that all "inner" nodes between the root
 * node and the output node are selected, since an unselected node in between
 * would otherwise create extra input or output nodes.
 *
 * All violations are collected; validation does not stop at the first one.
 *
 * @param graphIds - Ids of the selected nodes.
 * @param adjacencyList - Outgoing neighbours for each node id of the full graph.
 * @returns An object containing optional `start` and `end` node ids
 *   indicating which nodes have outside connections, OR
 *   an array of errors if the selection is not valid.
 */
export function parseExtractableSubgraphSelection(
	graphIds: Set<string>,
	adjacencyList: AdjacencyList,
): ExtractableSubgraphData | ExtractableErrorResult[] {
	const errors: ExtractableErrorResult[] = [];

	// 0-1 input nodes
	const inputEdges = getInputEdges(graphIds, adjacencyList);
	const inputNodes = new Set(inputEdges.map(([, to]) => to));
	const rootNodes = getRootNodes(graphIds, adjacencyList);
	for (const inputNode of difference(inputNodes, rootNodes)) {
		errors.push({
			errorCode: 'Input Edge To Non-Root Node',
			node: inputNode,
		});
	}
	const rootInputNodes = intersection(rootNodes, inputNodes);
	if (rootInputNodes.size > 1) {
		errors.push({
			errorCode: 'Multiple Input Nodes',
			nodes: rootInputNodes,
		});
	}

	// 0-1 output nodes
	const outputEdges = getOutputEdges(graphIds, adjacencyList);
	const outputNodes = new Set(outputEdges.map(([from]) => from));
	const leafNodes = getLeafNodes(graphIds, adjacencyList);
	for (const outputNode of difference(outputNodes, leafNodes)) {
		errors.push({
			errorCode: 'Output Edge From Non-Leaf Node',
			node: outputNode,
		});
	}
	const leafOutputNodes = intersection(leafNodes, outputNodes);
	if (leafOutputNodes.size > 1) {
		errors.push({
			errorCode: 'Multiple Output Nodes',
			nodes: leafOutputNodes,
		});
	}

	// First (and, for a valid selection, only) node on each side; undefined if none.
	const start: string | undefined = rootInputNodes.values().next().value;
	const end: string | undefined = leafOutputNodes.values().next().value;

	// Compare against undefined rather than relying on truthiness, so that an
	// empty-string node id does not silently skip the path check.
	if (start !== undefined && end !== undefined && !hasPath(start, end, adjacencyList)) {
		errors.push({
			errorCode: 'No Continuous Path From Root To Leaf In Selection',
			start,
			end,
		});
	}

	return errors.length > 0 ? errors : { start, end };
}

View File

@@ -0,0 +1,323 @@
import {
getInputEdges,
getOutputEdges,
getRootNodes,
getLeafNodes,
parseExtractableSubgraphSelection,
hasPath,
} from '../../src/Graph/graphUtils';
describe('graphUtils', () => {
// getInputEdges: edges whose source is outside the selection and whose target is inside it.
describe('getInputEdges', () => {
	it('should return edges leading into the graph', () => {
		// A is outside the selection and points at B, so A -> B is the only input edge.
		const selection = new Set(['B', 'C']);
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
		]);

		expect(getInputEdges(selection, edges)).toEqual([['A', 'B']]);
	});

	it('should return an empty array if there are no input edges', () => {
		// Every edge starts inside the selection.
		const selection = new Set(['A', 'B']);
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set()],
		]);

		expect(getInputEdges(selection, edges)).toEqual([]);
	});
});
// getOutputEdges: edges whose source is inside the selection and whose target is outside it.
describe('getOutputEdges', () => {
	it('should return edges leading out of the graph', () => {
		// C is outside the selection, so B -> C is the only output edge.
		const selection = new Set(['A', 'B']);
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
			['C', new Set()],
		]);

		expect(getOutputEdges(selection, edges)).toEqual([['B', 'C']]);
	});

	it('should return an empty array if there are no output edges', () => {
		// The only edge stays inside the selection.
		const selection = new Set(['A', 'B']);
		const edges = new Map<string, Set<string>>([['A', new Set(['B'])]]);

		expect(getOutputEdges(selection, edges)).toEqual([]);
	});
});
// getRootNodes: selected nodes that no selected node points at.
describe('getRootNodes', () => {
	it('should return root nodes of the graph', () => {
		// B is targeted by A; A and C have no selected predecessor.
		const selection = new Set(['A', 'B', 'C']);
		const edges = new Map<string, Set<string>>([['A', new Set(['B'])]]);

		expect(getRootNodes(selection, edges)).toEqual(new Set(['A', 'C']));
	});

	it('should return all nodes if there are no incoming edges', () => {
		const selection = new Set(['A', 'B']);
		const edges = new Map<string, Set<string>>();

		expect(getRootNodes(selection, edges)).toEqual(new Set(['A', 'B']));
	});
});
// getLeafNodes: selected nodes that do not point at any selected node.
describe('getLeafNodes', () => {
	it('should return leaf nodes of the graph', () => {
		// In the chain A -> B -> C only C has no selected successor.
		const selection = new Set(['A', 'B', 'C']);
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
			['C', new Set()],
		]);

		expect(getLeafNodes(selection, edges)).toEqual(new Set(['C']));
	});

	it('should return all nodes if there are no outgoing edges', () => {
		const selection = new Set(['A', 'B']);
		const edges = new Map<string, Set<string>>([
			['A', new Set()],
			['B', new Set()],
		]);

		expect(getLeafNodes(selection, edges)).toEqual(new Set(['A', 'B']));
	});
});
// parseExtractableSubgraphSelection: returns { start, end } for a valid selection,
// or an array of errors otherwise. Success cases assert the exact result object,
// because `toMatchObject({})` would also accept an error array.
describe('parseExtractableSubgraphSelection', () => {
	it('should return successfully for a valid extractable subgraph', () => {
		const graphIds = new Set(['A', 'B']);
		const adjacencyList = new Map<string, Set<string>>([
			['C', new Set(['A'])],
			['A', new Set(['B'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		// C -> A is the only outside connection; nothing leaves the selection.
		expect(result).toEqual({ start: 'A', end: undefined });
	});

	it('should return successfully for multiple edges into single input node', () => {
		const graphIds = new Set(['A', 'B']);
		const adjacencyList = new Map<string, Set<string>>([
			['X', new Set(['A'])],
			['Y', new Set(['A'])],
			['A', new Set(['B'])],
			['B', new Set()],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		expect(result).toEqual({ start: 'A', end: undefined });
	});

	it('should return successfully for multiple edges from single output nodes', () => {
		const graphIds = new Set(['A', 'B']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['X', 'Y'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		// No outside edge enters the selection; B is the single output node.
		expect(result).toEqual({ start: undefined, end: 'B' });
	});

	it('should return errors for input edge to non-root node', () => {
		const graphIds = new Set(['A', 'B']);
		const adjacencyList = new Map<string, Set<string>>([
			['X', new Set(['B'])],
			['A', new Set(['B'])],
			['B', new Set()],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		expect(result).toEqual([
			{
				errorCode: 'Input Edge To Non-Root Node',
				node: 'B',
			},
		]);
	});

	it('should return errors for output edge from non-leaf node', () => {
		const graphIds = new Set(['A', 'B']);
		const adjacencyList = new Map<string, Set<string>>([['A', new Set(['B', 'X'])]]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		expect(result).toEqual([
			{
				errorCode: 'Output Edge From Non-Leaf Node',
				node: 'A',
			},
		]);
	});

	it('should return successfully for multiple root nodes with 1 input', () => {
		const graphIds = new Set(['A', 'B', 'C']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['C'])],
			['B', new Set(['C'])],
			['X', new Set(['A'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		// A and B are both roots, but only A receives an outside edge.
		expect(result).toEqual({ start: 'A', end: undefined });
	});

	it('should return an error for multiple root nodes with inputs', () => {
		const graphIds = new Set(['A', 'B', 'C']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['C'])],
			['B', new Set(['C'])],
			['X', new Set(['A'])],
			['Y', new Set(['B'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		expect(result).toEqual([
			{
				errorCode: 'Multiple Input Nodes',
				nodes: new Set(['A', 'B']),
			},
		]);
	});

	it('should return successfully for multiple leaf nodes with 1 output', () => {
		const graphIds = new Set(['A', 'B', 'C']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['B', 'C'])],
			['C', new Set(['X'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		// B and C are both leaves, but only C has an edge leaving the selection.
		expect(result).toEqual({ start: undefined, end: 'C' });
	});

	it('should return an error for multiple leaf nodes with outputs', () => {
		const graphIds = new Set(['A', 'B', 'C']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['B', 'C'])],
			['B', new Set(['X'])],
			['C', new Set(['X'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		expect(result).toEqual([
			{
				errorCode: 'Multiple Output Nodes',
				nodes: new Set(['B', 'C']),
			},
		]);
	});

	it('should return an error for a non-continuous selection', () => {
		const graphIds = new Set(['A', 'D']);
		const adjacencyList = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
			['C', new Set(['D'])],
		]);
		const result = parseExtractableSubgraphSelection(graphIds, adjacencyList);
		// D is the input node (C -> D) and A the output node (A -> B); D cannot reach A.
		expect(result).toEqual([
			{
				errorCode: 'No Continuous Path From Root To Leaf In Selection',
				start: 'D',
				end: 'A',
			},
		]);
	});
});
// hasPath: directed reachability from a start node to an end node.
describe('hasPath', () => {
	it('should return true for a direct path between start and end', () => {
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
		]);

		expect(hasPath('A', 'C', edges)).toBe(true);
	});

	it('should return false if there is no path between start and end', () => {
		// Two disconnected components: A -> B and C -> D.
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['C', new Set(['D'])],
		]);

		expect(hasPath('A', 'D', edges)).toBe(false);
	});

	it('should return true for a path with multiple intermediate nodes', () => {
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
			['C', new Set(['D'])],
		]);

		expect(hasPath('A', 'D', edges)).toBe(true);
	});

	it('should return false if the start node is not in the adjacency list', () => {
		const edges = new Map<string, Set<string>>([
			['B', new Set(['C'])],
			['C', new Set(['D'])],
		]);

		expect(hasPath('A', 'D', edges)).toBe(false);
	});

	it('should return false if the end node is not in the adjacency list', () => {
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
		]);

		expect(hasPath('A', 'D', edges)).toBe(false);
	});

	it('should return true for a cyclic graph where a path exists', () => {
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['C'])],
			['C', new Set(['A'])],
		]);

		expect(hasPath('A', 'C', edges)).toBe(true);
	});

	it('should return false for a cyclic graph where no path exists', () => {
		// The A <-> B cycle must not prevent the search from terminating.
		const edges = new Map<string, Set<string>>([
			['A', new Set(['B'])],
			['B', new Set(['A'])],
			['C', new Set(['D'])],
		]);

		expect(hasPath('A', 'D', edges)).toBe(false);
	});

	it('should return true for a self-loop', () => {
		const edges = new Map<string, Set<string>>([['A', new Set(['A'])]]);

		expect(hasPath('A', 'A', edges)).toBe(true);
	});
});
});