petgraph/algo/scc/tarjan_scc.rs
1use alloc::vec::Vec;
2use core::num::NonZeroUsize;
3
4use crate::visit::{IntoNeighbors, IntoNodeIdentifiers, NodeIndexable};
5
6#[derive(Copy, Clone, Debug)]
7struct NodeData {
8 rootindex: Option<NonZeroUsize>,
9}
10
11/// A reusable state for computing the *strongly connected components* using [Tarjan's algorithm][1].
12///
13/// [1]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
14#[derive(Debug)]
15pub struct TarjanScc<N> {
16 index: usize,
17 componentcount: usize,
18 nodes: Vec<NodeData>,
19 stack: Vec<N>,
20}
21
22impl<N> Default for TarjanScc<N> {
23 fn default() -> Self {
24 Self::new()
25 }
26}
27
28impl<N> TarjanScc<N> {
29 /// Creates a new `TarjanScc`
30 pub fn new() -> Self {
31 TarjanScc {
32 index: 1, // Invariant: index < componentcount at all times.
33 componentcount: usize::MAX, // Will hold if componentcount is initialized to number of nodes - 1 or higher.
34 nodes: Vec::new(),
35 stack: Vec::new(),
36 }
37 }
38
39 /// Compute the *strongly connected components* using Algorithm 3 in
40 /// [A Space-Efficient Algorithm for Finding Strongly Connected Components][1] by David J. Pierce,
41 /// which is a memory-efficient variation of [Tarjan's algorithm][2].
42 ///
43 ///
44 /// [1]: https://homepages.ecs.vuw.ac.nz/~djp/files/P05.pdf
45 /// [2]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
46 ///
47 /// Calls `f` for each strongly connected component (scc).
48 /// The order of node ids within each scc is arbitrary, but the order of
49 /// the sccs is their postorder (reverse topological sort).
50 ///
51 /// For an undirected graph, the sccs are simply the connected components.
52 ///
53 /// This implementation is recursive and does one pass over the nodes.
54 pub fn run<G, F>(&mut self, g: G, mut f: F)
55 where
56 G: IntoNodeIdentifiers<NodeId = N> + IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
57 F: FnMut(&[N]),
58 N: Copy + PartialEq,
59 {
60 self.nodes.clear();
61 self.nodes
62 .resize(g.node_bound(), NodeData { rootindex: None });
63
64 for n in g.node_identifiers() {
65 let visited = self.nodes[g.to_index(n)].rootindex.is_some();
66 if !visited {
67 self.visit(n, g, &mut f);
68 }
69 }
70
71 debug_assert!(self.stack.is_empty());
72 }
73
74 fn visit<G, F>(&mut self, v: G::NodeId, g: G, f: &mut F)
75 where
76 G: IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
77 F: FnMut(&[N]),
78 N: Copy + PartialEq,
79 {
80 macro_rules! node {
81 ($node:expr) => {
82 self.nodes[g.to_index($node)]
83 };
84 }
85
86 let node_v = &mut node![v];
87 debug_assert!(node_v.rootindex.is_none());
88
89 let mut v_is_local_root = true;
90 let v_index = self.index;
91 node_v.rootindex = NonZeroUsize::new(v_index);
92 self.index += 1;
93
94 for w in g.neighbors(v) {
95 if node![w].rootindex.is_none() {
96 self.visit(w, g, f);
97 }
98 if node![w].rootindex < node![v].rootindex {
99 node![v].rootindex = node![w].rootindex;
100 v_is_local_root = false
101 }
102 }
103
104 if v_is_local_root {
105 // Pop the stack and generate an SCC.
106 let mut indexadjustment = 1;
107 let c = NonZeroUsize::new(self.componentcount);
108 let nodes = &mut self.nodes;
109 let start = self
110 .stack
111 .iter()
112 .rposition(|&w| {
113 if nodes[g.to_index(v)].rootindex > nodes[g.to_index(w)].rootindex {
114 true
115 } else {
116 nodes[g.to_index(w)].rootindex = c;
117 indexadjustment += 1;
118 false
119 }
120 })
121 .map(|x| x + 1)
122 .unwrap_or_default();
123 nodes[g.to_index(v)].rootindex = c;
124 self.stack.push(v); // Pushing the component root to the back right before getting rid of it is somewhat ugly, but it lets it be included in f.
125 f(&self.stack[start..]);
126 self.stack.truncate(start);
127 self.index -= indexadjustment; // Backtrack index back to where it was before we ever encountered the component.
128 self.componentcount -= 1;
129 } else {
130 self.stack.push(v); // Stack is filled up when backtracking, unlike in Tarjans original algorithm.
131 }
132 }
133
134 /// Returns the index of the component in which v has been assigned. Allows for using self as a lookup table for an scc decomposition produced by self.run().
135 pub fn node_component_index<G>(&self, g: G, v: N) -> usize
136 where
137 G: IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
138 N: Copy + PartialEq,
139 {
140 let rindex: usize = self.nodes[g.to_index(v)]
141 .rootindex
142 .map(NonZeroUsize::get)
143 .unwrap_or(0); // Compiles to no-op.
144 debug_assert!(
145 rindex != 0,
146 "Tried to get the component index of an unvisited node."
147 );
148 debug_assert!(
149 rindex > self.componentcount,
150 "Given node has been visited but not yet assigned to a component."
151 );
152 usize::MAX - rindex
153 }
154}
155
156/// Compute the *strongly connected components* using [Tarjan's algorithm][1].
157///
158/// This implementation is recursive and does one pass over the nodes. It is based on
159/// [A Space-Efficient Algorithm for Finding Strongly Connected Components][2] by David J. Pierce,
160/// to provide a memory-efficient implementation of [Tarjan's algorithm][1].
161///
162/// # Arguments
163/// * `g`: a directed or undirected graph.
164///
165/// # Returns
166/// Return a vector where each element is a strongly connected component (scc).
167/// The order of node ids within each scc is arbitrary, but the order of
168/// the sccs is their postorder (reverse topological sort).
169///
170/// For an undirected graph, the sccs are simply the connected components.
171///
172/// # Complexity
173/// * Time complexity: **O(|V| + |E|)**.
174/// * Auxiliary space: **O(|V|)**.
175///
176/// where **|V|** is the number of nodes and **|E|** is the number of edges.
177///
178/// # Examples
179///
180/// ```rust
181/// use petgraph::Graph;
182/// use petgraph::algo::tarjan_scc;
183/// use petgraph::prelude::*;
184///
185/// let mut graph: Graph<char, (), Directed> = Graph::new();
186/// let a = graph.add_node('A');
187/// let b = graph.add_node('B');
188/// let c = graph.add_node('C');
189/// let d = graph.add_node('D');
190/// let e = graph.add_node('E');
191///
192/// graph.extend_with_edges(&[
193/// (a, b), (b, c), (c, a), // SCC 1: A -> B -> C -> A
194/// (b, d), (d, e), // D and E form individual SCCs
195/// ]);
196///
197/// // Graph structure:
198/// // A --> B ---> D ---> E
199/// // ↑ ↑
200/// // └--- C
201///
202/// let sccs = tarjan_scc(&graph);
203/// assert_eq!(sccs.len(), 3); // Three strongly connected components
204///
205/// // Find the sizes of each SCC
206/// let mut scc_sizes: Vec<usize> = sccs.iter().map(|scc| scc.len()).collect();
207/// scc_sizes.sort();
208/// assert_eq!(scc_sizes, vec![1, 1, 3]); // Two single-node SCCs and one 3-node SCC
209/// ```
210///
211/// Using `TarjanScc` for reusable computation:
212///
213/// ```rust
214/// use petgraph::Graph;
215/// use petgraph::algo::TarjanScc;
216/// use petgraph::prelude::*;
217///
218/// let mut graph: Graph<u32, (), Directed> = Graph::new();
219/// let n1 = graph.add_node(1);
220/// let n2 = graph.add_node(2);
221/// let n3 = graph.add_node(3);
222/// let n4 = graph.add_node(4);
223///
224/// graph.extend_with_edges(&[
225/// (n1, n2), (n2, n3), (n3, n1), // Cycle: 1 -> 2 -> 3 -> 1
226/// (n2, n4), // Branch to isolated node 4
227/// ]);
228///
229/// let mut tarjan = TarjanScc::new();
230/// let mut sccs = Vec::new();
231///
232/// tarjan.run(&graph, |scc| {
233/// sccs.push(scc.to_vec());
234/// });
235///
236/// assert_eq!(sccs.len(), 2); // Two SCCs: {1,2,3} and {4}
237/// ```
238///
239/// For an undirected graph (SCCs are just connected components):
240///
241/// ```rust
242/// use petgraph::Graph;
243/// use petgraph::algo::tarjan_scc;
244/// use petgraph::prelude::*;
245///
246/// let mut graph: Graph<i32, (), Undirected> = Graph::new_undirected();
247/// let a = graph.add_node(1);
248/// let b = graph.add_node(2);
249/// let c = graph.add_node(3);
250/// let d = graph.add_node(4);
251///
252/// graph.extend_with_edges(&[
253/// (a, b), // Component 1: {1, 2}
254/// (c, d), // Component 2: {3, 4}
255/// ]);
256///
257/// let sccs = tarjan_scc(&graph);
258/// assert_eq!(sccs.len(), 2); // Two connected components
259///
260/// for scc in &sccs {
261/// assert_eq!(scc.len(), 2); // Each component has 2 nodes
262/// }
263/// ```
264///
265/// [1]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
266/// [2]: https://www.researchgate.net/publication/283024636_A_space-efficient_algorithm_for_finding_strongly_connected_components
267pub fn tarjan_scc<G>(g: G) -> Vec<Vec<G::NodeId>>
268where
269 G: IntoNodeIdentifiers + IntoNeighbors + NodeIndexable,
270{
271 let mut sccs = Vec::new();
272 {
273 let mut tarjan_scc = TarjanScc::new();
274 tarjan_scc.run(g, |scc| sccs.push(scc.to_vec()));
275 }
276 sccs
277}