petgraph/algo/scc/
tarjan_scc.rs

1use alloc::vec::Vec;
2use core::num::NonZeroUsize;
3
4use crate::visit::{IntoNeighbors, IntoNodeIdentifiers, NodeIndexable};
5
6#[derive(Copy, Clone, Debug)]
7struct NodeData {
8    rootindex: Option<NonZeroUsize>,
9}
10
11/// A reusable state for computing the *strongly connected components* using [Tarjan's algorithm][1].
12///
13/// [1]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
14#[derive(Debug)]
15pub struct TarjanScc<N> {
16    index: usize,
17    componentcount: usize,
18    nodes: Vec<NodeData>,
19    stack: Vec<N>,
20}
21
22impl<N> Default for TarjanScc<N> {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl<N> TarjanScc<N> {
29    /// Creates a new `TarjanScc`
30    pub fn new() -> Self {
31        TarjanScc {
32            index: 1,                   // Invariant: index < componentcount at all times.
33            componentcount: usize::MAX, // Will hold if componentcount is initialized to number of nodes - 1 or higher.
34            nodes: Vec::new(),
35            stack: Vec::new(),
36        }
37    }
38
39    /// Compute the *strongly connected components* using Algorithm 3 in
40    /// [A Space-Efficient Algorithm for Finding Strongly Connected Components][1] by David J. Pierce,
41    /// which is a memory-efficient variation of [Tarjan's algorithm][2].
42    ///
43    ///
44    /// [1]: https://homepages.ecs.vuw.ac.nz/~djp/files/P05.pdf
45    /// [2]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
46    ///
47    /// Calls `f` for each strongly connected component (scc).
48    /// The order of node ids within each scc is arbitrary, but the order of
49    /// the sccs is their postorder (reverse topological sort).
50    ///
51    /// For an undirected graph, the sccs are simply the connected components.
52    ///
53    /// This implementation is recursive and does one pass over the nodes.
54    pub fn run<G, F>(&mut self, g: G, mut f: F)
55    where
56        G: IntoNodeIdentifiers<NodeId = N> + IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
57        F: FnMut(&[N]),
58        N: Copy + PartialEq,
59    {
60        self.nodes.clear();
61        self.nodes
62            .resize(g.node_bound(), NodeData { rootindex: None });
63
64        for n in g.node_identifiers() {
65            let visited = self.nodes[g.to_index(n)].rootindex.is_some();
66            if !visited {
67                self.visit(n, g, &mut f);
68            }
69        }
70
71        debug_assert!(self.stack.is_empty());
72    }
73
74    fn visit<G, F>(&mut self, v: G::NodeId, g: G, f: &mut F)
75    where
76        G: IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
77        F: FnMut(&[N]),
78        N: Copy + PartialEq,
79    {
80        macro_rules! node {
81            ($node:expr) => {
82                self.nodes[g.to_index($node)]
83            };
84        }
85
86        let node_v = &mut node![v];
87        debug_assert!(node_v.rootindex.is_none());
88
89        let mut v_is_local_root = true;
90        let v_index = self.index;
91        node_v.rootindex = NonZeroUsize::new(v_index);
92        self.index += 1;
93
94        for w in g.neighbors(v) {
95            if node![w].rootindex.is_none() {
96                self.visit(w, g, f);
97            }
98            if node![w].rootindex < node![v].rootindex {
99                node![v].rootindex = node![w].rootindex;
100                v_is_local_root = false
101            }
102        }
103
104        if v_is_local_root {
105            // Pop the stack and generate an SCC.
106            let mut indexadjustment = 1;
107            let c = NonZeroUsize::new(self.componentcount);
108            let nodes = &mut self.nodes;
109            let start = self
110                .stack
111                .iter()
112                .rposition(|&w| {
113                    if nodes[g.to_index(v)].rootindex > nodes[g.to_index(w)].rootindex {
114                        true
115                    } else {
116                        nodes[g.to_index(w)].rootindex = c;
117                        indexadjustment += 1;
118                        false
119                    }
120                })
121                .map(|x| x + 1)
122                .unwrap_or_default();
123            nodes[g.to_index(v)].rootindex = c;
124            self.stack.push(v); // Pushing the component root to the back right before getting rid of it is somewhat ugly, but it lets it be included in f.
125            f(&self.stack[start..]);
126            self.stack.truncate(start);
127            self.index -= indexadjustment; // Backtrack index back to where it was before we ever encountered the component.
128            self.componentcount -= 1;
129        } else {
130            self.stack.push(v); // Stack is filled up when backtracking, unlike in Tarjans original algorithm.
131        }
132    }
133
134    /// Returns the index of the component in which v has been assigned. Allows for using self as a lookup table for an scc decomposition produced by self.run().
135    pub fn node_component_index<G>(&self, g: G, v: N) -> usize
136    where
137        G: IntoNeighbors<NodeId = N> + NodeIndexable<NodeId = N>,
138        N: Copy + PartialEq,
139    {
140        let rindex: usize = self.nodes[g.to_index(v)]
141            .rootindex
142            .map(NonZeroUsize::get)
143            .unwrap_or(0); // Compiles to no-op.
144        debug_assert!(
145            rindex != 0,
146            "Tried to get the component index of an unvisited node."
147        );
148        debug_assert!(
149            rindex > self.componentcount,
150            "Given node has been visited but not yet assigned to a component."
151        );
152        usize::MAX - rindex
153    }
154}
155
156/// Compute the *strongly connected components* using [Tarjan's algorithm][1].
157///
158/// This implementation is recursive and does one pass over the nodes. It is based on
159/// [A Space-Efficient Algorithm for Finding Strongly Connected Components][2] by David J. Pierce,
160/// to provide a memory-efficient implementation of [Tarjan's algorithm][1].
161///
162/// # Arguments
163/// * `g`: a directed or undirected graph.
164///
165/// # Returns
166/// Return a vector where each element is a strongly connected component (scc).
167/// The order of node ids within each scc is arbitrary, but the order of
168/// the sccs is their postorder (reverse topological sort).
169///
170/// For an undirected graph, the sccs are simply the connected components.
171///
172/// # Complexity
173/// * Time complexity: **O(|V| + |E|)**.
174/// * Auxiliary space: **O(|V|)**.
175///
176/// where **|V|** is the number of nodes and **|E|** is the number of edges.
177///
178/// # Examples
179///
180/// ```rust
181/// use petgraph::Graph;
182/// use petgraph::algo::tarjan_scc;
183/// use petgraph::prelude::*;
184///
185/// let mut graph: Graph<char, (), Directed> = Graph::new();
186/// let a = graph.add_node('A');
187/// let b = graph.add_node('B');
188/// let c = graph.add_node('C');
189/// let d = graph.add_node('D');
190/// let e = graph.add_node('E');
191///
192/// graph.extend_with_edges(&[
193///     (a, b), (b, c), (c, a),  // SCC 1: A -> B -> C -> A
194///     (b, d), (d, e),          // D and E form individual SCCs
195/// ]);
196///
197/// // Graph structure:
198/// // A --> B ---> D ---> E
199/// // ↑    ↑
200/// // └--- C
201///
202/// let sccs = tarjan_scc(&graph);
203/// assert_eq!(sccs.len(), 3); // Three strongly connected components
204///
205/// // Find the sizes of each SCC
206/// let mut scc_sizes: Vec<usize> = sccs.iter().map(|scc| scc.len()).collect();
207/// scc_sizes.sort();
208/// assert_eq!(scc_sizes, vec![1, 1, 3]); // Two single-node SCCs and one 3-node SCC
209/// ```
210///
211/// Using `TarjanScc` for reusable computation:
212///
213/// ```rust
214/// use petgraph::Graph;
215/// use petgraph::algo::TarjanScc;
216/// use petgraph::prelude::*;
217///
218/// let mut graph: Graph<u32, (), Directed> = Graph::new();
219/// let n1 = graph.add_node(1);
220/// let n2 = graph.add_node(2);
221/// let n3 = graph.add_node(3);
222/// let n4 = graph.add_node(4);
223///
224/// graph.extend_with_edges(&[
225///     (n1, n2), (n2, n3), (n3, n1),  // Cycle: 1 -> 2 -> 3 -> 1
226///     (n2, n4),                       // Branch to isolated node 4
227/// ]);
228///
229/// let mut tarjan = TarjanScc::new();
230/// let mut sccs = Vec::new();
231///
232/// tarjan.run(&graph, |scc| {
233///     sccs.push(scc.to_vec());
234/// });
235///
236/// assert_eq!(sccs.len(), 2); // Two SCCs: {1,2,3} and {4}
237/// ```
238///
239/// For an undirected graph (SCCs are just connected components):
240///
241/// ```rust
242/// use petgraph::Graph;
243/// use petgraph::algo::tarjan_scc;
244/// use petgraph::prelude::*;
245///
246/// let mut graph: Graph<i32, (), Undirected> = Graph::new_undirected();
247/// let a = graph.add_node(1);
248/// let b = graph.add_node(2);
249/// let c = graph.add_node(3);
250/// let d = graph.add_node(4);
251///
252/// graph.extend_with_edges(&[
253///     (a, b), // Component 1: {1, 2}
254///     (c, d), // Component 2: {3, 4}
255/// ]);
256///
257/// let sccs = tarjan_scc(&graph);
258/// assert_eq!(sccs.len(), 2); // Two connected components
259///
260/// for scc in &sccs {
261///     assert_eq!(scc.len(), 2); // Each component has 2 nodes
262/// }
263/// ```
264///
265/// [1]: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
266/// [2]: https://www.researchgate.net/publication/283024636_A_space-efficient_algorithm_for_finding_strongly_connected_components
267pub fn tarjan_scc<G>(g: G) -> Vec<Vec<G::NodeId>>
268where
269    G: IntoNodeIdentifiers + IntoNeighbors + NodeIndexable,
270{
271    let mut sccs = Vec::new();
272    {
273        let mut tarjan_scc = TarjanScc::new();
274        tarjan_scc.run(g, |scc| sccs.push(scc.to_vec()));
275    }
276    sccs
277}