petgraph/
unionfind.rs

1//! `UnionFind<K>` is a disjoint-set data structure.
2
3use super::graph::IndexType;
4use alloc::{collections::TryReserveError, vec, vec::Vec};
5use core::cmp::Ordering;
6
7/// `UnionFind<K>` is a disjoint-set data structure. It tracks set membership of *n* elements
8/// indexed from *0* to *n - 1*. The scalar type is `K` which must be an unsigned integer type.
9///
10/// <http://en.wikipedia.org/wiki/Disjoint-set_data_structure>
11///
12/// Too awesome not to quote:
13///
14/// “The amortized time per operation is **O(α(n))** where **α(n)** is the
15/// inverse of **f(x) = A(x, x)** with **A** being the extremely fast-growing Ackermann function.”
16#[derive(Debug, Clone)]
17pub struct UnionFind<K> {
18    // For element at index *i*, store the index of its parent; the representative itself
19    // stores its own index. This forms equivalence classes which are the disjoint sets, each
20    // with a unique representative.
21    parent: Vec<K>,
22    // It is a balancing tree structure,
23    // so the ranks are logarithmic in the size of the container -- a byte is more than enough.
24    //
25    // Rank is separated out both to save space and to save cache in when searching in the parent
26    // vector.
27    rank: Vec<u8>,
28}
29
30impl<K> Default for UnionFind<K> {
31    fn default() -> Self {
32        Self {
33            parent: Vec::new(),
34            rank: Vec::new(),
35        }
36    }
37}
38
39#[inline]
40unsafe fn get_unchecked<K>(xs: &[K], index: usize) -> &K {
41    debug_assert!(index < xs.len());
42    xs.get_unchecked(index)
43}
44
45#[inline]
46unsafe fn get_unchecked_mut<K>(xs: &mut [K], index: usize) -> &mut K {
47    debug_assert!(index < xs.len());
48    xs.get_unchecked_mut(index)
49}
50
51impl<K> UnionFind<K>
52where
53    K: IndexType,
54{
55    /// Create a new `UnionFind` of `n` disjoint sets.
56    pub fn new(n: usize) -> Self {
57        let rank = vec![0; n];
58        let parent = (0..n).map(K::new).collect::<Vec<K>>();
59
60        UnionFind { parent, rank }
61    }
62
63    /// Create a new `UnionFind` with no elements.
64    pub const fn new_empty() -> Self {
65        Self {
66            parent: Vec::new(),
67            rank: Vec::new(),
68        }
69    }
70
71    /// Returns the number of elements in the union-find data-structure.
72    pub fn len(&self) -> usize {
73        self.parent.len()
74    }
75
76    /// Returns true if there are no elements in the union-find data-structure.
77    pub fn is_empty(&self) -> bool {
78        self.parent.is_empty()
79    }
80
81    /// Adds a new disjoint set and returns the index of the new set.
82    ///
83    /// The new disjoint set is always added to the end, so the returned
84    /// index is the same as the number of elements before calling this function.
85    ///
86    /// **Time Complexity**
87    /// Takes amortized O(1) time.
88    pub fn new_set(&mut self) -> K {
89        let retval = K::new(self.parent.len());
90        self.rank.push(0);
91        self.parent.push(retval);
92        retval
93    }
94
95    /// Return the representative for `x`.
96    ///
97    /// **Panics** if `x` is out of bounds.
98    #[track_caller]
99    pub fn find(&self, x: K) -> K {
100        self.try_find(x).expect("The index is out of bounds")
101    }
102
103    /// Return the representative for `x` or `None` if `x` is out of bounds.
104    pub fn try_find(&self, mut x: K) -> Option<K> {
105        if x.index() >= self.len() {
106            return None;
107        }
108
109        loop {
110            // Use unchecked indexing because we can trust the internal set ids.
111            let xparent = unsafe { *get_unchecked(&self.parent, x.index()) };
112            if xparent == x {
113                break;
114            }
115            x = xparent;
116        }
117
118        Some(x)
119    }
120
121    /// Return the representative for `x`.
122    ///
123    /// Write back the found representative, flattening the internal
124    /// datastructure in the process and quicken future lookups.
125    ///
126    /// **Panics** if `x` is out of bounds.
127    #[track_caller]
128    pub fn find_mut(&mut self, x: K) -> K {
129        assert!(x.index() < self.len());
130        unsafe { self.find_mut_recursive(x) }
131    }
132
133    /// Return the representative for `x` or `None` if `x` is out of bounds.
134    ///
135    /// Write back the found representative, flattening the internal
136    /// datastructure in the process and quicken future lookups.
137    pub fn try_find_mut(&mut self, x: K) -> Option<K> {
138        if x.index() >= self.len() {
139            return None;
140        }
141        Some(unsafe { self.find_mut_recursive(x) })
142    }
143
144    unsafe fn find_mut_recursive(&mut self, mut x: K) -> K {
145        let mut parent = *get_unchecked(&self.parent, x.index());
146        while parent != x {
147            let grandparent = *get_unchecked(&self.parent, parent.index());
148            *get_unchecked_mut(&mut self.parent, x.index()) = grandparent;
149            x = parent;
150            parent = grandparent;
151        }
152        x
153    }
154
155    /// Returns `true` if the given elements belong to the same set, and returns
156    /// `false` otherwise.
157    ///
158    /// **Panics** if `x` or `y` is out of bounds.
159    #[track_caller]
160    pub fn equiv(&self, x: K, y: K) -> bool {
161        self.find(x) == self.find(y)
162    }
163
164    /// Returns `Ok(true)` if the given elements belong to the same set, and returns
165    /// `Ok(false)` otherwise.
166    ///
167    /// If `x` or `y` are out of bounds, it returns `Err` with the first bad index found.
168    pub fn try_equiv(&self, x: K, y: K) -> Result<bool, K> {
169        let xrep = self.try_find(x).ok_or(x)?;
170        let yrep = self.try_find(y).ok_or(y)?;
171        Ok(xrep == yrep)
172    }
173
174    /// Unify the two sets containing `x` and `y`.
175    ///
176    /// Return `false` if the sets were already the same, `true` if they were unified.
177    ///
178    /// **Panics** if `x` or `y` is out of bounds.
179    #[track_caller]
180    pub fn union(&mut self, x: K, y: K) -> bool {
181        self.try_union(x, y).unwrap()
182    }
183
184    /// Unify the two sets containing `x` and `y`.
185    ///
186    /// Return `Ok(false)` if the sets were already the same, `Ok(true)` if they were unified.
187    ///
188    /// If `x` or `y` are out of bounds, it returns `Err` with first found bad index.
189    /// But if `x == y`, the result will be `Ok(false)` even if the indexes go out of bounds.
190    pub fn try_union(&mut self, x: K, y: K) -> Result<bool, K> {
191        if x == y {
192            return Ok(false);
193        }
194        let xrep = self.try_find_mut(x).ok_or(x)?;
195        let yrep = self.try_find_mut(y).ok_or(y)?;
196
197        if xrep == yrep {
198            return Ok(false);
199        }
200
201        let xrepu = xrep.index();
202        let yrepu = yrep.index();
203        let xrank = self.rank[xrepu];
204        let yrank = self.rank[yrepu];
205
206        // The rank corresponds roughly to the depth of the treeset, so put the
207        // smaller set below the larger
208        match xrank.cmp(&yrank) {
209            Ordering::Less => self.parent[xrepu] = yrep,
210            Ordering::Greater => self.parent[yrepu] = xrep,
211            Ordering::Equal => {
212                self.parent[yrepu] = xrep;
213                self.rank[xrepu] += 1;
214            }
215        }
216        Ok(true)
217    }
218
219    /// Return a vector mapping each element to its representative.
220    pub fn into_labeling(mut self) -> Vec<K> {
221        // write in the labeling of each element
222        unsafe {
223            for ix in 0..self.len() {
224                let k = *get_unchecked(&self.parent, ix);
225                let xrep = self.find_mut_recursive(k);
226                *self.parent.get_unchecked_mut(ix) = xrep;
227            }
228        }
229        self.parent
230    }
231}
232
233impl<K> UnionFind<K> {
234    /// Constructs a new, empty `UnionFind<K>` with at least the specified capacity.
235    ///
236    /// This acts similarly to [`Vec::with_capacity`].
237    pub fn with_capacity(capacity: usize) -> Self {
238        UnionFind {
239            parent: Vec::with_capacity(capacity),
240            rank: Vec::with_capacity(capacity),
241        }
242    }
243
244    /// Returns the total number of elements the `UnionFind` can hold without reallocating.
245    ///
246    /// # Examples
247    ///
248    /// ```
249    /// use petgraph::unionfind::UnionFind;
250    ///
251    /// let mut uf: UnionFind<u32> = UnionFind::with_capacity(10);
252    /// uf.new_set();
253    /// assert!(uf.capacity() >= 10);
254    /// ```
255    #[inline]
256    pub fn capacity(&self) -> usize {
257        self.parent.capacity().min(self.rank.capacity())
258    }
259
260    /// Reserves capacity for at least `additional` more elements to be inserted
261    /// in the given `UnionFind<K>`. The collection may reserve more space to
262    /// speculatively avoid frequent reallocations. After calling `reserve`,
263    /// capacity will be greater than or equal to `self.len() + additional`.
264    /// Does nothing if capacity is already sufficient.
265    ///
266    /// # Panics
267    ///
268    /// Panics if the new capacity exceeds `isize::MAX` _bytes_.
269    ///
270    /// # Examples
271    ///
272    /// ```
273    /// use petgraph::unionfind::UnionFind;
274    ///
275    /// let mut uf: UnionFind<u32> = UnionFind::new(3);
276    /// uf.reserve(10);
277    /// assert!(uf.capacity() >= 13);
278    /// ```
279    #[inline]
280    pub fn reserve(&mut self, additional: usize) {
281        self.parent.reserve(additional);
282        self.rank.reserve(additional);
283    }
284
285    /// Reserves the minimum capacity for at least `additional` more elements to
286    /// be inserted in the given `UnionFind<K>`. Unlike [`reserve`], this will not
287    /// deliberately over-allocate to speculatively avoid frequent allocations.
288    /// After calling `reserve_exact`, capacity will be greater than or equal to
289    /// `self.len() + additional`. Does nothing if the capacity is already
290    /// sufficient.
291    ///
292    /// Note that the allocator may give the collection more space than it
293    /// requests. Therefore, capacity can not be relied upon to be precisely
294    /// minimal. Prefer [`reserve`] if future insertions are expected.
295    ///
296    /// [`reserve`]: UnionFind::reserve
297    ///
298    /// # Panics
299    ///
300    /// Panics if the new capacity exceeds `isize::MAX` _bytes_.
301    ///
302    /// # Examples
303    ///
304    /// ```
305    /// use petgraph::unionfind::UnionFind;
306    ///
307    /// let mut uf: UnionFind<u32> =  UnionFind::new_empty();
308    /// uf.reserve_exact(10);
309    /// assert!(uf.capacity() >= 10);
310    /// ```
311    #[inline]
312    pub fn reserve_exact(&mut self, additional: usize) {
313        self.parent.reserve_exact(additional);
314        self.rank.reserve_exact(additional);
315    }
316
317    /// Tries to reserve capacity for at least `additional` more elements to be inserted
318    /// in the given `UnionFind<K>`. The collection may reserve more space to speculatively avoid
319    /// frequent reallocations. After calling `try_reserve`, capacity will be
320    /// greater than or equal to `self.len() + additional` if it returns
321    /// `Ok(())`. Does nothing if capacity is already sufficient. This method
322    /// preserves the contents even if an error occurs.
323    ///
324    /// # Errors
325    ///
326    /// If the capacity overflows, or the allocator reports a failure, then an error
327    /// is returned.
328    #[inline]
329    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
330        self.parent
331            .try_reserve(additional)
332            .and_then(|_| self.rank.try_reserve(additional))
333    }
334
335    /// Tries to reserve the minimum capacity for at least `additional`
336    /// elements to be inserted in the given `UnionFind<K>`. Unlike [`try_reserve`],
337    /// this will not deliberately over-allocate to speculatively avoid frequent
338    /// allocations. After calling `try_reserve_exact`, capacity will be greater
339    /// than or equal to `self.len() + additional` if it returns `Ok(())`.
340    /// Does nothing if the capacity is already sufficient.
341    ///
342    /// Note that the allocator may give the collection more space than it
343    /// requests. Therefore, capacity can not be relied upon to be precisely
344    /// minimal. Prefer [`try_reserve`] if future insertions are expected.
345    ///
346    /// [`try_reserve`]: UnionFind::try_reserve
347    ///
348    /// # Errors
349    ///
350    /// If the capacity overflows, or the allocator reports a failure, then an error
351    /// is returned.
352    #[inline]
353    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
354        self.parent
355            .try_reserve_exact(additional)
356            .and_then(|_| self.rank.try_reserve_exact(additional))
357    }
358
359    /// Shrinks the capacity of the `UnionFind` as much as possible.
360    ///
361    /// The behavior of this method depends on the allocator, which may either shrink the
362    /// collection in-place or reallocate. The resulting `UnionFind` might still have some excess capacity, just as
363    /// is the case for [`with_capacity`]. See [`Vec::shrink_to_fit`] for more details, since the implementation is based on this method.
364    ///
365    /// [`with_capacity`]: UnionFind::with_capacity
366    ///
367    /// # Examples
368    ///
369    /// ```
370    /// use petgraph::unionfind::UnionFind;
371    ///
372    /// let mut uf: UnionFind<u32> = UnionFind::with_capacity(10);
373    ///
374    /// for _ in 0..3 {
375    ///     uf.new_set();
376    /// }
377    ///
378    /// assert!(uf.capacity() >= 10);
379    /// uf.shrink_to_fit();
380    /// assert!(uf.capacity() >= 3);
381    /// ```
382    #[inline]
383    pub fn shrink_to_fit(&mut self) {
384        self.parent.shrink_to_fit();
385        self.rank.shrink_to_fit();
386    }
387
388    /// Shrinks the capacity of the `UnionFind` with a lower bound.
389    ///
390    /// The capacity will remain at least as large as both the length
391    /// and the supplied value.
392    ///
393    /// If the current capacity is less than the lower limit, this is a no-op.
394    ///
395    /// # Examples
396    ///
397    /// ```
398    /// use petgraph::unionfind::UnionFind;
399    ///
400    /// let mut uf: UnionFind<u32> = UnionFind::with_capacity(10);
401    ///
402    /// for _ in 0..3 {
403    ///     uf.new_set();
404    /// }
405    ///
406    /// assert!(uf.capacity() >= 10);
407    /// uf.shrink_to(4);
408    /// assert!(uf.capacity() >= 4);
409    /// uf.shrink_to(0);
410    /// assert!(uf.capacity() >= 3);
411    /// ```
412    #[inline]
413    pub fn shrink_to(&mut self, min_capacity: usize) {
414        self.parent.shrink_to(min_capacity);
415        self.rank.shrink_to(min_capacity);
416    }
417}