gml_parser/
parser.rs

1/*!
2A nom-based GML parser.
3*/
4
5use std::collections::HashMap;
6
7use nom::{
8    IResult, Parser,
9    bytes::complete::{escaped_transform, is_not, tag, take, take_while},
10    character::complete::{digit1, multispace0, multispace1, space0},
11    combinator::{self, map_res, recognize, verify},
12    error::{ErrorKind, FromExternalError, ParseError},
13};
14
15use crate::gml::{Edge, Gml, GmlItem, Node, Value};
16
17pub trait GmlParseError<'a>:
18    ParseError<&'a str>
19    + FromExternalError<&'a str, std::num::ParseIntError>
20    + FromExternalError<&'a str, std::num::ParseFloatError>
21    + FromExternalError<&'a str, &'a str>
22    + std::fmt::Debug
23{
24}
25impl<'a, T> GmlParseError<'a> for T where
26    T: ParseError<&'a str>
27        + FromExternalError<&'a str, std::num::ParseIntError>
28        + FromExternalError<&'a str, std::num::ParseFloatError>
29        + FromExternalError<&'a str, &'a str>
30        + std::fmt::Debug
31{
32}
33
34/// Take `count` characters and make sure they all satisfy `cond`.
35fn take_verify<'a, E: GmlParseError<'a>>(
36    count: u32,
37    cond: impl Fn(char) -> bool,
38) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, E> {
39    move |i| verify(take(count), |s: &str| s.chars().all(&cond)).parse(i)
40}
41
42/// Parse a GML key.
43pub fn key<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
44    // a key starts with the a character [a-zA-Z_], and has remaining characters [a-zA-Z0-9_]
45    let take_first = take_verify(1, |chr| chr.is_ascii_alphabetic() || chr == '_');
46    let take_remaining = take_while(|chr: char| chr.is_ascii_alphanumeric() || chr == '_');
47    let (input, key) = recognize((take_first, take_remaining)).parse(input)?;
48    Ok((input, key))
49}
50
51/// Parse a GML item (a key + value).
52pub fn item<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, GmlItem<'a>, E> {
53    match key(input)? {
54        (input, "node") => node(input).map(|(input, node)| (input, GmlItem::Node(node))),
55        (input, "edge") => edge(input).map(|(input, edge)| (input, GmlItem::Edge(edge))),
56        (input, "directed") => {
57            int_as_bool(input).map(|(input, value)| (input, GmlItem::Directed(value)))
58        }
59        (input, name) => {
60            value(input).map(|(input, value)| (input, GmlItem::KeyValue((name.into(), value))))
61        }
62    }
63}
64
65/// Parse a GML graph.
66pub fn gml<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Gml<'a>, E> {
67    let (input, _) = multispace0(input)?;
68    let (input, _) = tag("graph")(input)?;
69    let (input, _) = space0(input)?;
70    let (input, _) = tag("[")(input)?;
71    let (input, _) = newline(input)?;
72
73    let (input, (items, _)) = nom::multi::many_till(item, tag("]")).parse(input)?;
74
75    let [nodes, edges, directed, others]: [Vec<_>; 4] = partition(items.into_iter(), |x| match x {
76        GmlItem::Node(_) => 0,
77        GmlItem::Edge(_) => 1,
78        GmlItem::Directed(_) => 2,
79        GmlItem::KeyValue(_) => 3,
80    });
81
82    let nodes: Vec<_> = nodes
83        .into_iter()
84        .map(|x| {
85            if let GmlItem::Node(x) = x {
86                x
87            } else {
88                panic!()
89            }
90        })
91        .collect();
92    let edges: Vec<_> = edges
93        .into_iter()
94        .map(|x| {
95            if let GmlItem::Edge(x) = x {
96                x
97            } else {
98                panic!()
99            }
100        })
101        .collect();
102    let others: Vec<_> = others
103        .into_iter()
104        .map(|x| {
105            if let GmlItem::KeyValue(x) = x {
106                x
107            } else {
108                panic!()
109            }
110        })
111        .collect();
112
113    if directed.len() > 1 {
114        result_str_to_nom(
115            input,
116            Err("The 'directed' key must only be specified once"),
117            ErrorKind::Fail,
118        )?;
119    }
120    let directed = match directed.first() {
121        Some(GmlItem::Directed(x)) => *x,
122        Some(_) => panic!(),
123        // GML graphs are undirected by default
124        None => false,
125    };
126
127    let expected_len = others.len();
128    let others: HashMap<_, _> = others.into_iter().collect();
129    if others.len() != expected_len {
130        result_str_to_nom(
131            input,
132            Err("Duplicate keys are not supported"),
133            ErrorKind::Fail,
134        )?;
135    }
136
137    let (input, _) = multispace0(input)?;
138
139    Ok((
140        input,
141        Gml {
142            directed,
143            nodes,
144            edges,
145            other: others,
146        },
147    ))
148}
149
150/// Parse a GML node.
151fn node<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Node<'a>, E> {
152    let (input, _) = space0(input)?;
153    let (input, _) = tag("[")(input)?;
154    let (input, _) = newline(input)?;
155
156    let (input, (key_values, _)) = nom::multi::many_till((key, value), tag("]")).parse(input)?;
157    let expected_len = key_values.len();
158    let mut key_values: HashMap<_, _> = key_values.into_iter().collect();
159    if key_values.len() != expected_len {
160        result_str_to_nom(
161            input,
162            Err("Duplicate keys are not supported"),
163            ErrorKind::Fail,
164        )?;
165    }
166
167    let (input, _) = newline(input)?;
168
169    let id = match key_values.remove("id") {
170        Some(Value::Int(x)) => Some(x as u32),
171        Some(_) => result_str_to_nom(input, Err("Incorrect 'id' type"), ErrorKind::Fail)?,
172        None => None,
173    };
174
175    Ok((input, Node::new(id, key_values)))
176}
177
178/// Parse a GML edge.
179fn edge<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Edge<'a>, E> {
180    let (input, _) = space0(input)?;
181    let (input, _) = tag("[")(input)?;
182    let (input, _) = newline(input)?;
183
184    let (input, (key_values, _)) = nom::multi::many_till((key, value), tag("]")).parse(input)?;
185    let expected_len = key_values.len();
186    let mut key_values: HashMap<_, _> = key_values.into_iter().collect();
187    if key_values.len() != expected_len {
188        result_str_to_nom(
189            input,
190            Err("Duplicate keys are not supported"),
191            ErrorKind::Fail,
192        )?;
193    }
194
195    let (input, _) = newline(input)?;
196
197    let source = match key_values.remove("source") {
198        Some(Value::Int(x)) => x,
199        Some(_) => result_str_to_nom(input, Err("Incorrect 'source' type"), ErrorKind::Fail)?,
200        None => result_str_to_nom(input, Err("'source' doesn't exist"), ErrorKind::NoneOf)?,
201    };
202
203    let target = match key_values.remove("target") {
204        Some(Value::Int(x)) => x,
205        Some(_) => result_str_to_nom(input, Err("Incorrect 'target' type"), ErrorKind::Fail)?,
206        None => result_str_to_nom(input, Err("'target' doesn't exist"), ErrorKind::NoneOf)?,
207    };
208
209    Ok((input, Edge::new(source as u32, target as u32, key_values)))
210}
211
212fn value<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Value<'a>, E> {
213    let (input, _) = space0(input)?;
214
215    let (input, (value, _)) =
216        nom::branch::alt(((int, newline), (float, newline), (string, newline))).parse(input)?;
217
218    Ok((input, value))
219}
220
221fn int<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Value<'a>, E> {
222    let (input, value) = map_res(recognize(digit1), str::parse).parse(input)?;
223    Ok((input, Value::Int(value)))
224}
225
226fn float<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Value<'a>, E> {
227    let (input, value) =
228        map_res(nom::number::complete::recognize_float, str::parse).parse(input)?;
229    Ok((input, Value::Float(value)))
230}
231
232/// Parse a GML string.
233fn string<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, Value<'a>, E> {
234    let (input, _) = tag("\"")(input)?;
235    let (input, value) = escaped_transform(
236        is_not("\""),
237        '\\',
238        nom::branch::alt((
239            combinator::value("\\", tag("\\")),
240            combinator::value("\"", tag("\"")),
241        )),
242    )(input)?;
243    let (input, _) = tag("\"")(input)?;
244
245    Ok((input, Value::Str(value.into())))
246}
247
248fn newline<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
249    recognize((space0, multispace1, space0)).parse(input)
250}
251
252fn int_to_bool(x: i32) -> Result<bool, &'static str> {
253    match x {
254        1 => Ok(true),
255        0 => Ok(false),
256        _ => Err("Bool must be 0 or 1"),
257    }
258}
259
260fn int_as_bool<'a, E: GmlParseError<'a>>(input: &'a str) -> IResult<&'a str, bool, E> {
261    let (input, value) = value(input)?;
262
263    let value = match value {
264        Value::Int(x) => result_str_to_nom(input, int_to_bool(x), ErrorKind::Fail)?,
265        _ => result_str_to_nom(input, Err("Value was not an integer"), ErrorKind::Fail)?,
266    };
267
268    Ok((input, value))
269}
270
271fn result_str_to_nom<'a, T, E: GmlParseError<'a>>(
272    input: &'a str,
273    result: Result<T, &'a str>,
274    error_kind: ErrorKind,
275) -> Result<T, nom::Err<E>> {
276    result.map_err(|e| nom::Err::Failure(E::from_external_error(input, error_kind, e)))
277}
278
279fn partition<I, B, F, const N: usize>(iter: I, f: F) -> [B; N]
280where
281    I: Iterator + Sized,
282    B: Default + Extend<I::Item>,
283    [B; N]: Default,
284    F: FnMut(&I::Item) -> usize,
285{
286    #[inline]
287    fn extend<'a, T, B: Extend<T>, const N: usize>(
288        mut f: impl FnMut(&T) -> usize + 'a,
289        collections: &'a mut [B; N],
290    ) -> impl FnMut((), T) + 'a {
291        move |(), x| {
292            collections[f(&x)].extend(Some(x));
293        }
294    }
295
296    let mut collections: [B; N] = Default::default();
297
298    iter.fold((), extend(f, &mut collections));
299
300    collections
301}