unsafe_libyaml/
reader.rs

1use crate::externs::{memcmp, memmove};
2use crate::ops::ForceAdd as _;
3use crate::success::{Success, FAIL, OK};
4use crate::yaml::{size_t, yaml_char_t};
5use crate::{
6    libc, yaml_parser_t, PointerExt, YAML_ANY_ENCODING, YAML_READER_ERROR, YAML_UTF16BE_ENCODING,
7    YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING,
8};
9use core::ptr::addr_of_mut;
10
11unsafe fn yaml_parser_set_reader_error(
12    parser: *mut yaml_parser_t,
13    problem: *const libc::c_char,
14    offset: size_t,
15    value: libc::c_int,
16) -> Success {
17    (*parser).error = YAML_READER_ERROR;
18    let fresh0 = addr_of_mut!((*parser).problem);
19    *fresh0 = problem;
20    (*parser).problem_offset = offset;
21    (*parser).problem_value = value;
22    FAIL
23}
24
25const BOM_UTF8: *const libc::c_char = b"\xEF\xBB\xBF\0" as *const u8 as *const libc::c_char;
26const BOM_UTF16LE: *const libc::c_char = b"\xFF\xFE\0" as *const u8 as *const libc::c_char;
27const BOM_UTF16BE: *const libc::c_char = b"\xFE\xFF\0" as *const u8 as *const libc::c_char;
28
29unsafe fn yaml_parser_determine_encoding(parser: *mut yaml_parser_t) -> Success {
30    while !(*parser).eof
31        && ((*parser)
32            .raw_buffer
33            .last
34            .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long)
35            < 3_i64
36    {
37        if yaml_parser_update_raw_buffer(parser).fail {
38            return FAIL;
39        }
40    }
41    if (*parser)
42        .raw_buffer
43        .last
44        .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
45        >= 2_i64
46        && memcmp(
47            (*parser).raw_buffer.pointer as *const libc::c_void,
48            BOM_UTF16LE as *const libc::c_void,
49            2_u64,
50        ) == 0
51    {
52        (*parser).encoding = YAML_UTF16LE_ENCODING;
53        let fresh1 = addr_of_mut!((*parser).raw_buffer.pointer);
54        *fresh1 = (*fresh1).wrapping_offset(2_isize);
55        let fresh2 = addr_of_mut!((*parser).offset);
56        *fresh2 = (*fresh2 as libc::c_ulong).force_add(2_u64) as size_t;
57    } else if (*parser)
58        .raw_buffer
59        .last
60        .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
61        >= 2_i64
62        && memcmp(
63            (*parser).raw_buffer.pointer as *const libc::c_void,
64            BOM_UTF16BE as *const libc::c_void,
65            2_u64,
66        ) == 0
67    {
68        (*parser).encoding = YAML_UTF16BE_ENCODING;
69        let fresh3 = addr_of_mut!((*parser).raw_buffer.pointer);
70        *fresh3 = (*fresh3).wrapping_offset(2_isize);
71        let fresh4 = addr_of_mut!((*parser).offset);
72        *fresh4 = (*fresh4 as libc::c_ulong).force_add(2_u64) as size_t;
73    } else if (*parser)
74        .raw_buffer
75        .last
76        .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
77        >= 3_i64
78        && memcmp(
79            (*parser).raw_buffer.pointer as *const libc::c_void,
80            BOM_UTF8 as *const libc::c_void,
81            3_u64,
82        ) == 0
83    {
84        (*parser).encoding = YAML_UTF8_ENCODING;
85        let fresh5 = addr_of_mut!((*parser).raw_buffer.pointer);
86        *fresh5 = (*fresh5).wrapping_offset(3_isize);
87        let fresh6 = addr_of_mut!((*parser).offset);
88        *fresh6 = (*fresh6 as libc::c_ulong).force_add(3_u64) as size_t;
89    } else {
90        (*parser).encoding = YAML_UTF8_ENCODING;
91    }
92    OK
93}
94
95unsafe fn yaml_parser_update_raw_buffer(parser: *mut yaml_parser_t) -> Success {
96    let mut size_read: size_t = 0_u64;
97    if (*parser).raw_buffer.start == (*parser).raw_buffer.pointer
98        && (*parser).raw_buffer.last == (*parser).raw_buffer.end
99    {
100        return OK;
101    }
102    if (*parser).eof {
103        return OK;
104    }
105    if (*parser).raw_buffer.start < (*parser).raw_buffer.pointer
106        && (*parser).raw_buffer.pointer < (*parser).raw_buffer.last
107    {
108        memmove(
109            (*parser).raw_buffer.start as *mut libc::c_void,
110            (*parser).raw_buffer.pointer as *const libc::c_void,
111            (*parser)
112                .raw_buffer
113                .last
114                .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
115                as libc::c_ulong,
116        );
117    }
118    let fresh7 = addr_of_mut!((*parser).raw_buffer.last);
119    *fresh7 = (*fresh7).wrapping_offset(
120        -((*parser)
121            .raw_buffer
122            .pointer
123            .c_offset_from((*parser).raw_buffer.start) as libc::c_long as isize),
124    );
125    let fresh8 = addr_of_mut!((*parser).raw_buffer.pointer);
126    *fresh8 = (*parser).raw_buffer.start;
127    if (*parser).read_handler.expect("non-null function pointer")(
128        (*parser).read_handler_data,
129        (*parser).raw_buffer.last,
130        (*parser)
131            .raw_buffer
132            .end
133            .c_offset_from((*parser).raw_buffer.last) as size_t,
134        addr_of_mut!(size_read),
135    ) == 0
136    {
137        return yaml_parser_set_reader_error(
138            parser,
139            b"input error\0" as *const u8 as *const libc::c_char,
140            (*parser).offset,
141            -1,
142        );
143    }
144    let fresh9 = addr_of_mut!((*parser).raw_buffer.last);
145    *fresh9 = (*fresh9).wrapping_offset(size_read as isize);
146    if size_read == 0 {
147        (*parser).eof = true;
148    }
149    OK
150}
151
152pub(crate) unsafe fn yaml_parser_update_buffer(
153    parser: *mut yaml_parser_t,
154    length: size_t,
155) -> Success {
156    let mut first = true;
157    __assert!(((*parser).read_handler).is_some());
158    if (*parser).eof && (*parser).raw_buffer.pointer == (*parser).raw_buffer.last {
159        return OK;
160    }
161    if (*parser).unread >= length {
162        return OK;
163    }
164    if (*parser).encoding == YAML_ANY_ENCODING {
165        if yaml_parser_determine_encoding(parser).fail {
166            return FAIL;
167        }
168    }
169    if (*parser).buffer.start < (*parser).buffer.pointer
170        && (*parser).buffer.pointer < (*parser).buffer.last
171    {
172        let size: size_t = (*parser)
173            .buffer
174            .last
175            .c_offset_from((*parser).buffer.pointer) as size_t;
176        memmove(
177            (*parser).buffer.start as *mut libc::c_void,
178            (*parser).buffer.pointer as *const libc::c_void,
179            size,
180        );
181        let fresh10 = addr_of_mut!((*parser).buffer.pointer);
182        *fresh10 = (*parser).buffer.start;
183        let fresh11 = addr_of_mut!((*parser).buffer.last);
184        *fresh11 = (*parser).buffer.start.wrapping_offset(size as isize);
185    } else if (*parser).buffer.pointer == (*parser).buffer.last {
186        let fresh12 = addr_of_mut!((*parser).buffer.pointer);
187        *fresh12 = (*parser).buffer.start;
188        let fresh13 = addr_of_mut!((*parser).buffer.last);
189        *fresh13 = (*parser).buffer.start;
190    }
191    while (*parser).unread < length {
192        if !first || (*parser).raw_buffer.pointer == (*parser).raw_buffer.last {
193            if yaml_parser_update_raw_buffer(parser).fail {
194                return FAIL;
195            }
196        }
197        first = false;
198        while (*parser).raw_buffer.pointer != (*parser).raw_buffer.last {
199            let mut value: libc::c_uint = 0;
200            let value2: libc::c_uint;
201            let mut incomplete = false;
202            let mut octet: libc::c_uchar;
203            let mut width: libc::c_uint = 0;
204            let low: libc::c_int;
205            let high: libc::c_int;
206            let mut k: size_t;
207            let raw_unread: size_t = (*parser)
208                .raw_buffer
209                .last
210                .c_offset_from((*parser).raw_buffer.pointer)
211                as size_t;
212            match (*parser).encoding {
213                YAML_UTF8_ENCODING => {
214                    octet = *(*parser).raw_buffer.pointer;
215                    width = if octet & 0x80 == 0 {
216                        1
217                    } else if octet & 0xE0 == 0xC0 {
218                        2
219                    } else if octet & 0xF0 == 0xE0 {
220                        3
221                    } else if octet & 0xF8 == 0xF0 {
222                        4
223                    } else {
224                        0
225                    } as libc::c_uint;
226                    if width == 0 {
227                        return yaml_parser_set_reader_error(
228                            parser,
229                            b"invalid leading UTF-8 octet\0" as *const u8 as *const libc::c_char,
230                            (*parser).offset,
231                            octet as libc::c_int,
232                        );
233                    }
234                    if width as libc::c_ulong > raw_unread {
235                        if (*parser).eof {
236                            return yaml_parser_set_reader_error(
237                                parser,
238                                b"incomplete UTF-8 octet sequence\0" as *const u8
239                                    as *const libc::c_char,
240                                (*parser).offset,
241                                -1,
242                            );
243                        }
244                        incomplete = true;
245                    } else {
246                        value = if octet & 0x80 == 0 {
247                            octet & 0x7F
248                        } else if octet & 0xE0 == 0xC0 {
249                            octet & 0x1F
250                        } else if octet & 0xF0 == 0xE0 {
251                            octet & 0xF
252                        } else if octet & 0xF8 == 0xF0 {
253                            octet & 0x7
254                        } else {
255                            0
256                        } as libc::c_uint;
257                        k = 1_u64;
258                        while k < width as libc::c_ulong {
259                            octet = *(*parser).raw_buffer.pointer.wrapping_offset(k as isize);
260                            if octet & 0xC0 != 0x80 {
261                                return yaml_parser_set_reader_error(
262                                    parser,
263                                    b"invalid trailing UTF-8 octet\0" as *const u8
264                                        as *const libc::c_char,
265                                    (*parser).offset.force_add(k),
266                                    octet as libc::c_int,
267                                );
268                            }
269                            value = (value << 6).force_add((octet & 0x3F) as libc::c_uint);
270                            k = k.force_add(1);
271                        }
272                        if !(width == 1
273                            || width == 2 && value >= 0x80
274                            || width == 3 && value >= 0x800
275                            || width == 4 && value >= 0x10000)
276                        {
277                            return yaml_parser_set_reader_error(
278                                parser,
279                                b"invalid length of a UTF-8 sequence\0" as *const u8
280                                    as *const libc::c_char,
281                                (*parser).offset,
282                                -1,
283                            );
284                        }
285                        if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF {
286                            return yaml_parser_set_reader_error(
287                                parser,
288                                b"invalid Unicode character\0" as *const u8 as *const libc::c_char,
289                                (*parser).offset,
290                                value as libc::c_int,
291                            );
292                        }
293                    }
294                }
295                YAML_UTF16LE_ENCODING | YAML_UTF16BE_ENCODING => {
296                    low = if (*parser).encoding == YAML_UTF16LE_ENCODING {
297                        0
298                    } else {
299                        1
300                    };
301                    high = if (*parser).encoding == YAML_UTF16LE_ENCODING {
302                        1
303                    } else {
304                        0
305                    };
306                    if raw_unread < 2_u64 {
307                        if (*parser).eof {
308                            return yaml_parser_set_reader_error(
309                                parser,
310                                b"incomplete UTF-16 character\0" as *const u8
311                                    as *const libc::c_char,
312                                (*parser).offset,
313                                -1,
314                            );
315                        }
316                        incomplete = true;
317                    } else {
318                        value = (*(*parser).raw_buffer.pointer.wrapping_offset(low as isize)
319                            as libc::c_int
320                            + ((*(*parser).raw_buffer.pointer.wrapping_offset(high as isize)
321                                as libc::c_int)
322                                << 8)) as libc::c_uint;
323                        if value & 0xFC00 == 0xDC00 {
324                            return yaml_parser_set_reader_error(
325                                parser,
326                                b"unexpected low surrogate area\0" as *const u8
327                                    as *const libc::c_char,
328                                (*parser).offset,
329                                value as libc::c_int,
330                            );
331                        }
332                        if value & 0xFC00 == 0xD800 {
333                            width = 4;
334                            if raw_unread < 4_u64 {
335                                if (*parser).eof {
336                                    return yaml_parser_set_reader_error(
337                                        parser,
338                                        b"incomplete UTF-16 surrogate pair\0" as *const u8
339                                            as *const libc::c_char,
340                                        (*parser).offset,
341                                        -1,
342                                    );
343                                }
344                                incomplete = true;
345                            } else {
346                                value2 = (*(*parser)
347                                    .raw_buffer
348                                    .pointer
349                                    .wrapping_offset((low + 2) as isize)
350                                    as libc::c_int
351                                    + ((*(*parser)
352                                        .raw_buffer
353                                        .pointer
354                                        .wrapping_offset((high + 2) as isize)
355                                        as libc::c_int)
356                                        << 8))
357                                    as libc::c_uint;
358                                if value2 & 0xFC00 != 0xDC00 {
359                                    return yaml_parser_set_reader_error(
360                                        parser,
361                                        b"expected low surrogate area\0" as *const u8
362                                            as *const libc::c_char,
363                                        (*parser).offset.force_add(2_u64),
364                                        value2 as libc::c_int,
365                                    );
366                                }
367                                value = 0x10000_u32
368                                    .force_add((value & 0x3FF) << 10)
369                                    .force_add(value2 & 0x3FF);
370                            }
371                        } else {
372                            width = 2;
373                        }
374                    }
375                }
376                _ => {}
377            }
378            if incomplete {
379                break;
380            }
381            if !(value == 0x9
382                || value == 0xA
383                || value == 0xD
384                || value >= 0x20 && value <= 0x7E
385                || value == 0x85
386                || value >= 0xA0 && value <= 0xD7FF
387                || value >= 0xE000 && value <= 0xFFFD
388                || value >= 0x10000 && value <= 0x10FFFF)
389            {
390                return yaml_parser_set_reader_error(
391                    parser,
392                    b"control characters are not allowed\0" as *const u8 as *const libc::c_char,
393                    (*parser).offset,
394                    value as libc::c_int,
395                );
396            }
397            let fresh14 = addr_of_mut!((*parser).raw_buffer.pointer);
398            *fresh14 = (*fresh14).wrapping_offset(width as isize);
399            let fresh15 = addr_of_mut!((*parser).offset);
400            *fresh15 = (*fresh15 as libc::c_ulong).force_add(width as libc::c_ulong) as size_t;
401            if value <= 0x7F {
402                let fresh16 = addr_of_mut!((*parser).buffer.last);
403                let fresh17 = *fresh16;
404                *fresh16 = (*fresh16).wrapping_offset(1);
405                *fresh17 = value as yaml_char_t;
406            } else if value <= 0x7FF {
407                let fresh18 = addr_of_mut!((*parser).buffer.last);
408                let fresh19 = *fresh18;
409                *fresh18 = (*fresh18).wrapping_offset(1);
410                *fresh19 = 0xC0_u32.force_add(value >> 6) as yaml_char_t;
411                let fresh20 = addr_of_mut!((*parser).buffer.last);
412                let fresh21 = *fresh20;
413                *fresh20 = (*fresh20).wrapping_offset(1);
414                *fresh21 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
415            } else if value <= 0xFFFF {
416                let fresh22 = addr_of_mut!((*parser).buffer.last);
417                let fresh23 = *fresh22;
418                *fresh22 = (*fresh22).wrapping_offset(1);
419                *fresh23 = 0xE0_u32.force_add(value >> 12) as yaml_char_t;
420                let fresh24 = addr_of_mut!((*parser).buffer.last);
421                let fresh25 = *fresh24;
422                *fresh24 = (*fresh24).wrapping_offset(1);
423                *fresh25 = 0x80_u32.force_add(value >> 6 & 0x3F) as yaml_char_t;
424                let fresh26 = addr_of_mut!((*parser).buffer.last);
425                let fresh27 = *fresh26;
426                *fresh26 = (*fresh26).wrapping_offset(1);
427                *fresh27 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
428            } else {
429                let fresh28 = addr_of_mut!((*parser).buffer.last);
430                let fresh29 = *fresh28;
431                *fresh28 = (*fresh28).wrapping_offset(1);
432                *fresh29 = 0xF0_u32.force_add(value >> 18) as yaml_char_t;
433                let fresh30 = addr_of_mut!((*parser).buffer.last);
434                let fresh31 = *fresh30;
435                *fresh30 = (*fresh30).wrapping_offset(1);
436                *fresh31 = 0x80_u32.force_add(value >> 12 & 0x3F) as yaml_char_t;
437                let fresh32 = addr_of_mut!((*parser).buffer.last);
438                let fresh33 = *fresh32;
439                *fresh32 = (*fresh32).wrapping_offset(1);
440                *fresh33 = 0x80_u32.force_add(value >> 6 & 0x3F) as yaml_char_t;
441                let fresh34 = addr_of_mut!((*parser).buffer.last);
442                let fresh35 = *fresh34;
443                *fresh34 = (*fresh34).wrapping_offset(1);
444                *fresh35 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
445            }
446            let fresh36 = addr_of_mut!((*parser).unread);
447            *fresh36 = (*fresh36).force_add(1);
448        }
449        if (*parser).eof {
450            let fresh37 = addr_of_mut!((*parser).buffer.last);
451            let fresh38 = *fresh37;
452            *fresh37 = (*fresh37).wrapping_offset(1);
453            *fresh38 = b'\0';
454            let fresh39 = addr_of_mut!((*parser).unread);
455            *fresh39 = (*fresh39).force_add(1);
456            return OK;
457        }
458    }
459    if (*parser).offset >= (!0_u64).wrapping_div(2_u64) {
460        return yaml_parser_set_reader_error(
461            parser,
462            b"input is too long\0" as *const u8 as *const libc::c_char,
463            (*parser).offset,
464            -1,
465        );
466    }
467    OK
468}