1use crate::externs::{memcmp, memmove};
2use crate::ops::ForceAdd as _;
3use crate::success::{Success, FAIL, OK};
4use crate::yaml::{size_t, yaml_char_t};
5use crate::{
6 libc, yaml_parser_t, PointerExt, YAML_ANY_ENCODING, YAML_READER_ERROR, YAML_UTF16BE_ENCODING,
7 YAML_UTF16LE_ENCODING, YAML_UTF8_ENCODING,
8};
9use core::ptr::addr_of_mut;
10
11unsafe fn yaml_parser_set_reader_error(
12 parser: *mut yaml_parser_t,
13 problem: *const libc::c_char,
14 offset: size_t,
15 value: libc::c_int,
16) -> Success {
17 (*parser).error = YAML_READER_ERROR;
18 let fresh0 = addr_of_mut!((*parser).problem);
19 *fresh0 = problem;
20 (*parser).problem_offset = offset;
21 (*parser).problem_value = value;
22 FAIL
23}
24
25const BOM_UTF8: *const libc::c_char = b"\xEF\xBB\xBF\0" as *const u8 as *const libc::c_char;
26const BOM_UTF16LE: *const libc::c_char = b"\xFF\xFE\0" as *const u8 as *const libc::c_char;
27const BOM_UTF16BE: *const libc::c_char = b"\xFE\xFF\0" as *const u8 as *const libc::c_char;
28
29unsafe fn yaml_parser_determine_encoding(parser: *mut yaml_parser_t) -> Success {
30 while !(*parser).eof
31 && ((*parser)
32 .raw_buffer
33 .last
34 .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long)
35 < 3_i64
36 {
37 if yaml_parser_update_raw_buffer(parser).fail {
38 return FAIL;
39 }
40 }
41 if (*parser)
42 .raw_buffer
43 .last
44 .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
45 >= 2_i64
46 && memcmp(
47 (*parser).raw_buffer.pointer as *const libc::c_void,
48 BOM_UTF16LE as *const libc::c_void,
49 2_u64,
50 ) == 0
51 {
52 (*parser).encoding = YAML_UTF16LE_ENCODING;
53 let fresh1 = addr_of_mut!((*parser).raw_buffer.pointer);
54 *fresh1 = (*fresh1).wrapping_offset(2_isize);
55 let fresh2 = addr_of_mut!((*parser).offset);
56 *fresh2 = (*fresh2 as libc::c_ulong).force_add(2_u64) as size_t;
57 } else if (*parser)
58 .raw_buffer
59 .last
60 .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
61 >= 2_i64
62 && memcmp(
63 (*parser).raw_buffer.pointer as *const libc::c_void,
64 BOM_UTF16BE as *const libc::c_void,
65 2_u64,
66 ) == 0
67 {
68 (*parser).encoding = YAML_UTF16BE_ENCODING;
69 let fresh3 = addr_of_mut!((*parser).raw_buffer.pointer);
70 *fresh3 = (*fresh3).wrapping_offset(2_isize);
71 let fresh4 = addr_of_mut!((*parser).offset);
72 *fresh4 = (*fresh4 as libc::c_ulong).force_add(2_u64) as size_t;
73 } else if (*parser)
74 .raw_buffer
75 .last
76 .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
77 >= 3_i64
78 && memcmp(
79 (*parser).raw_buffer.pointer as *const libc::c_void,
80 BOM_UTF8 as *const libc::c_void,
81 3_u64,
82 ) == 0
83 {
84 (*parser).encoding = YAML_UTF8_ENCODING;
85 let fresh5 = addr_of_mut!((*parser).raw_buffer.pointer);
86 *fresh5 = (*fresh5).wrapping_offset(3_isize);
87 let fresh6 = addr_of_mut!((*parser).offset);
88 *fresh6 = (*fresh6 as libc::c_ulong).force_add(3_u64) as size_t;
89 } else {
90 (*parser).encoding = YAML_UTF8_ENCODING;
91 }
92 OK
93}
94
95unsafe fn yaml_parser_update_raw_buffer(parser: *mut yaml_parser_t) -> Success {
96 let mut size_read: size_t = 0_u64;
97 if (*parser).raw_buffer.start == (*parser).raw_buffer.pointer
98 && (*parser).raw_buffer.last == (*parser).raw_buffer.end
99 {
100 return OK;
101 }
102 if (*parser).eof {
103 return OK;
104 }
105 if (*parser).raw_buffer.start < (*parser).raw_buffer.pointer
106 && (*parser).raw_buffer.pointer < (*parser).raw_buffer.last
107 {
108 memmove(
109 (*parser).raw_buffer.start as *mut libc::c_void,
110 (*parser).raw_buffer.pointer as *const libc::c_void,
111 (*parser)
112 .raw_buffer
113 .last
114 .c_offset_from((*parser).raw_buffer.pointer) as libc::c_long
115 as libc::c_ulong,
116 );
117 }
118 let fresh7 = addr_of_mut!((*parser).raw_buffer.last);
119 *fresh7 = (*fresh7).wrapping_offset(
120 -((*parser)
121 .raw_buffer
122 .pointer
123 .c_offset_from((*parser).raw_buffer.start) as libc::c_long as isize),
124 );
125 let fresh8 = addr_of_mut!((*parser).raw_buffer.pointer);
126 *fresh8 = (*parser).raw_buffer.start;
127 if (*parser).read_handler.expect("non-null function pointer")(
128 (*parser).read_handler_data,
129 (*parser).raw_buffer.last,
130 (*parser)
131 .raw_buffer
132 .end
133 .c_offset_from((*parser).raw_buffer.last) as size_t,
134 addr_of_mut!(size_read),
135 ) == 0
136 {
137 return yaml_parser_set_reader_error(
138 parser,
139 b"input error\0" as *const u8 as *const libc::c_char,
140 (*parser).offset,
141 -1,
142 );
143 }
144 let fresh9 = addr_of_mut!((*parser).raw_buffer.last);
145 *fresh9 = (*fresh9).wrapping_offset(size_read as isize);
146 if size_read == 0 {
147 (*parser).eof = true;
148 }
149 OK
150}
151
152pub(crate) unsafe fn yaml_parser_update_buffer(
153 parser: *mut yaml_parser_t,
154 length: size_t,
155) -> Success {
156 let mut first = true;
157 __assert!(((*parser).read_handler).is_some());
158 if (*parser).eof && (*parser).raw_buffer.pointer == (*parser).raw_buffer.last {
159 return OK;
160 }
161 if (*parser).unread >= length {
162 return OK;
163 }
164 if (*parser).encoding == YAML_ANY_ENCODING {
165 if yaml_parser_determine_encoding(parser).fail {
166 return FAIL;
167 }
168 }
169 if (*parser).buffer.start < (*parser).buffer.pointer
170 && (*parser).buffer.pointer < (*parser).buffer.last
171 {
172 let size: size_t = (*parser)
173 .buffer
174 .last
175 .c_offset_from((*parser).buffer.pointer) as size_t;
176 memmove(
177 (*parser).buffer.start as *mut libc::c_void,
178 (*parser).buffer.pointer as *const libc::c_void,
179 size,
180 );
181 let fresh10 = addr_of_mut!((*parser).buffer.pointer);
182 *fresh10 = (*parser).buffer.start;
183 let fresh11 = addr_of_mut!((*parser).buffer.last);
184 *fresh11 = (*parser).buffer.start.wrapping_offset(size as isize);
185 } else if (*parser).buffer.pointer == (*parser).buffer.last {
186 let fresh12 = addr_of_mut!((*parser).buffer.pointer);
187 *fresh12 = (*parser).buffer.start;
188 let fresh13 = addr_of_mut!((*parser).buffer.last);
189 *fresh13 = (*parser).buffer.start;
190 }
191 while (*parser).unread < length {
192 if !first || (*parser).raw_buffer.pointer == (*parser).raw_buffer.last {
193 if yaml_parser_update_raw_buffer(parser).fail {
194 return FAIL;
195 }
196 }
197 first = false;
198 while (*parser).raw_buffer.pointer != (*parser).raw_buffer.last {
199 let mut value: libc::c_uint = 0;
200 let value2: libc::c_uint;
201 let mut incomplete = false;
202 let mut octet: libc::c_uchar;
203 let mut width: libc::c_uint = 0;
204 let low: libc::c_int;
205 let high: libc::c_int;
206 let mut k: size_t;
207 let raw_unread: size_t = (*parser)
208 .raw_buffer
209 .last
210 .c_offset_from((*parser).raw_buffer.pointer)
211 as size_t;
212 match (*parser).encoding {
213 YAML_UTF8_ENCODING => {
214 octet = *(*parser).raw_buffer.pointer;
215 width = if octet & 0x80 == 0 {
216 1
217 } else if octet & 0xE0 == 0xC0 {
218 2
219 } else if octet & 0xF0 == 0xE0 {
220 3
221 } else if octet & 0xF8 == 0xF0 {
222 4
223 } else {
224 0
225 } as libc::c_uint;
226 if width == 0 {
227 return yaml_parser_set_reader_error(
228 parser,
229 b"invalid leading UTF-8 octet\0" as *const u8 as *const libc::c_char,
230 (*parser).offset,
231 octet as libc::c_int,
232 );
233 }
234 if width as libc::c_ulong > raw_unread {
235 if (*parser).eof {
236 return yaml_parser_set_reader_error(
237 parser,
238 b"incomplete UTF-8 octet sequence\0" as *const u8
239 as *const libc::c_char,
240 (*parser).offset,
241 -1,
242 );
243 }
244 incomplete = true;
245 } else {
246 value = if octet & 0x80 == 0 {
247 octet & 0x7F
248 } else if octet & 0xE0 == 0xC0 {
249 octet & 0x1F
250 } else if octet & 0xF0 == 0xE0 {
251 octet & 0xF
252 } else if octet & 0xF8 == 0xF0 {
253 octet & 0x7
254 } else {
255 0
256 } as libc::c_uint;
257 k = 1_u64;
258 while k < width as libc::c_ulong {
259 octet = *(*parser).raw_buffer.pointer.wrapping_offset(k as isize);
260 if octet & 0xC0 != 0x80 {
261 return yaml_parser_set_reader_error(
262 parser,
263 b"invalid trailing UTF-8 octet\0" as *const u8
264 as *const libc::c_char,
265 (*parser).offset.force_add(k),
266 octet as libc::c_int,
267 );
268 }
269 value = (value << 6).force_add((octet & 0x3F) as libc::c_uint);
270 k = k.force_add(1);
271 }
272 if !(width == 1
273 || width == 2 && value >= 0x80
274 || width == 3 && value >= 0x800
275 || width == 4 && value >= 0x10000)
276 {
277 return yaml_parser_set_reader_error(
278 parser,
279 b"invalid length of a UTF-8 sequence\0" as *const u8
280 as *const libc::c_char,
281 (*parser).offset,
282 -1,
283 );
284 }
285 if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF {
286 return yaml_parser_set_reader_error(
287 parser,
288 b"invalid Unicode character\0" as *const u8 as *const libc::c_char,
289 (*parser).offset,
290 value as libc::c_int,
291 );
292 }
293 }
294 }
295 YAML_UTF16LE_ENCODING | YAML_UTF16BE_ENCODING => {
296 low = if (*parser).encoding == YAML_UTF16LE_ENCODING {
297 0
298 } else {
299 1
300 };
301 high = if (*parser).encoding == YAML_UTF16LE_ENCODING {
302 1
303 } else {
304 0
305 };
306 if raw_unread < 2_u64 {
307 if (*parser).eof {
308 return yaml_parser_set_reader_error(
309 parser,
310 b"incomplete UTF-16 character\0" as *const u8
311 as *const libc::c_char,
312 (*parser).offset,
313 -1,
314 );
315 }
316 incomplete = true;
317 } else {
318 value = (*(*parser).raw_buffer.pointer.wrapping_offset(low as isize)
319 as libc::c_int
320 + ((*(*parser).raw_buffer.pointer.wrapping_offset(high as isize)
321 as libc::c_int)
322 << 8)) as libc::c_uint;
323 if value & 0xFC00 == 0xDC00 {
324 return yaml_parser_set_reader_error(
325 parser,
326 b"unexpected low surrogate area\0" as *const u8
327 as *const libc::c_char,
328 (*parser).offset,
329 value as libc::c_int,
330 );
331 }
332 if value & 0xFC00 == 0xD800 {
333 width = 4;
334 if raw_unread < 4_u64 {
335 if (*parser).eof {
336 return yaml_parser_set_reader_error(
337 parser,
338 b"incomplete UTF-16 surrogate pair\0" as *const u8
339 as *const libc::c_char,
340 (*parser).offset,
341 -1,
342 );
343 }
344 incomplete = true;
345 } else {
346 value2 = (*(*parser)
347 .raw_buffer
348 .pointer
349 .wrapping_offset((low + 2) as isize)
350 as libc::c_int
351 + ((*(*parser)
352 .raw_buffer
353 .pointer
354 .wrapping_offset((high + 2) as isize)
355 as libc::c_int)
356 << 8))
357 as libc::c_uint;
358 if value2 & 0xFC00 != 0xDC00 {
359 return yaml_parser_set_reader_error(
360 parser,
361 b"expected low surrogate area\0" as *const u8
362 as *const libc::c_char,
363 (*parser).offset.force_add(2_u64),
364 value2 as libc::c_int,
365 );
366 }
367 value = 0x10000_u32
368 .force_add((value & 0x3FF) << 10)
369 .force_add(value2 & 0x3FF);
370 }
371 } else {
372 width = 2;
373 }
374 }
375 }
376 _ => {}
377 }
378 if incomplete {
379 break;
380 }
381 if !(value == 0x9
382 || value == 0xA
383 || value == 0xD
384 || value >= 0x20 && value <= 0x7E
385 || value == 0x85
386 || value >= 0xA0 && value <= 0xD7FF
387 || value >= 0xE000 && value <= 0xFFFD
388 || value >= 0x10000 && value <= 0x10FFFF)
389 {
390 return yaml_parser_set_reader_error(
391 parser,
392 b"control characters are not allowed\0" as *const u8 as *const libc::c_char,
393 (*parser).offset,
394 value as libc::c_int,
395 );
396 }
397 let fresh14 = addr_of_mut!((*parser).raw_buffer.pointer);
398 *fresh14 = (*fresh14).wrapping_offset(width as isize);
399 let fresh15 = addr_of_mut!((*parser).offset);
400 *fresh15 = (*fresh15 as libc::c_ulong).force_add(width as libc::c_ulong) as size_t;
401 if value <= 0x7F {
402 let fresh16 = addr_of_mut!((*parser).buffer.last);
403 let fresh17 = *fresh16;
404 *fresh16 = (*fresh16).wrapping_offset(1);
405 *fresh17 = value as yaml_char_t;
406 } else if value <= 0x7FF {
407 let fresh18 = addr_of_mut!((*parser).buffer.last);
408 let fresh19 = *fresh18;
409 *fresh18 = (*fresh18).wrapping_offset(1);
410 *fresh19 = 0xC0_u32.force_add(value >> 6) as yaml_char_t;
411 let fresh20 = addr_of_mut!((*parser).buffer.last);
412 let fresh21 = *fresh20;
413 *fresh20 = (*fresh20).wrapping_offset(1);
414 *fresh21 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
415 } else if value <= 0xFFFF {
416 let fresh22 = addr_of_mut!((*parser).buffer.last);
417 let fresh23 = *fresh22;
418 *fresh22 = (*fresh22).wrapping_offset(1);
419 *fresh23 = 0xE0_u32.force_add(value >> 12) as yaml_char_t;
420 let fresh24 = addr_of_mut!((*parser).buffer.last);
421 let fresh25 = *fresh24;
422 *fresh24 = (*fresh24).wrapping_offset(1);
423 *fresh25 = 0x80_u32.force_add(value >> 6 & 0x3F) as yaml_char_t;
424 let fresh26 = addr_of_mut!((*parser).buffer.last);
425 let fresh27 = *fresh26;
426 *fresh26 = (*fresh26).wrapping_offset(1);
427 *fresh27 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
428 } else {
429 let fresh28 = addr_of_mut!((*parser).buffer.last);
430 let fresh29 = *fresh28;
431 *fresh28 = (*fresh28).wrapping_offset(1);
432 *fresh29 = 0xF0_u32.force_add(value >> 18) as yaml_char_t;
433 let fresh30 = addr_of_mut!((*parser).buffer.last);
434 let fresh31 = *fresh30;
435 *fresh30 = (*fresh30).wrapping_offset(1);
436 *fresh31 = 0x80_u32.force_add(value >> 12 & 0x3F) as yaml_char_t;
437 let fresh32 = addr_of_mut!((*parser).buffer.last);
438 let fresh33 = *fresh32;
439 *fresh32 = (*fresh32).wrapping_offset(1);
440 *fresh33 = 0x80_u32.force_add(value >> 6 & 0x3F) as yaml_char_t;
441 let fresh34 = addr_of_mut!((*parser).buffer.last);
442 let fresh35 = *fresh34;
443 *fresh34 = (*fresh34).wrapping_offset(1);
444 *fresh35 = 0x80_u32.force_add(value & 0x3F) as yaml_char_t;
445 }
446 let fresh36 = addr_of_mut!((*parser).unread);
447 *fresh36 = (*fresh36).force_add(1);
448 }
449 if (*parser).eof {
450 let fresh37 = addr_of_mut!((*parser).buffer.last);
451 let fresh38 = *fresh37;
452 *fresh37 = (*fresh37).wrapping_offset(1);
453 *fresh38 = b'\0';
454 let fresh39 = addr_of_mut!((*parser).unread);
455 *fresh39 = (*fresh39).force_add(1);
456 return OK;
457 }
458 }
459 if (*parser).offset >= (!0_u64).wrapping_div(2_u64) {
460 return yaml_parser_set_reader_error(
461 parser,
462 b"input is too long\0" as *const u8 as *const libc::c_char,
463 (*parser).offset,
464 -1,
465 );
466 }
467 OK
468}