shadow_rs/core/
configuration.rs

//! Shadow's configuration and CLI parsing code using [serde] and [clap]. This contains all of
//! Shadow's configuration options, some of which are also exposed as CLI options.
//!
//! Shadow uses [schemars] to get the option description (its doc comment) and default value so that
//! it can be shown in the CLI help text.
//!
//! This code should be careful about validating or interpreting values. It should be focused on
//! parsing and checking that the format is correct, and not validating the values. For example for
//! options that take paths, this code should not verify that the path actually exists or perform
//! any path canonicalization. That should be left to other code outside of this module. This is so
//! that the configuration parsing does not become environment-dependent. If a configuration file
//! parses on one system, it should parse successfully on other systems as well.
13
14use std::borrow::Cow;
15use std::collections::{BTreeMap, HashSet};
16use std::ffi::{CStr, CString, OsStr, OsString};
17use std::os::unix::ffi::OsStrExt;
18use std::str::FromStr;
19
20use clap::Parser;
21use logger as c_log;
22use merge::Merge;
23use once_cell::sync::Lazy;
24use schemars::{JsonSchema, schema_for};
25use serde::{Deserialize, Serialize};
26use shadow_shim_helper_rs::simulation_time::SimulationTime;
27
28use crate::cshadow as c;
29use crate::host::syscall::formatter::FmtOptions;
30use crate::utility::units::{self, Unit};
31
/// Text shown at the start of the CLI help output (passed to clap as `about`).
const START_HELP_TEXT: &str = "\
    Run real applications over simulated networks.\n\n\
    For documentation, visit https://shadow.github.io/docs/guide";
35
/// Text shown at the end of the CLI help output (passed to clap as `after_help`). The line
/// continuations join everything into a single line without embedded newlines.
const END_HELP_TEXT: &str = "\
    If units are not specified, all values are assumed to be given in their base \
    unit (seconds, bytes, bits, etc). Units can optionally be specified (for \
    example: '1024 B', '1024 bytes', '1 KiB', '1 kibibyte', etc) and are \
    case-sensitive.";
41
42// clap requires a 'static str for the version
43static VERSION: Lazy<String> = Lazy::new(crate::shadow::version);
44
45#[derive(Debug, Clone, Parser)]
46#[clap(name = "Shadow", about = START_HELP_TEXT, after_help = END_HELP_TEXT)]
47#[clap(version = VERSION.as_str())]
48#[clap(next_display_order = None)]
49// clap only shows the possible values for bool options (unless we add support for the other
50// non-bool options in the future), which isn't very helpful
51#[clap(hide_possible_values = true)]
52pub struct CliOptions {
53    /// Path to the Shadow configuration file. Use '-' to read from stdin
54    #[clap(required_unless_present_any(&["show_build_info", "shm_cleanup"]))]
55    pub config: Option<String>,
56
57    /// Pause to allow gdb to attach
58    #[clap(long, short = 'g')]
59    pub gdb: bool,
60
61    /// Pause after starting any processes on the comma-delimited list of hostnames
62    #[clap(value_parser = parse_set_str)]
63    #[clap(long, value_name = "hostnames")]
64    pub debug_hosts: Option<HashSet<String>>,
65
66    /// Exit after running shared memory cleanup routine
67    #[clap(long, exclusive(true))]
68    pub shm_cleanup: bool,
69
70    /// Exit after printing build information
71    #[clap(long, exclusive(true))]
72    pub show_build_info: bool,
73
74    /// Exit after printing the final configuration
75    #[clap(long)]
76    pub show_config: bool,
77
78    #[clap(flatten)]
79    pub general: GeneralOptions,
80
81    #[clap(flatten)]
82    pub network: NetworkOptions,
83
84    #[clap(flatten)]
85    pub host_option_defaults: HostDefaultOptions,
86
87    #[clap(flatten)]
88    pub experimental: ExperimentalOptions,
89}
90
91/// Options contained in a configuration file.
92#[derive(Debug, Clone, Serialize, Deserialize)]
93#[serde(deny_unknown_fields)]
94pub struct ConfigFileOptions {
95    pub general: GeneralOptions,
96
97    pub network: NetworkOptions,
98
99    #[serde(default)]
100    pub host_option_defaults: HostDefaultOptions,
101
102    #[serde(default)]
103    pub experimental: ExperimentalOptions,
104
105    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
106    // since shadow parses to a serde_yaml::Value initially, we don't need to worry about duplicate
107    // hostnames here
108    pub hosts: BTreeMap<HostName, HostOptions>,
109}
110
111/// Shadow configuration options after processing command-line and configuration file options.
112#[derive(Debug, Clone, Serialize)]
113pub struct ConfigOptions {
114    pub general: GeneralOptions,
115
116    pub network: NetworkOptions,
117
118    pub experimental: ExperimentalOptions,
119
120    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
121    pub hosts: BTreeMap<HostName, HostOptions>,
122}
123
124impl ConfigOptions {
125    pub fn new(mut config_file: ConfigFileOptions, options: CliOptions) -> Self {
126        // the `HostDefaultOptions::default` contains only `None` values, so we must first merge the
127        // config file with the real defaults from `HostDefaultOptions::new_with_defaults`
128        config_file.host_option_defaults = config_file
129            .host_option_defaults
130            .with_defaults(HostDefaultOptions::new_with_defaults());
131
132        // override config options with command line options
133        config_file.general = options.general.with_defaults(config_file.general);
134        config_file.network = options.network.with_defaults(config_file.network);
135        config_file.host_option_defaults = options
136            .host_option_defaults
137            .with_defaults(config_file.host_option_defaults);
138        config_file.experimental = options.experimental.with_defaults(config_file.experimental);
139
140        // copy the host defaults to all of the hosts
141        for host in config_file.hosts.values_mut() {
142            host.host_options = host
143                .host_options
144                .clone()
145                .with_defaults(config_file.host_option_defaults.clone());
146        }
147
148        Self {
149            general: config_file.general,
150            network: config_file.network,
151            experimental: config_file.experimental,
152            hosts: config_file.hosts,
153        }
154    }
155
156    pub fn model_unblocked_syscall_latency(&self) -> bool {
157        self.general.model_unblocked_syscall_latency.unwrap()
158    }
159
160    pub fn max_unapplied_cpu_latency(&self) -> SimulationTime {
161        let nanos = self.experimental.max_unapplied_cpu_latency.unwrap();
162        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
163        SimulationTime::from_nanos(nanos)
164    }
165
166    pub fn unblocked_syscall_latency(&self) -> SimulationTime {
167        let nanos = self.experimental.unblocked_syscall_latency.unwrap();
168        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
169        SimulationTime::from_nanos(nanos)
170    }
171
172    pub fn unblocked_vdso_latency(&self) -> SimulationTime {
173        let nanos = self.experimental.unblocked_vdso_latency.unwrap();
174        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
175        SimulationTime::from_nanos(nanos)
176    }
177
178    pub fn native_preemption_enabled(&self) -> bool {
179        self.experimental.native_preemption_enabled.unwrap()
180    }
181
182    pub fn native_preemption_native_interval(
183        &self,
184    ) -> anyhow::Result<linux_api::time::kernel_old_timeval> {
185        let t = self.experimental.native_preemption_native_interval.unwrap();
186        let t = core::time::Duration::from(t);
187        // TODO: Would be a little nicer to surface this error when we parse the
188        // config. I think ideally we'd update the type such that some bounds
189        // can be enforced at parse time.
190        if t < core::time::Duration::from_micros(1) {
191            return Err(anyhow::anyhow!(
192                "native_preemption_native_interval must be >= 1 microsecond. Got {t:?}."
193            ));
194        }
195        let rv = linux_api::time::kernel_old_timeval {
196            tv_sec: t.as_secs().try_into().unwrap(),
197            tv_usec: t.subsec_micros().into(),
198        };
199        assert!(!(rv.tv_sec == 0 && rv.tv_usec == 0));
200        Ok(rv)
201    }
202
203    pub fn native_preemption_sim_interval(&self) -> SimulationTime {
204        let t = self.experimental.native_preemption_sim_interval.unwrap();
205        let nanos = t.convert(units::TimePrefix::Nano).unwrap().value();
206        SimulationTime::from_nanos(nanos)
207    }
208
209    pub fn strace_logging_mode(&self) -> Option<FmtOptions> {
210        match self.experimental.strace_logging_mode.as_ref().unwrap() {
211            StraceLoggingMode::Standard => Some(FmtOptions::Standard),
212            StraceLoggingMode::Deterministic => Some(FmtOptions::Deterministic),
213            StraceLoggingMode::Off => None,
214        }
215    }
216}
217
218/// Help messages used by Clap for command line arguments, combining the doc string with
219/// the Serde default.
220static GENERAL_HELP: Lazy<std::collections::HashMap<String, String>> =
221    Lazy::new(|| generate_help_strs(schema_for!(GeneralOptions)));
222
223// these must all be Option types since they aren't required by the CLI, even if they're
224// required in the configuration file
225#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
226#[clap(next_help_heading = "General (Override configuration file options)")]
227#[clap(next_display_order = None)]
228#[serde(deny_unknown_fields)]
229#[merge(strategy = merge::option::overwrite_none)]
230pub struct GeneralOptions {
231    /// The simulated time at which simulated processes are sent a SIGKILL signal
232    #[clap(long, value_name = "seconds")]
233    #[clap(help = GENERAL_HELP.get("stop_time").unwrap().as_str())]
234    pub stop_time: Option<units::Time<units::TimePrefix>>,
235
236    /// Initialize randomness using seed N
237    #[clap(long, value_name = "N")]
238    #[clap(help = GENERAL_HELP.get("seed").unwrap().as_str())]
239    #[serde(default = "default_some_1")]
240    pub seed: Option<u32>,
241
242    /// How many parallel threads to use to run the simulation. A value of 0 will allow Shadow to
243    /// choose the number of threads.
244    #[clap(long, short = 'p', value_name = "cores")]
245    #[clap(help = GENERAL_HELP.get("parallelism").unwrap().as_str())]
246    #[serde(default = "default_some_0")]
247    pub parallelism: Option<u32>,
248
249    /// The simulated time that ends Shadow's high network bandwidth/reliability bootstrap period
250    #[clap(long, value_name = "seconds")]
251    #[clap(help = GENERAL_HELP.get("bootstrap_end_time").unwrap().as_str())]
252    #[serde(default = "default_some_time_0")]
253    pub bootstrap_end_time: Option<units::Time<units::TimePrefix>>,
254
255    /// Log level of output written on stdout. If Shadow was built in release mode, then log
256    /// messages at level 'trace' will always be dropped
257    #[clap(long, short = 'l', value_name = "level")]
258    #[clap(help = GENERAL_HELP.get("log_level").unwrap().as_str())]
259    #[serde(default = "default_some_info")]
260    pub log_level: Option<LogLevel>,
261
262    /// Interval at which to print heartbeat messages
263    #[clap(long, value_name = "seconds")]
264    #[clap(help = GENERAL_HELP.get("heartbeat_interval").unwrap().as_str())]
265    #[serde(default = "default_some_nullable_time_1")]
266    pub heartbeat_interval: Option<NullableOption<units::Time<units::TimePrefix>>>,
267
268    /// Path to store simulation output
269    #[clap(long, short = 'd', value_name = "path")]
270    #[clap(help = GENERAL_HELP.get("data_directory").unwrap().as_str())]
271    #[serde(default = "default_data_directory")]
272    pub data_directory: Option<String>,
273
274    /// Path to recursively copy during startup and use as the data-directory
275    #[clap(long, short = 'e', value_name = "path")]
276    #[clap(help = GENERAL_HELP.get("template_directory").unwrap().as_str())]
277    #[serde(default)]
278    pub template_directory: Option<NullableOption<String>>,
279
280    /// Show the simulation progress on stderr
281    #[clap(long, value_name = "bool")]
282    #[clap(help = GENERAL_HELP.get("progress").unwrap().as_str())]
283    #[serde(default = "default_some_false")]
284    pub progress: Option<bool>,
285
286    /// Model syscalls and VDSO functions that don't block as having some
287    /// latency. This should have minimal effect on typical simulations, but
288    /// can be helpful for programs with "busy loops" that otherwise deadlock
289    /// under Shadow.
290    #[clap(long, value_name = "bool")]
291    #[clap(help = GENERAL_HELP.get("model_unblocked_syscall_latency").unwrap().as_str())]
292    #[serde(default = "default_some_false")]
293    pub model_unblocked_syscall_latency: Option<bool>,
294}
295
296impl GeneralOptions {
297    /// Replace unset (`None`) values of `base` with values from `default`.
298    pub fn with_defaults(mut self, default: Self) -> Self {
299        self.merge(default);
300        self
301    }
302}
303
304/// Help messages used by Clap for command line arguments, combining the doc string with
305/// the Serde default.
306static NETWORK_HELP: Lazy<std::collections::HashMap<String, String>> =
307    Lazy::new(|| generate_help_strs(schema_for!(NetworkOptions)));
308
309// these must all be Option types since they aren't required by the CLI, even if they're
310// required in the configuration file
311#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
312#[clap(next_help_heading = "Network (Override network options)")]
313#[clap(next_display_order = None)]
314#[serde(deny_unknown_fields)]
315#[merge(strategy = merge::option::overwrite_none)]
316pub struct NetworkOptions {
317    /// The network topology graph
318    #[clap(skip)]
319    pub graph: Option<GraphOptions>,
320
321    /// When routing packets, follow the shortest path rather than following a direct
322    /// edge between nodes. If false, the network graph is required to be complete.
323    #[serde(default = "default_some_true")]
324    #[clap(long, value_name = "bool")]
325    #[clap(help = NETWORK_HELP.get("use_shortest_path").unwrap().as_str())]
326    pub use_shortest_path: Option<bool>,
327}
328
329impl NetworkOptions {
330    /// Replace unset (`None`) values of `base` with values from `default`.
331    pub fn with_defaults(mut self, default: Self) -> Self {
332        self.merge(default);
333        self
334    }
335}
336
337/// Help messages used by Clap for command line arguments, combining the doc string with
338/// the Serde default.
339static EXP_HELP: Lazy<std::collections::HashMap<String, String>> =
340    Lazy::new(|| generate_help_strs(schema_for!(ExperimentalOptions)));
341
342#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
343#[clap(
344    next_help_heading = "Experimental (Unstable and may change or be removed at any time, regardless of Shadow version)"
345)]
346#[clap(next_display_order = None)]
347#[serde(default, deny_unknown_fields)]
348#[merge(strategy = merge::option::overwrite_none)]
349pub struct ExperimentalOptions {
350    /// Use the SCHED_FIFO scheduler. Requires CAP_SYS_NICE. See sched(7), capabilities(7)
351    #[clap(hide_short_help = true)]
352    #[clap(long, value_name = "bool")]
353    #[clap(help = EXP_HELP.get("use_sched_fifo").unwrap().as_str())]
354    pub use_sched_fifo: Option<bool>,
355
356    /// Count the number of occurrences for individual syscalls
357    #[clap(hide_short_help = true)]
358    #[clap(long, value_name = "bool")]
359    #[clap(help = EXP_HELP.get("use_syscall_counters").unwrap().as_str())]
360    pub use_syscall_counters: Option<bool>,
361
362    /// Count object allocations and deallocations. If disabled, we will not be able to detect object memory leaks
363    #[clap(hide_short_help = true)]
364    #[clap(long, value_name = "bool")]
365    #[clap(help = EXP_HELP.get("use_object_counters").unwrap().as_str())]
366    pub use_object_counters: Option<bool>,
367
368    /// Preload our libc library for all managed processes for fast syscall interposition when possible.
369    #[clap(hide_short_help = true)]
370    #[clap(long, value_name = "bool")]
371    #[clap(help = EXP_HELP.get("use_preload_libc").unwrap().as_str())]
372    pub use_preload_libc: Option<bool>,
373
374    /// Preload our OpenSSL RNG library for all managed processes to mitigate non-deterministic use of OpenSSL.
375    #[clap(hide_short_help = true)]
376    #[clap(long, value_name = "bool")]
377    #[clap(help = EXP_HELP.get("use_preload_openssl_rng").unwrap().as_str())]
378    pub use_preload_openssl_rng: Option<bool>,
379
380    /// Preload our OpenSSL crypto library for all managed processes to skip some crypto operations
381    /// (may speed up simulation if your CPU lacks AES-NI support, but can cause bugs so do not use
382    /// unless you know what you're doing).
383    #[clap(hide_short_help = true)]
384    #[clap(long, value_name = "bool")]
385    #[clap(help = EXP_HELP.get("use_preload_openssl_crypto").unwrap().as_str())]
386    pub use_preload_openssl_crypto: Option<bool>,
387
388    /// Use the MemoryManager in memory-mapping mode. This can improve
389    /// performance, but disables support for dynamically spawning processes
390    /// inside the simulation (e.g. the `fork` syscall).
391    #[clap(hide_short_help = true)]
392    #[clap(long, value_name = "bool")]
393    #[clap(help = EXP_HELP.get("use_memory_manager").unwrap().as_str())]
394    pub use_memory_manager: Option<bool>,
395
396    /// Pin each thread and any processes it executes to the same logical CPU Core to improve cache affinity
397    #[clap(hide_short_help = true)]
398    #[clap(long, value_name = "bool")]
399    #[clap(help = EXP_HELP.get("use_cpu_pinning").unwrap().as_str())]
400    pub use_cpu_pinning: Option<bool>,
401
402    /// Each worker thread will spin in a `sched_yield` loop while waiting for a new task. This is
403    /// ignored if not using the thread-per-core scheduler.
404    #[clap(hide_short_help = true)]
405    #[clap(long, value_name = "bool")]
406    #[clap(help = EXP_HELP.get("use_worker_spinning").unwrap().as_str())]
407    pub use_worker_spinning: Option<bool>,
408
409    /// If set, overrides the automatically calculated minimum time workers may run ahead when sending events between nodes
410    #[clap(hide_short_help = true)]
411    #[clap(long, value_name = "seconds")]
412    #[clap(help = EXP_HELP.get("runahead").unwrap().as_str())]
413    pub runahead: Option<NullableOption<units::Time<units::TimePrefix>>>,
414
415    /// Update the minimum runahead dynamically throughout the simulation.
416    #[clap(hide_short_help = true)]
417    #[clap(long, value_name = "bool")]
418    #[clap(help = EXP_HELP.get("use_dynamic_runahead").unwrap().as_str())]
419    pub use_dynamic_runahead: Option<bool>,
420
421    /// Initial size of the socket's send buffer
422    #[clap(hide_short_help = true)]
423    #[clap(long, value_name = "bytes")]
424    #[clap(help = EXP_HELP.get("socket_send_buffer").unwrap().as_str())]
425    pub socket_send_buffer: Option<units::Bytes<units::SiPrefixUpper>>,
426
427    /// Enable send window autotuning
428    #[clap(hide_short_help = true)]
429    #[clap(long, value_name = "bool")]
430    #[clap(help = EXP_HELP.get("socket_send_autotune").unwrap().as_str())]
431    pub socket_send_autotune: Option<bool>,
432
433    /// Initial size of the socket's receive buffer
434    #[clap(hide_short_help = true)]
435    #[clap(long, value_name = "bytes")]
436    #[clap(help = EXP_HELP.get("socket_recv_buffer").unwrap().as_str())]
437    pub socket_recv_buffer: Option<units::Bytes<units::SiPrefixUpper>>,
438
439    /// Enable receive window autotuning
440    #[clap(hide_short_help = true)]
441    #[clap(long, value_name = "bool")]
442    #[clap(help = EXP_HELP.get("socket_recv_autotune").unwrap().as_str())]
443    pub socket_recv_autotune: Option<bool>,
444
445    /// The queueing discipline to use at the network interface
446    #[clap(hide_short_help = true)]
447    #[clap(long, value_name = "mode")]
448    #[clap(help = EXP_HELP.get("interface_qdisc").unwrap().as_str())]
449    pub interface_qdisc: Option<QDiscMode>,
450
451    /// Log the syscalls for each process to individual "strace" files
452    #[clap(hide_short_help = true)]
453    #[clap(long, value_name = "mode")]
454    #[clap(help = EXP_HELP.get("strace_logging_mode").unwrap().as_str())]
455    pub strace_logging_mode: Option<StraceLoggingMode>,
456
457    /// Max amount of execution-time latency allowed to accumulate before the
458    /// clock is moved forward. Moving the clock forward is a potentially
459    /// expensive operation, so larger values reduce simulation overhead, at the
460    /// cost of coarser time jumps. Note also that accumulated-but-unapplied
461    /// latency is discarded when a thread is blocked on a syscall.
462    #[clap(hide_short_help = true)]
463    #[clap(long, value_name = "seconds")]
464    #[clap(help = EXP_HELP.get("max_unapplied_cpu_latency").unwrap().as_str())]
465    pub max_unapplied_cpu_latency: Option<units::Time<units::TimePrefix>>,
466
467    /// Simulated latency of an unblocked syscall. For efficiency Shadow only
468    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
469    /// reached.
470    #[clap(hide_short_help = true)]
471    #[clap(long, value_name = "seconds")]
472    #[clap(help = EXP_HELP.get("unblocked_syscall_latency").unwrap().as_str())]
473    pub unblocked_syscall_latency: Option<units::Time<units::TimePrefix>>,
474
475    /// Simulated latency of a vdso "syscall". For efficiency Shadow only
476    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
477    /// reached.
478    #[clap(hide_short_help = true)]
479    #[clap(long, value_name = "seconds")]
480    #[clap(help = EXP_HELP.get("unblocked_vdso_latency").unwrap().as_str())]
481    pub unblocked_vdso_latency: Option<units::Time<units::TimePrefix>>,
482
483    /// The host scheduler implementation, which decides how to assign hosts to threads and threads
484    /// to CPU cores
485    #[clap(hide_short_help = true)]
486    #[clap(long, value_name = "name")]
487    #[clap(help = EXP_HELP.get("scheduler").unwrap().as_str())]
488    pub scheduler: Option<Scheduler>,
489
490    /// When true, report error-level messages to stderr in addition to logging to stdout.
491    #[clap(hide_short_help = true)]
492    #[clap(long, value_name = "bool")]
493    #[clap(help = EXP_HELP.get("report_errors_to_stderr").unwrap().as_str())]
494    pub report_errors_to_stderr: Option<bool>,
495
496    /// Use the rust TCP implementation
497    #[clap(hide_short_help = true)]
498    #[clap(long, value_name = "bool")]
499    #[clap(help = EXP_HELP.get("use_new_tcp").unwrap().as_str())]
500    pub use_new_tcp: Option<bool>,
501
502    /// When true, and when managed code runs for an extended time without
503    /// returning control to shadow (e.g. by making a syscall), shadow preempts
504    /// the managed code and moves simulated time forward. This can be used to
505    /// escape "pure-CPU busy-loops", but isn't usually needed, breaks
506    /// simulation determinism, and significantly affects simulation
507    /// performance.
508    #[clap(hide_short_help = true)]
509    #[clap(long, value_name = "bool")]
510    #[clap(help = EXP_HELP.get("native_preemption_enabled").unwrap().as_str())]
511    pub native_preemption_enabled: Option<bool>,
512
513    /// When `native_preemption_enabled` is true, amount of native CPU-time to
514    /// wait before preempting managed code that hasn't returned control to
515    /// shadow. Only supports microsecond granularity, and values below 1 microsecond
516    /// are rejected.
517    #[clap(hide_short_help = true)]
518    #[clap(long, value_name = "seconds")]
519    #[clap(help = EXP_HELP.get("native_preemption_native_interval").unwrap().as_str())]
520    pub native_preemption_native_interval: Option<units::Time<units::TimePrefix>>,
521
522    /// When `native_preemption_enabled` is true, amount of simulated time to
523    /// consume after `native_preemption_native_interval` has elapsed without
524    /// returning control to shadow.
525    #[clap(hide_short_help = true)]
526    #[clap(long, value_name = "seconds")]
527    #[clap(help = EXP_HELP.get("native_preemption_sim_interval").unwrap().as_str())]
528    pub native_preemption_sim_interval: Option<units::Time<units::TimePrefix>>,
529}
530
531impl ExperimentalOptions {
532    /// Replace unset (`None`) values of `base` with values from `default`.
533    pub fn with_defaults(mut self, default: Self) -> Self {
534        self.merge(default);
535        self
536    }
537}
538
539impl Default for ExperimentalOptions {
540    fn default() -> Self {
541        Self {
542            use_sched_fifo: Some(false),
543            use_syscall_counters: Some(true),
544            use_object_counters: Some(true),
545            use_preload_libc: Some(true),
546            use_preload_openssl_rng: Some(true),
547            use_preload_openssl_crypto: Some(false),
548            max_unapplied_cpu_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
549            // 1-2 microseconds is a ballpark estimate of the minimal latency for
550            // context switching to the kernel and back on modern machines.
551            // Default to the lower end to minimize effect in simualations without busy loops.
552            unblocked_syscall_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
553            // Actual latencies vary from ~40 to ~400 CPU cycles. https://stackoverflow.com/a/13096917
554            // Default to the lower end to minimize effect in simualations without busy loops.
555            unblocked_vdso_latency: Some(units::Time::new(10, units::TimePrefix::Nano)),
556            use_memory_manager: Some(false),
557            use_cpu_pinning: Some(true),
558            use_worker_spinning: Some(true),
559            runahead: Some(NullableOption::Value(units::Time::new(
560                1,
561                units::TimePrefix::Milli,
562            ))),
563            use_dynamic_runahead: Some(false),
564            socket_send_buffer: Some(units::Bytes::new(131_072, units::SiPrefixUpper::Base)),
565            socket_send_autotune: Some(true),
566            socket_recv_buffer: Some(units::Bytes::new(174_760, units::SiPrefixUpper::Base)),
567            socket_recv_autotune: Some(true),
568            interface_qdisc: Some(QDiscMode::Fifo),
569            strace_logging_mode: Some(StraceLoggingMode::Off),
570            scheduler: Some(Scheduler::ThreadPerCore),
571            report_errors_to_stderr: Some(true),
572            use_new_tcp: Some(false),
573            native_preemption_enabled: Some(false),
574            native_preemption_native_interval: Some(units::Time::new(
575                100,
576                units::TimePrefix::Milli,
577            )),
578            native_preemption_sim_interval: Some(units::Time::new(10, units::TimePrefix::Milli)),
579        }
580    }
581}
582
583/// Help messages used by Clap for command line arguments, combining the doc string with
584/// the Serde default.
585static HOST_HELP: Lazy<std::collections::HashMap<String, String>> =
586    Lazy::new(|| generate_help_strs(schema_for!(HostDefaultOptions)));
587
588#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
589#[clap(next_help_heading = "Host Defaults (Default options for hosts)")]
590#[clap(next_display_order = None)]
591#[serde(default, deny_unknown_fields)]
592// serde will default all fields to `None`, but in the cli help we want the actual defaults
593#[schemars(default = "HostDefaultOptions::new_with_defaults")]
594#[merge(strategy = merge::option::overwrite_none)]
595pub struct HostDefaultOptions {
596    /// Log level at which to print node messages
597    #[clap(long = "host-log-level", name = "host-log-level")]
598    #[clap(value_name = "level")]
599    #[clap(help = HOST_HELP.get("log_level").unwrap().as_str())]
600    pub log_level: Option<NullableOption<LogLevel>>,
601
602    /// Should shadow generate pcap files?
603    #[clap(long, value_name = "bool")]
604    #[clap(help = HOST_HELP.get("pcap_enabled").unwrap().as_str())]
605    pub pcap_enabled: Option<bool>,
606
607    /// How much data to capture per packet (header and payload) if pcap logging is enabled
608    #[clap(long, value_name = "bytes")]
609    #[clap(help = HOST_HELP.get("pcap_capture_size").unwrap().as_str())]
610    pub pcap_capture_size: Option<units::Bytes<units::SiPrefixUpper>>,
611}
612
613impl HostDefaultOptions {
614    pub fn new_with_defaults() -> Self {
615        Self {
616            log_level: None,
617            pcap_enabled: Some(false),
618            // From pcap(3): "A value of 65535 should be sufficient, on most if not all networks, to
619            // capture all the data available from the packet". The maximum length of an IP packet
620            // (including the header) is 65535 bytes.
621            pcap_capture_size: Some(units::Bytes::new(65535, units::SiPrefixUpper::Base)),
622        }
623    }
624
625    /// Replace unset (`None`) values of `base` with values from `default`.
626    pub fn with_defaults(mut self, default: Self) -> Self {
627        self.merge(default);
628        self
629    }
630}
631
632#[allow(clippy::derivable_impls)]
633impl Default for HostDefaultOptions {
634    fn default() -> Self {
635        // Our config fields would typically be initialized with their real defaults here in the
636        // `Default::default` implementation, but we need to handle the host options differently
637        // because the global `host_option_defaults` can be overridden by host-specific
638        // `host_options`. So instead we use defaults of `None` here and set the real defaults with
639        // `Self::new_with_defaults` in `ConfigOptions::new`.
640        Self {
641            log_level: None,
642            pcap_enabled: None,
643            pcap_capture_size: None,
644        }
645    }
646}
647
648#[derive(Serialize, Deserialize, Eq, PartialEq, Debug, Copy, Clone, JsonSchema)]
649#[serde(rename_all = "kebab-case")]
650pub enum RunningVal {
651    Running,
652}
653
654/// The enum variants here have an extra level of indirection to get the
655/// serde serialization that we want.
656#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
657#[serde(untagged)]
658pub enum ProcessFinalState {
659    Exited { exited: i32 },
660    Signaled { signaled: Signal },
661    Running(RunningVal),
662}
663
664impl Default for ProcessFinalState {
665    fn default() -> Self {
666        Self::Exited { exited: 0 }
667    }
668}
669
670impl std::fmt::Display for ProcessFinalState {
671    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
672        // We use the yaml serialization here so that when reporting that an
673        // expected state didn't match the actual state, it's clear how to set
674        // the expected state in the config file to match the actual state if
675        // desired.
676        //
677        // The current enum works OK for this since there are no internal
678        // newlines in the serialization; if there are some later we might wand
679        // to serialize to json instead, which can always be put on a single
680        // line and should also be valid yaml.
681        let s = serde_yaml::to_string(self).or(Err(std::fmt::Error))?;
682        write!(f, "{}", s.trim())
683    }
684}
685
// Options for a single process of a host (see `HostOptions::processes`). Unknown keys are
// rejected during deserialization (`deny_unknown_fields`).
//
// Plain `//` comments are used for the additions here because schemars picks up doc comments
// as schema descriptions.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ProcessOptions {
    // The only field without a serde default, so it must always be given.
    // NOTE(review): presumably the path of the program to run; it is deliberately not
    // validated here (see the module-level docs).
    pub path: std::path::PathBuf,

    /// Process arguments
    #[serde(default = "default_args_empty")]
    pub args: ProcessArgs,

    /// Environment variables passed when executing this process
    #[serde(default)]
    pub environment: BTreeMap<EnvName, String>,

    /// The simulated time at which to execute the process
    #[serde(default)]
    pub start_time: units::Time<units::TimePrefix>,

    /// The simulated time at which to send a `shutdown_signal` signal to the process
    #[serde(default)]
    pub shutdown_time: Option<units::Time<units::TimePrefix>>,

    /// The signal that will be sent to the process at `shutdown_time`
    #[serde(default = "default_sigterm")]
    pub shutdown_signal: Signal,

    /// The expected final state of the process. Shadow will report an error
    /// if the actual state doesn't match.
    #[serde(default)]
    pub expected_final_state: ProcessFinalState,
}
716
// Options for a single simulated host. Unknown keys are rejected during deserialization
// (`deny_unknown_fields`). `network_node_id` and `processes` have no serde default and are
// therefore required; every other field is optional.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct HostOptions {
    /// Network graph node ID to assign the host to
    pub network_node_id: u32,

    // The processes configured for this host.
    pub processes: Vec<ProcessOptions>,

    /// IP address to assign to the host
    #[serde(default)]
    pub ip_addr: Option<std::net::Ipv4Addr>,

    /// Downstream bandwidth capacity of the host
    #[serde(default)]
    pub bandwidth_down: Option<units::BitsPerSec<units::SiPrefixUpper>>,

    /// Upstream bandwidth capacity of the host
    #[serde(default)]
    pub bandwidth_up: Option<units::BitsPerSec<units::SiPrefixUpper>>,

    // Host-specific overrides; when omitted this is `HostDefaultOptions::default()`.
    #[serde(default)]
    pub host_options: HostDefaultOptions,
}
740
// Log verbosity levels. With `rename_all = "kebab-case"` the variants are spelled `error`,
// `warning`, `info`, `debug` and `trace` when (de)serialized.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum LogLevel {
    Error,
    Warning,
    Info,
    Debug,
    Trace,
}
750
751impl FromStr for LogLevel {
752    type Err = serde_yaml::Error;
753
754    fn from_str(s: &str) -> Result<Self, Self::Err> {
755        serde_yaml::from_str(s)
756    }
757}
758
759impl LogLevel {
760    pub fn to_c_loglevel(&self) -> c_log::LogLevel {
761        match self {
762            Self::Error => c_log::_LogLevel_LOGLEVEL_ERROR,
763            Self::Warning => c_log::_LogLevel_LOGLEVEL_WARNING,
764            Self::Info => c_log::_LogLevel_LOGLEVEL_INFO,
765            Self::Debug => c_log::_LogLevel_LOGLEVEL_DEBUG,
766            Self::Trace => c_log::_LogLevel_LOGLEVEL_TRACE,
767        }
768    }
769}
770
771impl From<LogLevel> for log::Level {
772    fn from(level: LogLevel) -> Self {
773        match level {
774            LogLevel::Error => log::Level::Error,
775            LogLevel::Warning => log::Level::Warn,
776            LogLevel::Info => log::Level::Info,
777            LogLevel::Debug => log::Level::Debug,
778            LogLevel::Trace => log::Level::Trace,
779        }
780    }
781}
782
// A hostname string. `Deserialize` is intentionally not derived: the hand-written
// implementation checks the hostname format before constructing a value.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
pub struct HostName(String);
785
786impl<'de> serde::Deserialize<'de> for HostName {
787    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
788        struct HostNameVisitor;
789
790        impl serde::de::Visitor<'_> for HostNameVisitor {
791            type Value = HostName;
792
793            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
794                formatter.write_str("a string")
795            }
796
797            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
798            where
799                E: serde::de::Error,
800            {
801                // hostname(7): "Valid characters for hostnames are ASCII(7) letters from a to z,
802                // the digits from 0 to 9, and the hyphen (-)."
803                fn is_allowed(c: char) -> bool {
804                    c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.'
805                }
806                if let Some(invalid_char) = v.chars().find(|x| !is_allowed(*x)) {
807                    return Err(E::custom(format!(
808                        "invalid hostname character: '{invalid_char}'"
809                    )));
810                }
811
812                if v.is_empty() {
813                    return Err(E::custom("empty hostname"));
814                }
815
816                // hostname(7): "A hostname may not start with a hyphen."
817                if v.starts_with('-') {
818                    return Err(E::custom("hostname begins with a '-' character"));
819                }
820
821                // hostname(7): "Each element of the hostname must be from 1 to 63 characters long
822                // and the entire hostname, including the dots, can be at most 253 characters long."
823                if v.len() > 253 {
824                    return Err(E::custom("hostname exceeds 253 characters"));
825                }
826
827                Ok(HostName(v))
828            }
829
830            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
831            where
832                E: serde::de::Error,
833            {
834                // serde::de::Visitor: "It is never correct to implement `visit_string` without
835                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
836                self.visit_string(v.to_string())
837            }
838        }
839
840        deserializer.deserialize_string(HostNameVisitor)
841    }
842}
843
844impl std::ops::Deref for HostName {
845    type Target = String;
846
847    fn deref(&self) -> &Self::Target {
848        &self.0
849    }
850}
851
852impl From<HostName> for String {
853    fn from(name: HostName) -> Self {
854        name.0
855    }
856}
857
858impl std::fmt::Display for HostName {
859    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
860        self.0.fmt(f)
861    }
862}
863
// An environment variable name. `Deserialize` is intentionally not derived: values are built
// through `EnvName::new`, which rejects names containing a '=' character.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
pub struct EnvName(String);
866
867impl EnvName {
868    pub fn new(name: impl Into<String>) -> Option<Self> {
869        let name = name.into();
870
871        // an environment variable name cannot contain a '=' character
872        if name.contains('=') {
873            return None;
874        }
875
876        Some(Self(name))
877    }
878}
879
880impl<'de> serde::Deserialize<'de> for EnvName {
881    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
882        struct EnvNameVisitor;
883
884        impl serde::de::Visitor<'_> for EnvNameVisitor {
885            type Value = EnvName;
886
887            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
888                formatter.write_str("a string")
889            }
890
891            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
892            where
893                E: serde::de::Error,
894            {
895                let Some(name) = EnvName::new(v) else {
896                    let e = "environment variable name contains a '=' character";
897                    return Err(E::custom(e));
898                };
899
900                Ok(name)
901            }
902
903            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
904            where
905                E: serde::de::Error,
906            {
907                // serde::de::Visitor: "It is never correct to implement `visit_string` without
908                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
909                self.visit_string(v.to_string())
910            }
911        }
912
913        deserializer.deserialize_string(EnvNameVisitor)
914    }
915}
916
917impl std::ops::Deref for EnvName {
918    type Target = String;
919
920    fn deref(&self) -> &Self::Target {
921        &self.0
922    }
923}
924
925impl From<EnvName> for String {
926    fn from(name: EnvName) -> Self {
927        name.0
928    }
929}
930
931impl std::fmt::Display for EnvName {
932    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
933        self.0.fmt(f)
934    }
935}
936
// The available schedulers. With `rename_all = "kebab-case"` the variants are spelled
// `thread-per-host` and `thread-per-core` when (de)serialized.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum Scheduler {
    ThreadPerHost,
    ThreadPerCore,
}
943
944impl FromStr for Scheduler {
945    type Err = serde_yaml::Error;
946
947    fn from_str(s: &str) -> Result<Self, Self::Err> {
948        serde_yaml::from_str(s)
949    }
950}
951
/// Helper function for serde default `Some("shadow.data")` values.
fn default_data_directory() -> Option<String> {
    let directory = String::from("shadow.data");
    Some(directory)
}
955
/// Parse a string as a comma-delimited set of `T` values.
///
/// Each comma-separated piece is trimmed of surrounding whitespace before being parsed. The
/// first piece that fails to parse aborts the whole operation with that piece's error.
fn parse_set<T>(s: &str) -> Result<HashSet<T>, <T as FromStr>::Err>
where
    T: std::cmp::Eq + std::hash::Hash + FromStr,
{
    let mut values = HashSet::new();

    for piece in s.split(',') {
        let value: T = piece.trim().parse()?;
        values.insert(value);
    }

    Ok(values)
}
963
964/// Parse a string as a comma-delimited set of `String` values.
965fn parse_set_str(s: &str) -> Result<HashSet<String>, <String as FromStr>::Err> {
966    parse_set(s)
967}
968
// Queuing discipline modes. With `rename_all = "kebab-case"` the variants are spelled `fifo`
// and `round-robin` when (de)serialized. The enum is `#[repr(C)]`, so it has a C-compatible
// layout.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
#[repr(C)]
pub enum QDiscMode {
    Fifo,
    RoundRobin,
}
976
977impl FromStr for QDiscMode {
978    type Err = serde_yaml::Error;
979
980    fn from_str(s: &str) -> Result<Self, Self::Err> {
981        serde_yaml::from_str(s)
982    }
983}
984
// Supported file compression formats. The single variant is spelled `xz` when (de)serialized.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum Compression {
    Xz,
}
990
// A file reference with an optional compression format. Unknown keys are rejected during
// deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct FileSource {
    /// The path to the file
    pub path: String,
    /// The file's compression format
    pub compression: Option<Compression>,
}
999
// Where a graph comes from. The variants are keyed as `file` and `inline` when (de)serialized
// (`rename_all = "kebab-case"`).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum GraphSource {
    // The graph is read from the described file.
    File(FileSource),
    // NOTE(review): presumably the graph text itself, embedded in the configuration.
    Inline(String),
}
1006
// The network graph selection. The variant is chosen by the value of the `type` key
// (internally tagged representation).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
// we use "kebab-case" for other shadow options, but are leaving this as "snake_case" for backwards
// compatibility
#[serde(tag = "type", rename_all = "snake_case")]
pub enum GraphOptions {
    // Selected with `type: gml`; the remaining keys describe the `GraphSource`.
    Gml(GraphSource),
    // Selected with `type: 1_gbit_switch`. The explicit rename is needed because "snake_case"
    // renaming alone would not produce a name starting with a digit.
    #[serde(rename = "1_gbit_switch")]
    OneGbitSwitch,
}
1016
// Process arguments, given either as a list of strings or as a single string. Serialization is
// untagged, so a value is written as a plain sequence or a plain string. `Deserialize` is
// intentionally not derived; it is implemented by hand to produce better error messages.
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum ProcessArgs {
    List(Vec<String>),
    Str(String),
}
1023
1024/// Serde doesn't provide good deserialization error messages for untagged enums, so we implement
1025/// our own. For example, if serde finds a yaml value such as 4 for the process arguments, it won't
1026/// deserialize it to the string "4" and the yaml parsing will fail. The serde-generated error
1027/// message will say something like "data did not match any variant of untagged enum ProcessArgs at
1028/// line X column Y" which isn't very helpful to the user, so here we try to give a better error
1029/// message.
1030impl<'de> serde::Deserialize<'de> for ProcessArgs {
1031    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1032        struct ProcessArgsVisitor;
1033
1034        impl<'de> serde::de::Visitor<'de> for ProcessArgsVisitor {
1035            type Value = ProcessArgs;
1036
1037            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1038                formatter.write_str("a string or a sequence of strings")
1039            }
1040
1041            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1042            where
1043                E: serde::de::Error,
1044            {
1045                Ok(Self::Value::Str(v.to_owned()))
1046            }
1047
1048            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
1049            where
1050                A: serde::de::SeqAccess<'de>,
1051            {
1052                let mut v = vec![];
1053
1054                while let Some(val) = seq.next_element()? {
1055                    v.push(val);
1056                }
1057
1058                Ok(Self::Value::List(v))
1059            }
1060        }
1061
1062        deserializer.deserialize_any(ProcessArgsVisitor)
1063    }
1064}
1065
// TODO: use linux_api's Signal internally, which we control and which supports
// realtime signals. We need to implement conversion to and from strings to do
// so, while being careful that the conversion is compatible with nix's so as
// not to be a breaking change to our configuration format.
/// A signal as used in the configuration; a thin wrapper around nix's `Signal`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Signal(nix::sys::signal::Signal);
1072
1073impl From<nix::sys::signal::Signal> for Signal {
1074    fn from(value: nix::sys::signal::Signal) -> Self {
1075        Self(value)
1076    }
1077}
1078
1079impl TryFrom<linux_api::signal::Signal> for Signal {
1080    type Error = <nix::sys::signal::Signal as TryFrom<i32>>::Error;
1081    fn try_from(value: linux_api::signal::Signal) -> Result<Self, Self::Error> {
1082        let signal = nix::sys::signal::Signal::try_from(value.as_i32())?;
1083        Ok(Self(signal))
1084    }
1085}
1086
1087impl serde::Serialize for Signal {
1088    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1089    where
1090        S: serde::Serializer,
1091    {
1092        serializer.serialize_str(self.0.as_str())
1093    }
1094}
1095
1096impl<'de> serde::Deserialize<'de> for Signal {
1097    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1098        struct SignalVisitor;
1099
1100        impl serde::de::Visitor<'_> for SignalVisitor {
1101            type Value = Signal;
1102
1103            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1104                formatter.write_str("a signal string (e.g. \"SIGINT\") or integer")
1105            }
1106
1107            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1108            where
1109                E: serde::de::Error,
1110            {
1111                nix::sys::signal::Signal::from_str(v)
1112                    .map(Signal)
1113                    .map_err(|_e| E::custom(format!("Invalid signal string: {v}")))
1114            }
1115
1116            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
1117            where
1118                E: serde::de::Error,
1119            {
1120                let v = i32::try_from(v)
1121                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1122                nix::sys::signal::Signal::try_from(v)
1123                    .map(Signal)
1124                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))
1125            }
1126
1127            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
1128            where
1129                E: serde::de::Error,
1130            {
1131                let v = i64::try_from(v)
1132                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1133                self.visit_i64(v)
1134            }
1135        }
1136
1137        deserializer.deserialize_any(SignalVisitor)
1138    }
1139}
1140
1141impl std::fmt::Display for Signal {
1142    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1143        write!(f, "{}", self.0)
1144    }
1145}
1146
1147impl JsonSchema for Signal {
1148    fn schema_name() -> Cow<'static, str> {
1149        "Signal".into()
1150    }
1151
1152    fn json_schema(_gen: &mut schemars::SchemaGenerator) -> schemars::Schema {
1153        // Use the "anything" schema. The Deserialize implementation does the
1154        // actual parsing and error handling.
1155        // TODO: Ideally we'd only accept strings or integers here. The
1156        // documentation isn't very clear about how to construct such a schema
1157        // though, and we currently only use the schemas for command-line-option
1158        // help strings. Since we don't currently take Signals in
1159        // command-line-options, it doesn't matter.
1160        schemars::json_schema!(true)
1161    }
1162}
1163
1164impl std::ops::Deref for Signal {
1165    type Target = nix::sys::signal::Signal;
1166
1167    fn deref(&self) -> &Self::Target {
1168        &self.0
1169    }
1170}
1171
// Strace logging modes. With `rename_all = "kebab-case"` the variants are spelled `off`,
// `standard` and `deterministic` when (de)serialized.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum StraceLoggingMode {
    Off,
    Standard,
    Deterministic,
}
1179
1180impl FromStr for StraceLoggingMode {
1181    type Err = serde_yaml::Error;
1182
1183    fn from_str(s: &str) -> Result<Self, Self::Err> {
1184        serde_yaml::from_str(s)
1185    }
1186}
1187
/// This wrapper type allows cli options to specify "null" to overwrite a config file option with
/// `None`, and is intended to be used for options where "null" is a valid option value.
///
/// **Warning**: This may result in unexpected behaviour when wrapping string types. For example, if
/// this is used for a file path option, the value "null" will conflict with the valid filename
/// "null". So if the user specifies "null" for this option, Shadow will assume it means "no value"
/// rather than the filename "null".
///
/// ### Motivation
///
/// For configuration options, there are generally three states:
/// - set
/// - not set
/// - null
///
/// For serde, all three states are configurable:
/// - set: `runahead: 5ms`
/// - not set: (no `runahead` option used in yaml)
/// - null: `runahead: null`
///
/// For clap, there are only two states:
/// - set: `--runahead 5ms`
/// - not set: (no `--runahead` option used in command)
///
/// There is no way to set a "null" state for cli options with clap.
///
/// ### Configuration in Shadow
///
/// Shadow first parses the config file and cli options separately before merging them.
///
/// Parsing for serde:
/// - set: `runahead: 5ms` => runahead is set to `Some(5ms)`
/// - not set: (no `runahead` option used in yaml) => runahead is set to its default (either
///   `Some(..)` or `None`)
/// - null: `runahead: null` => runahead is set to `None`
///
/// Parsing for clap:
/// - set: `--runahead 5ms` => runahead is set to `Some(5ms)`
/// - not set: (no `--runahead` option used in command) => runahead is set to `None`
///
/// Then the options are merged such that any `Some(..)` options from the cli options will overwrite
/// any `Some` or `None` options from the config file.
///
/// The issue is that no clap option can overwrite a config file option of `Some` with a value of
/// `None`. For example if the config file specifies `runahead: 5ms`, then with clap you can only
/// use `--runahead 2ms` to change the runahead to a `Some(2ms)` value, or you can not set
/// `--runahead` at all to leave it as a `Some(5ms)` value. But there is no cli option to change the
/// runahead to a `None` value.
///
/// This `NullableOption` type is a wrapper to allow you to specify "null" on the command line to
/// overwrite the config file value with `None`. From the example above, you could now specify
/// "--runahead null" to overwrite the config file value (for example `Some(5ms)`) with a `None`
/// value.
#[derive(Debug, Copy, Clone, JsonSchema, Eq, PartialEq)]
pub enum NullableOption<T> {
    // An actual value was given.
    Value(T),
    // "null" was given explicitly; converts to `None` in `to_option`.
    Null,
}
1246
1247impl<T> NullableOption<T> {
1248    pub fn as_ref(&self) -> NullableOption<&T> {
1249        match self {
1250            NullableOption::Value(x) => NullableOption::Value(x),
1251            NullableOption::Null => NullableOption::Null,
1252        }
1253    }
1254
1255    pub fn as_mut(&mut self) -> NullableOption<&mut T> {
1256        match self {
1257            NullableOption::Value(x) => NullableOption::Value(x),
1258            NullableOption::Null => NullableOption::Null,
1259        }
1260    }
1261
1262    /// Easier to use than `Into<Option<T>>` since `Option` has a lot of blanket `From`
1263    /// implementations, requiring a lot of type annotations.
1264    pub fn to_option(self) -> Option<T> {
1265        match self {
1266            NullableOption::Value(x) => Some(x),
1267            NullableOption::Null => None,
1268        }
1269    }
1270}
1271
1272impl<T: serde::Serialize> serde::Serialize for NullableOption<T> {
1273    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1274        match self {
1275            // use the inner type's serialize function
1276            Self::Value(x) => Ok(T::serialize(x, serializer)?),
1277            Self::Null => serializer.serialize_none(),
1278        }
1279    }
1280}
1281
1282impl<'de, T: serde::Deserialize<'de>> serde::Deserialize<'de> for NullableOption<T> {
1283    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1284        // always use the inner type's deserialize function
1285        Ok(Self::Value(T::deserialize(deserializer)?))
1286    }
1287}
1288
1289impl<T> FromStr for NullableOption<T>
1290where
1291    T: FromStr<Err: std::fmt::Debug + std::fmt::Display>,
1292{
1293    type Err = T::Err;
1294
1295    fn from_str(s: &str) -> Result<Self, Self::Err> {
1296        match s {
1297            // since we use serde-yaml, use "null" to match yaml's "null"
1298            "null" => Ok(Self::Null),
1299            x => Ok(Self::Value(FromStr::from_str(x)?)),
1300        }
1301    }
1302}
1303
/// A trait for `Option`-like types that can be flattened into a single `Option`.
pub trait Flatten<T> {
    /// Consumes `self` and returns the contained value, if there is one.
    fn flatten(self) -> Option<T>;
    /// Returns a reference to the contained value, if there is one, without consuming `self`.
    fn flatten_ref(&self) -> Option<&T>;
}
1309
1310impl<T> Flatten<T> for Option<NullableOption<T>> {
1311    fn flatten(self) -> Option<T> {
1312        self.and_then(|x| x.to_option())
1313    }
1314
1315    fn flatten_ref(&self) -> Option<&T> {
1316        self.as_ref().and_then(|x| x.as_ref().to_option())
1317    }
1318}
1319
1320/// Helper function for serde default `ProcessArgs::Str("")` values.
1321fn default_args_empty() -> ProcessArgs {
1322    ProcessArgs::Str("".to_string())
1323}
1324
1325/// Helper function for serde default `Signal(Signal::SIGTERM)` values.
1326fn default_sigterm() -> Signal {
1327    Signal(nix::sys::signal::Signal::SIGTERM)
1328}
1329
1330/// Helper function for serde default `Some(0)` values.
1331fn default_some_time_0() -> Option<units::Time<units::TimePrefix>> {
1332    Some(units::Time::new(0, units::TimePrefix::Sec))
1333}
1334
/// Helper function for serde default `Some(true)` values.
fn default_some_true() -> Option<bool> {
    let enabled = true;
    Some(enabled)
}
1339
/// Helper function for serde default `Some(false)` values.
fn default_some_false() -> Option<bool> {
    let enabled = false;
    Some(enabled)
}
1344
/// Helper function for serde default `Some(0)` values.
fn default_some_0() -> Option<u32> {
    let value: u32 = 0;
    Some(value)
}
1349
/// Helper function for serde default `Some(1)` values.
fn default_some_1() -> Option<u32> {
    let value: u32 = 1;
    Some(value)
}
1354
1355/// Helper function for serde default `Some(NullableOption::Value(1 sec))` values.
1356fn default_some_nullable_time_1() -> Option<NullableOption<units::Time<units::TimePrefix>>> {
1357    let time = units::Time::new(1, units::TimePrefix::Sec);
1358    Some(NullableOption::Value(time))
1359}
1360
1361/// Helper function for serde default `Some(LogLevel::Info)` values.
1362fn default_some_info() -> Option<LogLevel> {
1363    Some(LogLevel::Info)
1364}
1365
/// A graph in GML text form with a single node (id 0, 1 Gbit up and down bandwidth) and a
/// single self-loop edge (1 ms latency, no packet loss).
// when updating this graph, make sure to also update the copy in docs/shadow_config_spec.md
pub const ONE_GBIT_SWITCH_GRAPH: &str = r#"graph [
  directed 0
  node [
    id 0
    host_bandwidth_up "1 Gbit"
    host_bandwidth_down "1 Gbit"
  ]
  edge [
    source 0
    target 0
    latency "1 ms"
    packet_loss 0.0
  ]
]"#;
1381
1382/// Generate help strings for objects in a JSON schema, including the Serde defaults if available.
1383fn generate_help_strs(schema: schemars::Schema) -> std::collections::HashMap<String, String> {
1384    // the default for each field
1385    let mut defaults = std::collections::HashMap::<String, String>::new();
1386
1387    // for each field with an entry in "properties"
1388    for (name, obj) in schema.get("properties").unwrap().as_object().unwrap() {
1389        let description = obj
1390            .get("description")
1391            .map(|x| x.as_str().unwrap())
1392            .unwrap_or("");
1393
1394        // schemars gives us the raw doc string, so there is no markdown formatting/rendering
1395        // applied. This means that line breaks aren't collapsed and our help text will have
1396        // unintended line breaks.
1397        //
1398        // We don't want to bring in an entire markdown parser here, so we crudely remove the line
1399        // breaks manually. This will cause issues where there are intended line breaks (lines
1400        // ending with two spaces), new paragraphs ('\n\n'), etc. But hopefully this is good enough
1401        // for now.
1402        //
1403        // See https://github.com/GREsau/schemars/issues/120
1404        let description = description.replace("\n", " ");
1405
1406        let name = name.clone();
1407
1408        match obj.get("default") {
1409            Some(default) => {
1410                let space = if !description.is_empty() { " " } else { "" };
1411                defaults.insert(name, format!("{description}{space}[default: {default}]"))
1412            }
1413            None => defaults.insert(name, description.to_string()),
1414        };
1415    }
1416
1417    defaults
1418}
1419
1420/// Parses a string as a list of arguments following the shell's parsing rules. This
1421/// uses `g_shell_parse_argv()` for parsing.
1422pub fn parse_string_as_args(args_str: &OsStr) -> Result<Vec<OsString>, String> {
1423    if args_str.is_empty() {
1424        return Ok(Vec::new());
1425    }
1426
1427    let args_str = CString::new(args_str.as_bytes()).unwrap();
1428
1429    // parse the argument string
1430    let mut argc: libc::c_int = 0;
1431    let mut argv: *mut *mut libc::c_char = std::ptr::null_mut();
1432    let mut error: *mut libc::c_char = std::ptr::null_mut();
1433    let rv = unsafe { c::process_parseArgStr(args_str.as_ptr(), &mut argc, &mut argv, &mut error) };
1434
1435    // if there was an error, return a copy of the error string
1436    if !rv {
1437        let error_message = match error.is_null() {
1438            false => unsafe { CStr::from_ptr(error) }.to_str().unwrap(),
1439            true => "Unknown parsing error",
1440        }
1441        .to_string();
1442
1443        unsafe { c::process_parseArgStrFree(argv, error) };
1444        return Err(error_message);
1445    }
1446
1447    assert!(!argv.is_null());
1448
1449    // copy the arg strings
1450    let args: Vec<_> = (0..argc)
1451        .map(|x| unsafe {
1452            let arg_ptr = *argv.add(x as usize);
1453            assert!(!arg_ptr.is_null());
1454            OsStr::from_bytes(CStr::from_ptr(arg_ptr).to_bytes()).to_os_string()
1455        })
1456        .collect();
1457
1458    unsafe { c::process_parseArgStrFree(argv, error) };
1459    Ok(args)
1460}
1461
1462#[cfg(test)]
1463mod tests {
1464    use super::*;
1465
1466    #[test]
1467    // can't call foreign function: process_parseArgStr
1468    #[cfg_attr(miri, ignore)]
1469    fn test_parse_args() {
1470        let arg_str = r#"the quick brown fox "jumped over" the "\"lazy\" dog""#;
1471        let expected_args = &[
1472            "the",
1473            "quick",
1474            "brown",
1475            "fox",
1476            "jumped over",
1477            "the",
1478            "\"lazy\" dog",
1479        ];
1480
1481        let arg_str: OsString = arg_str.into();
1482        let args = parse_string_as_args(&arg_str).unwrap();
1483
1484        assert_eq!(args, expected_args);
1485    }
1486
1487    #[test]
1488    // can't call foreign function: process_parseArgStr
1489    #[cfg_attr(miri, ignore)]
1490    fn test_parse_args_empty() {
1491        let arg_str = "";
1492        let expected_args: &[&str] = &[];
1493
1494        let arg_str: OsString = arg_str.into();
1495        let args = parse_string_as_args(&arg_str).unwrap();
1496
1497        assert_eq!(args, expected_args);
1498    }
1499
1500    #[test]
1501    // can't call foreign function: process_parseArgStr
1502    #[cfg_attr(miri, ignore)]
1503    fn test_parse_args_error() {
1504        let arg_str = r#"hello "world"#;
1505
1506        let arg_str: OsString = arg_str.into();
1507        let err_str = parse_string_as_args(&arg_str).unwrap_err();
1508
1509        assert!(!err_str.is_empty());
1510    }
1511
1512    #[test]
1513    // can't call foreign function: process_parseArgStr
1514    #[cfg_attr(miri, ignore)]
1515    fn test_nullable_option() {
1516        // format the yaml with an optional general option
1517        let yaml_fmt_fn = |option| {
1518            format!(
1519                r#"
1520                general:
1521                  stop_time: 1 min
1522                  {option}
1523                network:
1524                  graph:
1525                    type: 1_gbit_switch
1526                hosts:
1527                  myhost:
1528                    network_node_id: 0
1529                    processes:
1530                    - path: /bin/true
1531                "#,
1532            )
1533        };
1534
1535        let time_1_sec = units::Time::new(1, units::TimePrefix::Sec);
1536        let time_5_sec = units::Time::new(5, units::TimePrefix::Sec);
1537
1538        // "heartbeat_interval: null" with no cli option => None
1539        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1540        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1541        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1542
1543        let merged = ConfigOptions::new(config_file, cli);
1544        assert_eq!(merged.general.heartbeat_interval, None);
1545
1546        // "heartbeat_interval: null" with "--heartbeat-interval 5s" => 5s
1547        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1548        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1549        let cli: CliOptions =
1550            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1551
1552        let merged = ConfigOptions::new(config_file, cli);
1553        assert_eq!(
1554            merged.general.heartbeat_interval,
1555            Some(NullableOption::Value(time_5_sec))
1556        );
1557
1558        // "heartbeat_interval: null" with "--heartbeat-interval null" => NullableOption::Null
1559        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1560        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1561        let cli: CliOptions =
1562            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1563
1564        let merged = ConfigOptions::new(config_file, cli);
1565        assert_eq!(
1566            merged.general.heartbeat_interval,
1567            Some(NullableOption::Null)
1568        );
1569
1570        // "heartbeat_interval: 5s" with no cli option => 5s
1571        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1572        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1573        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1574
1575        let merged = ConfigOptions::new(config_file, cli);
1576        assert_eq!(
1577            merged.general.heartbeat_interval,
1578            Some(NullableOption::Value(time_5_sec))
1579        );
1580
1581        // "heartbeat_interval: 5s" with "--heartbeat-interval 5s" => 5s
1582        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1583        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1584        let cli: CliOptions =
1585            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1586
1587        let merged = ConfigOptions::new(config_file, cli);
1588        assert_eq!(
1589            merged.general.heartbeat_interval,
1590            Some(NullableOption::Value(time_5_sec))
1591        );
1592
1593        // "heartbeat_interval: 5s" with "--heartbeat-interval null" => NullableOption::Null
1594        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1595        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1596        let cli: CliOptions =
1597            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1598
1599        let merged = ConfigOptions::new(config_file, cli);
1600        assert_eq!(
1601            merged.general.heartbeat_interval,
1602            Some(NullableOption::Null)
1603        );
1604
1605        // no config option with no cli option => 1s (default)
1606        let yaml = yaml_fmt_fn("");
1607        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1608        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1609
1610        let merged = ConfigOptions::new(config_file, cli);
1611        assert_eq!(
1612            merged.general.heartbeat_interval,
1613            Some(NullableOption::Value(time_1_sec))
1614        );
1615
1616        // no config option with "--heartbeat-interval 5s" => 5s
1617        let yaml = yaml_fmt_fn("");
1618        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1619        let cli: CliOptions =
1620            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1621
1622        let merged = ConfigOptions::new(config_file, cli);
1623        assert_eq!(
1624            merged.general.heartbeat_interval,
1625            Some(NullableOption::Value(time_5_sec))
1626        );
1627
1628        // no config option with "--heartbeat-interval null" => NullableOption::Null
1629        let yaml = yaml_fmt_fn("");
1630        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1631        let cli: CliOptions =
1632            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1633
1634        let merged = ConfigOptions::new(config_file, cli);
1635        assert_eq!(
1636            merged.general.heartbeat_interval,
1637            Some(NullableOption::Null)
1638        );
1639    }
1640}