shadow_rs/core/
configuration.rs

1//! Shadow's configuration and cli parsing code using [serde] and [clap]. This contains all of
2//! Shadow's configuration options, some of which are also exposed as CLI options.
3//!
4//! Shadow uses [schemars] to get the option description (its doc comment) and default value so that
5//! it can be shown in the CLI help text.
6//!
7//! This code should be careful about validating or interpreting values. It should be focused on
8//! parsing and checking that the format is correct, and not validating the values. For example for
9//! options that take paths, this code should not verify that the path actually exists or perform
10//! any path canonicalization. That should be left to other code outside of this module. This is so
11//! that the configuration parsing does not become environment-dependent. If a configuration file
12//! parses on one system, it should parse successfully on other systems as well.
13
14use std::collections::{BTreeMap, HashSet};
15use std::ffi::{CStr, CString, OsStr, OsString};
16use std::os::unix::ffi::OsStrExt;
17use std::str::FromStr;
18
19use clap::Parser;
20use logger as c_log;
21use merge::Merge;
22use once_cell::sync::Lazy;
23use schemars::{JsonSchema, schema_for};
24use serde::{Deserialize, Serialize};
25use shadow_shim_helper_rs::simulation_time::SimulationTime;
26
27use crate::cshadow as c;
28use crate::host::syscall::formatter::FmtOptions;
29use crate::utility::units::{self, Unit};
30
/// Text shown at the top of the CLI help output; passed to clap as `about` on `CliOptions`.
const START_HELP_TEXT: &str = "\
    Run real applications over simulated networks.\n\n\
    For documentation, visit https://shadow.github.io/docs/guide";

/// Text shown at the end of the CLI help output; passed to clap as `after_help` on `CliOptions`.
/// Explains how values with units may be written.
const END_HELP_TEXT: &str = "\
    If units are not specified, all values are assumed to be given in their base \
    unit (seconds, bytes, bits, etc). Units can optionally be specified (for \
    example: '1024 B', '1024 bytes', '1 KiB', '1 kibibyte', etc) and are \
    case-sensitive.";
40
// clap requires a 'static str for the version
/// Version string produced by `crate::shadow::version`, computed lazily on first access and
/// handed to clap through `VERSION.as_str()` on `CliOptions`.
static VERSION: Lazy<String> = Lazy::new(crate::shadow::version);
43
// Shadow's command-line interface, parsed by clap. The `///` doc comments on the fields of this
// struct are what clap shows as the help text of the corresponding arguments, so editing them
// changes the `--help` output.
#[derive(Debug, Clone, Parser)]
#[clap(name = "Shadow", about = START_HELP_TEXT, after_help = END_HELP_TEXT)]
#[clap(version = VERSION.as_str())]
#[clap(next_display_order = None)]
// clap only shows the possible values for bool options (unless we add support for the other
// non-bool options in the future), which isn't very helpful
#[clap(hide_possible_values = true)]
pub struct CliOptions {
    /// Path to the Shadow configuration file. Use '-' to read from stdin
    // positional argument (no `long`/`short`); it may only be omitted when `show_build_info` or
    // `shm_cleanup` is given
    #[clap(required_unless_present_any(&["show_build_info", "shm_cleanup"]))]
    pub config: Option<String>,

    /// Pause to allow gdb to attach
    #[clap(long, short = 'g')]
    pub gdb: bool,

    /// Pause after starting any processes on the comma-delimited list of hostnames
    // the raw argument is turned into a set by the custom `parse_set_str` value parser
    #[clap(value_parser = parse_set_str)]
    #[clap(long, value_name = "hostnames")]
    pub debug_hosts: Option<HashSet<String>>,

    /// Exit after running shared memory cleanup routine
    // `exclusive(true)`: cannot be combined with any other argument
    #[clap(long, exclusive(true))]
    pub shm_cleanup: bool,

    /// Exit after printing build information
    // `exclusive(true)`: cannot be combined with any other argument
    #[clap(long, exclusive(true))]
    pub show_build_info: bool,

    /// Exit after printing the final configuration
    #[clap(long)]
    pub show_config: bool,

    // The option groups below are flattened into this parser, so their fields become ordinary
    // command-line arguments. `ConfigOptions::new` lets any value given here take precedence over
    // the value from the configuration file.
    #[clap(flatten)]
    pub general: GeneralOptions,

    #[clap(flatten)]
    pub network: NetworkOptions,

    #[clap(flatten)]
    pub host_option_defaults: HostDefaultOptions,

    #[clap(flatten)]
    pub experimental: ExperimentalOptions,
}
89
/// Options contained in a configuration file.
///
/// Unknown top-level keys are rejected (`deny_unknown_fields`). The `general`, `network` and
/// `hosts` sections have no serde default and therefore must be present; `host_option_defaults`
/// and `experimental` fall back to their `Default` implementations when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ConfigFileOptions {
    /// General simulation options.
    pub general: GeneralOptions,

    /// Network options.
    pub network: NetworkOptions,

    /// Host options applied to every host that does not set them itself. Defaults to all-`None`
    /// values; the real defaults are filled in by `ConfigOptions::new`.
    #[serde(default)]
    pub host_option_defaults: HostDefaultOptions,

    /// Experimental options; defaults to `ExperimentalOptions::default()` when omitted.
    #[serde(default)]
    pub experimental: ExperimentalOptions,

    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
    // since shadow parses to a serde_yaml::Value initially, we don't need to worry about duplicate
    // hostnames here
    /// The simulated hosts, keyed by hostname.
    pub hosts: BTreeMap<HostName, HostOptions>,
}
109
/// Shadow configuration options after processing command-line and configuration file options.
///
/// Built by `ConfigOptions::new`. There is no `host_option_defaults` field here: those defaults
/// are merged into every host's `host_options` during construction.
#[derive(Debug, Clone, Serialize)]
pub struct ConfigOptions {
    /// General options, with command-line values taking precedence over the configuration file.
    pub general: GeneralOptions,

    /// Network options, with command-line values taking precedence over the configuration file.
    pub network: NetworkOptions,

    /// Experimental options, with command-line values taking precedence over the configuration
    /// file.
    pub experimental: ExperimentalOptions,

    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
    /// The simulated hosts, keyed by hostname.
    pub hosts: BTreeMap<HostName, HostOptions>,
}
122
123impl ConfigOptions {
124    pub fn new(mut config_file: ConfigFileOptions, options: CliOptions) -> Self {
125        // the `HostDefaultOptions::default` contains only `None` values, so we must first merge the
126        // config file with the real defaults from `HostDefaultOptions::new_with_defaults`
127        config_file.host_option_defaults = config_file
128            .host_option_defaults
129            .with_defaults(HostDefaultOptions::new_with_defaults());
130
131        // override config options with command line options
132        config_file.general = options.general.with_defaults(config_file.general);
133        config_file.network = options.network.with_defaults(config_file.network);
134        config_file.host_option_defaults = options
135            .host_option_defaults
136            .with_defaults(config_file.host_option_defaults);
137        config_file.experimental = options.experimental.with_defaults(config_file.experimental);
138
139        // copy the host defaults to all of the hosts
140        for host in config_file.hosts.values_mut() {
141            host.host_options = host
142                .host_options
143                .clone()
144                .with_defaults(config_file.host_option_defaults.clone());
145        }
146
147        Self {
148            general: config_file.general,
149            network: config_file.network,
150            experimental: config_file.experimental,
151            hosts: config_file.hosts,
152        }
153    }
154
155    pub fn model_unblocked_syscall_latency(&self) -> bool {
156        self.general.model_unblocked_syscall_latency.unwrap()
157    }
158
159    pub fn max_unapplied_cpu_latency(&self) -> SimulationTime {
160        let nanos = self.experimental.max_unapplied_cpu_latency.unwrap();
161        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
162        SimulationTime::from_nanos(nanos)
163    }
164
165    pub fn unblocked_syscall_latency(&self) -> SimulationTime {
166        let nanos = self.experimental.unblocked_syscall_latency.unwrap();
167        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
168        SimulationTime::from_nanos(nanos)
169    }
170
171    pub fn unblocked_vdso_latency(&self) -> SimulationTime {
172        let nanos = self.experimental.unblocked_vdso_latency.unwrap();
173        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
174        SimulationTime::from_nanos(nanos)
175    }
176
177    pub fn native_preemption_enabled(&self) -> bool {
178        self.experimental.native_preemption_enabled.unwrap()
179    }
180
181    pub fn native_preemption_native_interval(
182        &self,
183    ) -> anyhow::Result<linux_api::time::kernel_old_timeval> {
184        let t = self.experimental.native_preemption_native_interval.unwrap();
185        let t = core::time::Duration::from(t);
186        // TODO: Would be a little nicer to surface this error when we parse the
187        // config. I think ideally we'd update the type such that some bounds
188        // can be enforced at parse time.
189        if t < core::time::Duration::from_micros(1) {
190            return Err(anyhow::anyhow!(
191                "native_preemption_native_interval must be >= 1 microsecond. Got {t:?}."
192            ));
193        }
194        let rv = linux_api::time::kernel_old_timeval {
195            tv_sec: t.as_secs().try_into().unwrap(),
196            tv_usec: t.subsec_micros().into(),
197        };
198        assert!(!(rv.tv_sec == 0 && rv.tv_usec == 0));
199        Ok(rv)
200    }
201
202    pub fn native_preemption_sim_interval(&self) -> SimulationTime {
203        let t = self.experimental.native_preemption_sim_interval.unwrap();
204        let nanos = t.convert(units::TimePrefix::Nano).unwrap().value();
205        SimulationTime::from_nanos(nanos)
206    }
207
208    pub fn strace_logging_mode(&self) -> Option<FmtOptions> {
209        match self.experimental.strace_logging_mode.as_ref().unwrap() {
210            StraceLoggingMode::Standard => Some(FmtOptions::Standard),
211            StraceLoggingMode::Deterministic => Some(FmtOptions::Deterministic),
212            StraceLoggingMode::Off => None,
213        }
214    }
215}
216
/// Help messages used by Clap for command line arguments, combining the doc string with
/// the Serde default.
///
/// Built lazily on first access by passing the JSON schema of `GeneralOptions` to
/// `generate_help_strs`. The `#[clap(help = ...)]` attributes on `GeneralOptions` look entries
/// up by field name (for example `"stop_time"`) and `unwrap` the result, so every field that
/// does so needs an entry here.
static GENERAL_HELP: Lazy<std::collections::HashMap<String, String>> =
    Lazy::new(|| generate_help_strs(schema_for!(GeneralOptions)));
221
// these must all be Option types since they aren't required by the CLI, even if they're
// required in the configuration file
//
// The `///` doc comments on the fields are read through the schemars schema into `GENERAL_HELP`
// and end up in the CLI help text, so editing them changes user-visible output. Every field
// except `stop_time` names a serde default that is used when the configuration file omits the
// field. Merging two values (see `with_defaults`) uses `merge::option::overwrite_none`.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "General (Override configuration file options)")]
#[clap(next_display_order = None)]
#[serde(deny_unknown_fields)]
#[merge(strategy = merge::option::overwrite_none)]
pub struct GeneralOptions {
    /// The simulated time at which simulated processes are sent a SIGKILL signal
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("stop_time").unwrap().as_str())]
    pub stop_time: Option<units::Time<units::TimePrefix>>,

    /// Initialize randomness using seed N
    #[clap(long, value_name = "N")]
    #[clap(help = GENERAL_HELP.get("seed").unwrap().as_str())]
    #[serde(default = "default_some_1")]
    pub seed: Option<u32>,

    /// How many parallel threads to use to run the simulation. A value of 0 will allow Shadow to
    /// choose the number of threads.
    #[clap(long, short = 'p', value_name = "cores")]
    #[clap(help = GENERAL_HELP.get("parallelism").unwrap().as_str())]
    #[serde(default = "default_some_0")]
    pub parallelism: Option<u32>,

    /// The simulated time that ends Shadow's high network bandwidth/reliability bootstrap period
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("bootstrap_end_time").unwrap().as_str())]
    #[serde(default = "default_some_time_0")]
    pub bootstrap_end_time: Option<units::Time<units::TimePrefix>>,

    /// Log level of output written on stdout. If Shadow was built in release mode, then log
    /// messages at level 'trace' will always be dropped
    #[clap(long, short = 'l', value_name = "level")]
    #[clap(help = GENERAL_HELP.get("log_level").unwrap().as_str())]
    #[serde(default = "default_some_info")]
    pub log_level: Option<LogLevel>,

    /// Interval at which to print heartbeat messages
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("heartbeat_interval").unwrap().as_str())]
    #[serde(default = "default_some_nullable_time_1")]
    pub heartbeat_interval: Option<NullableOption<units::Time<units::TimePrefix>>>,

    /// Path to store simulation output
    #[clap(long, short = 'd', value_name = "path")]
    #[clap(help = GENERAL_HELP.get("data_directory").unwrap().as_str())]
    #[serde(default = "default_data_directory")]
    pub data_directory: Option<String>,

    /// Path to recursively copy during startup and use as the data-directory
    #[clap(long, short = 'e', value_name = "path")]
    #[clap(help = GENERAL_HELP.get("template_directory").unwrap().as_str())]
    #[serde(default)]
    pub template_directory: Option<NullableOption<String>>,

    /// Show the simulation progress on stderr
    #[clap(long, value_name = "bool")]
    #[clap(help = GENERAL_HELP.get("progress").unwrap().as_str())]
    #[serde(default = "default_some_false")]
    pub progress: Option<bool>,

    /// Model syscalls and VDSO functions that don't block as having some
    /// latency. This should have minimal effect on typical simulations, but
    /// can be helpful for programs with "busy loops" that otherwise deadlock
    /// under Shadow.
    #[clap(long, value_name = "bool")]
    #[clap(help = GENERAL_HELP.get("model_unblocked_syscall_latency").unwrap().as_str())]
    #[serde(default = "default_some_false")]
    pub model_unblocked_syscall_latency: Option<bool>,
}
294
295impl GeneralOptions {
296    /// Replace unset (`None`) values of `base` with values from `default`.
297    pub fn with_defaults(mut self, default: Self) -> Self {
298        self.merge(default);
299        self
300    }
301}
302
/// Help messages used by Clap for command line arguments, combining the doc string with
/// the Serde default.
///
/// Built lazily on first access by passing the JSON schema of `NetworkOptions` to
/// `generate_help_strs`. The `#[clap(help = ...)]` attributes on `NetworkOptions` look entries
/// up by field name (for example `"use_shortest_path"`) and `unwrap` the result.
static NETWORK_HELP: Lazy<std::collections::HashMap<String, String>> =
    Lazy::new(|| generate_help_strs(schema_for!(NetworkOptions)));
307
// these must all be Option types since they aren't required by the CLI, even if they're
// required in the configuration file
//
// The `///` doc comments on the fields are read through the schemars schema into `NETWORK_HELP`
// and end up in the CLI help text, so editing them changes user-visible output.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "Network (Override network options)")]
#[clap(next_display_order = None)]
#[serde(deny_unknown_fields)]
#[merge(strategy = merge::option::overwrite_none)]
pub struct NetworkOptions {
    /// The network topology graph
    // `clap(skip)`: not available as a command-line argument, only through the configuration file
    #[clap(skip)]
    pub graph: Option<GraphOptions>,

    /// When routing packets, follow the shortest path rather than following a direct
    /// edge between nodes. If false, the network graph is required to be complete.
    #[serde(default = "default_some_true")]
    #[clap(long, value_name = "bool")]
    #[clap(help = NETWORK_HELP.get("use_shortest_path").unwrap().as_str())]
    pub use_shortest_path: Option<bool>,
}
327
328impl NetworkOptions {
329    /// Replace unset (`None`) values of `base` with values from `default`.
330    pub fn with_defaults(mut self, default: Self) -> Self {
331        self.merge(default);
332        self
333    }
334}
335
/// Help messages used by Clap for command line arguments, combining the doc string with
/// the Serde default.
///
/// Built lazily on first access by passing the JSON schema of `ExperimentalOptions` to
/// `generate_help_strs`. The `#[clap(help = ...)]` attributes on `ExperimentalOptions` look
/// entries up by field name (for example `"use_sched_fifo"`) and `unwrap` the result.
static EXP_HELP: Lazy<std::collections::HashMap<String, String>> =
    Lazy::new(|| generate_help_strs(schema_for!(ExperimentalOptions)));
340
// Every field is an `Option` so that the CLI can leave it unset. Because of the struct-level
// `#[serde(default)]`, fields missing from the configuration file take their value from
// `impl Default for ExperimentalOptions`. Every option carries `hide_short_help = true`, and its
// help string is looked up by field name in `EXP_HELP`, which is generated from the `///` doc
// comments below; editing those comments therefore changes user-visible output.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(
    next_help_heading = "Experimental (Unstable and may change or be removed at any time, regardless of Shadow version)"
)]
#[clap(next_display_order = None)]
#[serde(default, deny_unknown_fields)]
#[merge(strategy = merge::option::overwrite_none)]
pub struct ExperimentalOptions {
    /// Use the SCHED_FIFO scheduler. Requires CAP_SYS_NICE. See sched(7), capabilities(7)
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_sched_fifo").unwrap().as_str())]
    pub use_sched_fifo: Option<bool>,

    /// Count the number of occurrences for individual syscalls
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_syscall_counters").unwrap().as_str())]
    pub use_syscall_counters: Option<bool>,

    /// Count object allocations and deallocations. If disabled, we will not be able to detect object memory leaks
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_object_counters").unwrap().as_str())]
    pub use_object_counters: Option<bool>,

    /// Preload our libc library for all managed processes for fast syscall interposition when possible.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_libc").unwrap().as_str())]
    pub use_preload_libc: Option<bool>,

    /// Preload our OpenSSL RNG library for all managed processes to mitigate non-deterministic use of OpenSSL.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_openssl_rng").unwrap().as_str())]
    pub use_preload_openssl_rng: Option<bool>,

    /// Preload our OpenSSL crypto library for all managed processes to skip some crypto operations
    /// (may speed up simulation if your CPU lacks AES-NI support, but can cause bugs so do not use
    /// unless you know what you're doing).
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_openssl_crypto").unwrap().as_str())]
    pub use_preload_openssl_crypto: Option<bool>,

    /// Use the MemoryManager in memory-mapping mode. This can improve
    /// performance, but disables support for dynamically spawning processes
    /// inside the simulation (e.g. the `fork` syscall).
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_memory_manager").unwrap().as_str())]
    pub use_memory_manager: Option<bool>,

    /// Pin each thread and any processes it executes to the same logical CPU Core to improve cache affinity
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_cpu_pinning").unwrap().as_str())]
    pub use_cpu_pinning: Option<bool>,

    /// Each worker thread will spin in a `sched_yield` loop while waiting for a new task. This is
    /// ignored if not using the thread-per-core scheduler.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_worker_spinning").unwrap().as_str())]
    pub use_worker_spinning: Option<bool>,

    /// If set, overrides the automatically calculated minimum time workers may run ahead when sending events between nodes
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("runahead").unwrap().as_str())]
    pub runahead: Option<NullableOption<units::Time<units::TimePrefix>>>,

    /// Update the minimum runahead dynamically throughout the simulation.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_dynamic_runahead").unwrap().as_str())]
    pub use_dynamic_runahead: Option<bool>,

    /// Initial size of the socket's send buffer
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bytes")]
    #[clap(help = EXP_HELP.get("socket_send_buffer").unwrap().as_str())]
    pub socket_send_buffer: Option<units::Bytes<units::SiPrefixUpper>>,

    /// Enable send window autotuning
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("socket_send_autotune").unwrap().as_str())]
    pub socket_send_autotune: Option<bool>,

    /// Initial size of the socket's receive buffer
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bytes")]
    #[clap(help = EXP_HELP.get("socket_recv_buffer").unwrap().as_str())]
    pub socket_recv_buffer: Option<units::Bytes<units::SiPrefixUpper>>,

    /// Enable receive window autotuning
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("socket_recv_autotune").unwrap().as_str())]
    pub socket_recv_autotune: Option<bool>,

    /// The queueing discipline to use at the network interface
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "mode")]
    #[clap(help = EXP_HELP.get("interface_qdisc").unwrap().as_str())]
    pub interface_qdisc: Option<QDiscMode>,

    /// Log the syscalls for each process to individual "strace" files
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "mode")]
    #[clap(help = EXP_HELP.get("strace_logging_mode").unwrap().as_str())]
    pub strace_logging_mode: Option<StraceLoggingMode>,

    /// Max amount of execution-time latency allowed to accumulate before the
    /// clock is moved forward. Moving the clock forward is a potentially
    /// expensive operation, so larger values reduce simulation overhead, at the
    /// cost of coarser time jumps. Note also that accumulated-but-unapplied
    /// latency is discarded when a thread is blocked on a syscall.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("max_unapplied_cpu_latency").unwrap().as_str())]
    pub max_unapplied_cpu_latency: Option<units::Time<units::TimePrefix>>,

    /// Simulated latency of an unblocked syscall. For efficiency Shadow only
    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
    /// reached.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("unblocked_syscall_latency").unwrap().as_str())]
    pub unblocked_syscall_latency: Option<units::Time<units::TimePrefix>>,

    /// Simulated latency of a vdso "syscall". For efficiency Shadow only
    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
    /// reached.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("unblocked_vdso_latency").unwrap().as_str())]
    pub unblocked_vdso_latency: Option<units::Time<units::TimePrefix>>,

    /// The host scheduler implementation, which decides how to assign hosts to threads and threads
    /// to CPU cores
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "name")]
    #[clap(help = EXP_HELP.get("scheduler").unwrap().as_str())]
    pub scheduler: Option<Scheduler>,

    /// When true, report error-level messages to stderr in addition to logging to stdout.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("report_errors_to_stderr").unwrap().as_str())]
    pub report_errors_to_stderr: Option<bool>,

    /// Use the rust TCP implementation
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_new_tcp").unwrap().as_str())]
    pub use_new_tcp: Option<bool>,

    /// When true, and when managed code runs for an extended time without
    /// returning control to shadow (e.g. by making a syscall), shadow preempts
    /// the managed code and moves simulated time forward. This can be used to
    /// escape "pure-CPU busy-loops", but isn't usually needed, breaks
    /// simulation determinism, and significantly affects simulation
    /// performance.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("native_preemption_enabled").unwrap().as_str())]
    pub native_preemption_enabled: Option<bool>,

    /// When `native_preemption_enabled` is true, amount of native CPU-time to
    /// wait before preempting managed code that hasn't returned control to
    /// shadow. Only supports microsecond granularity, and values below 1 microsecond
    /// are rejected.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("native_preemption_native_interval").unwrap().as_str())]
    pub native_preemption_native_interval: Option<units::Time<units::TimePrefix>>,

    /// When `native_preemption_enabled` is true, amount of simulated time to
    /// consume after `native_preemption_native_interval` has elapsed without
    /// returning control to shadow.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("native_preemption_sim_interval").unwrap().as_str())]
    pub native_preemption_sim_interval: Option<units::Time<units::TimePrefix>>,
}
529
530impl ExperimentalOptions {
531    /// Replace unset (`None`) values of `base` with values from `default`.
532    pub fn with_defaults(mut self, default: Self) -> Self {
533        self.merge(default);
534        self
535    }
536}
537
538impl Default for ExperimentalOptions {
539    fn default() -> Self {
540        Self {
541            use_sched_fifo: Some(false),
542            use_syscall_counters: Some(true),
543            use_object_counters: Some(true),
544            use_preload_libc: Some(true),
545            use_preload_openssl_rng: Some(true),
546            use_preload_openssl_crypto: Some(false),
547            max_unapplied_cpu_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
548            // 1-2 microseconds is a ballpark estimate of the minimal latency for
549            // context switching to the kernel and back on modern machines.
550            // Default to the lower end to minimize effect in simualations without busy loops.
551            unblocked_syscall_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
552            // Actual latencies vary from ~40 to ~400 CPU cycles. https://stackoverflow.com/a/13096917
553            // Default to the lower end to minimize effect in simualations without busy loops.
554            unblocked_vdso_latency: Some(units::Time::new(10, units::TimePrefix::Nano)),
555            use_memory_manager: Some(false),
556            use_cpu_pinning: Some(true),
557            use_worker_spinning: Some(true),
558            runahead: Some(NullableOption::Value(units::Time::new(
559                1,
560                units::TimePrefix::Milli,
561            ))),
562            use_dynamic_runahead: Some(false),
563            socket_send_buffer: Some(units::Bytes::new(131_072, units::SiPrefixUpper::Base)),
564            socket_send_autotune: Some(true),
565            socket_recv_buffer: Some(units::Bytes::new(174_760, units::SiPrefixUpper::Base)),
566            socket_recv_autotune: Some(true),
567            interface_qdisc: Some(QDiscMode::Fifo),
568            strace_logging_mode: Some(StraceLoggingMode::Off),
569            scheduler: Some(Scheduler::ThreadPerCore),
570            report_errors_to_stderr: Some(true),
571            use_new_tcp: Some(false),
572            native_preemption_enabled: Some(false),
573            native_preemption_native_interval: Some(units::Time::new(
574                100,
575                units::TimePrefix::Milli,
576            )),
577            native_preemption_sim_interval: Some(units::Time::new(10, units::TimePrefix::Milli)),
578        }
579    }
580}
581
/// Help messages used by Clap for command line arguments, combining the doc string with
/// the Serde default.
///
/// Built lazily on first access by passing the JSON schema of `HostDefaultOptions` to
/// `generate_help_strs`. The `#[clap(help = ...)]` attributes on `HostDefaultOptions` look
/// entries up by field name (for example `"log_level"`) and `unwrap` the result.
static HOST_HELP: Lazy<std::collections::HashMap<String, String>> =
    Lazy::new(|| generate_help_strs(schema_for!(HostDefaultOptions)));
586
// Host options that can be given globally (`host_option_defaults` in the configuration file or
// on the command line) and overridden per host (`HostOptions::host_options`). The `///` doc
// comments on the fields are read through the schemars schema into `HOST_HELP` and end up in the
// CLI help text, so editing them changes user-visible output.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "Host Defaults (Default options for hosts)")]
#[clap(next_display_order = None)]
#[serde(default, deny_unknown_fields)]
// serde will default all fields to `None`, but in the cli help we want the actual defaults
#[schemars(default = "HostDefaultOptions::new_with_defaults")]
#[merge(strategy = merge::option::overwrite_none)]
pub struct HostDefaultOptions {
    /// Log level at which to print node messages
    // given an explicit CLI name; `GeneralOptions::log_level` is flattened into the same parser
    // and would otherwise presumably claim the same `log-level` name — confirm
    #[clap(long = "host-log-level", name = "host-log-level")]
    #[clap(value_name = "level")]
    #[clap(help = HOST_HELP.get("log_level").unwrap().as_str())]
    pub log_level: Option<NullableOption<LogLevel>>,

    /// Should shadow generate pcap files?
    #[clap(long, value_name = "bool")]
    #[clap(help = HOST_HELP.get("pcap_enabled").unwrap().as_str())]
    pub pcap_enabled: Option<bool>,

    /// How much data to capture per packet (header and payload) if pcap logging is enabled
    #[clap(long, value_name = "bytes")]
    #[clap(help = HOST_HELP.get("pcap_capture_size").unwrap().as_str())]
    pub pcap_capture_size: Option<units::Bytes<units::SiPrefixUpper>>,
}
611
612impl HostDefaultOptions {
613    pub fn new_with_defaults() -> Self {
614        Self {
615            log_level: None,
616            pcap_enabled: Some(false),
617            // From pcap(3): "A value of 65535 should be sufficient, on most if not all networks, to
618            // capture all the data available from the packet". The maximum length of an IP packet
619            // (including the header) is 65535 bytes.
620            pcap_capture_size: Some(units::Bytes::new(65535, units::SiPrefixUpper::Base)),
621        }
622    }
623
624    /// Replace unset (`None`) values of `base` with values from `default`.
625    pub fn with_defaults(mut self, default: Self) -> Self {
626        self.merge(default);
627        self
628    }
629}
630
// A derived `Default` would produce the same all-`None` value, which is what the clippy lint
// points out; the hand-written impl is presumably kept so that the explanation below has a home.
#[allow(clippy::derivable_impls)]
impl Default for HostDefaultOptions {
    /// Returns host options with every field unset (`None`). Use
    /// `HostDefaultOptions::new_with_defaults` to get the real default values.
    fn default() -> Self {
        // Our config fields would typically be initialized with their real defaults here in the
        // `Default::default` implementation, but we need to handle the host options differently
        // because the global `host_option_defaults` can be overridden by host-specific
        // `host_options`. So instead we use defaults of `None` here and set the real defaults with
        // `Self::new_with_defaults` in `ConfigOptions::new`.
        Self {
            log_level: None,
            pcap_enabled: None,
            pcap_capture_size: None,
        }
    }
}
646
// Single-variant helper enum used by `ProcessFinalState::Running`. With
// `rename_all = "kebab-case"` its only variant is (de)serialized as the string `running`.
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug, Copy, Clone, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum RunningVal {
    Running,
}
652
/// The enum variants here have an extra level of indirection to get the
/// serde serialization that we want.
// Because the enum is `untagged`, each variant is (de)serialized as its content only, without a
// variant name wrapped around it.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum ProcessFinalState {
    // serialized as a map with the single key `exited`
    Exited { exited: i32 },
    // serialized as a map with the single key `signaled`
    Signaled { signaled: Signal },
    // serialized as whatever `RunningVal` serializes to
    Running(RunningVal),
}
662
663impl Default for ProcessFinalState {
664    fn default() -> Self {
665        Self::Exited { exited: 0 }
666    }
667}
668
669impl std::fmt::Display for ProcessFinalState {
670    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
671        // We use the yaml serialization here so that when reporting that an
672        // expected state didn't match the actual state, it's clear how to set
673        // the expected state in the config file to match the actual state if
674        // desired.
675        //
676        // The current enum works OK for this since there are no internal
677        // newlines in the serialization; if there are some later we might wand
678        // to serialize to json instead, which can always be put on a single
679        // line and should also be valid yaml.
680        let s = serde_yaml::to_string(self).or(Err(std::fmt::Error))?;
681        write!(f, "{}", s.trim())
682    }
683}
684
// Options of a single process of a host (see `HostOptions::processes`). Unknown keys are
// rejected. The `///` doc comments on the fields become the descriptions in the schema derived
// through `JsonSchema`, which is why the notes added here are plain `//` comments.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ProcessOptions {
    // required (no serde default); presumably the path of the program to run — confirm against
    // the code that starts processes. Per the module documentation, paths are neither checked
    // for existence nor canonicalized in this module.
    pub path: std::path::PathBuf,

    /// Process arguments
    #[serde(default = "default_args_empty")]
    pub args: ProcessArgs,

    /// Environment variables passed when executing this process
    // a BTreeMap, so the variables are kept sorted by name
    #[serde(default)]
    pub environment: BTreeMap<EnvName, String>,

    /// The simulated time at which to execute the process
    #[serde(default)]
    pub start_time: units::Time<units::TimePrefix>,

    /// The simulated time at which to send a `shutdown_signal` signal to the process
    #[serde(default)]
    pub shutdown_time: Option<units::Time<units::TimePrefix>>,

    /// The signal that will be sent to the process at `shutdown_time`
    #[serde(default = "default_sigterm")]
    pub shutdown_signal: Signal,

    /// The expected final state of the process. Shadow will report an error
    /// if the actual state doesn't match.
    // defaults to `ProcessFinalState::default()`, i.e. exited with status 0
    #[serde(default)]
    pub expected_final_state: ProcessFinalState,
}
715
// Options for a single host. Unknown keys are rejected when deserializing
// because of `deny_unknown_fields`. `network_node_id` and `processes` have no
// serde default and must be present.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct HostOptions {
    /// Network graph node ID to assign the host to
    pub network_node_id: u32,

    // the processes configured for this host
    pub processes: Vec<ProcessOptions>,

    /// IP address to assign to the host
    #[serde(default)]
    pub ip_addr: Option<std::net::Ipv4Addr>,

    /// Downstream bandwidth capacity of the host
    #[serde(default)]
    pub bandwidth_down: Option<units::BitsPerSec<units::SiPrefixUpper>>,

    /// Upstream bandwidth capacity of the host
    #[serde(default)]
    pub bandwidth_up: Option<units::BitsPerSec<units::SiPrefixUpper>>,

    // Host-specific overrides of the global `host_option_defaults`; the default
    // value leaves every option unset (`None`).
    #[serde(default)]
    pub host_options: HostDefaultOptions,
}
739
// Log verbosity levels. They (de)serialize in kebab-case: `error`, `warning`,
// `info`, `debug`, `trace`.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum LogLevel {
    Error,
    Warning,
    Info,
    Debug,
    Trace,
}
749
750impl FromStr for LogLevel {
751    type Err = serde_yaml::Error;
752
753    fn from_str(s: &str) -> Result<Self, Self::Err> {
754        serde_yaml::from_str(s)
755    }
756}
757
758impl LogLevel {
759    pub fn to_c_loglevel(&self) -> c_log::LogLevel {
760        match self {
761            Self::Error => c_log::_LogLevel_LOGLEVEL_ERROR,
762            Self::Warning => c_log::_LogLevel_LOGLEVEL_WARNING,
763            Self::Info => c_log::_LogLevel_LOGLEVEL_INFO,
764            Self::Debug => c_log::_LogLevel_LOGLEVEL_DEBUG,
765            Self::Trace => c_log::_LogLevel_LOGLEVEL_TRACE,
766        }
767    }
768}
769
770impl From<LogLevel> for log::Level {
771    fn from(level: LogLevel) -> Self {
772        match level {
773            LogLevel::Error => log::Level::Error,
774            LogLevel::Warning => log::Level::Warn,
775            LogLevel::Info => log::Level::Info,
776            LogLevel::Debug => log::Level::Debug,
777            LogLevel::Trace => log::Level::Trace,
778        }
779    }
780}
781
// A host name. The inner string is private; the hand-written `Deserialize`
// impl for this type validates the name before constructing it.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
pub struct HostName(String);
784
785impl<'de> serde::Deserialize<'de> for HostName {
786    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
787        struct HostNameVisitor;
788
789        impl serde::de::Visitor<'_> for HostNameVisitor {
790            type Value = HostName;
791
792            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
793                formatter.write_str("a string")
794            }
795
796            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
797            where
798                E: serde::de::Error,
799            {
800                // hostname(7): "Valid characters for hostnames are ASCII(7) letters from a to z,
801                // the digits from 0 to 9, and the hyphen (-)."
802                fn is_allowed(c: char) -> bool {
803                    c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.'
804                }
805                if let Some(invalid_char) = v.chars().find(|x| !is_allowed(*x)) {
806                    return Err(E::custom(format!(
807                        "invalid hostname character: '{invalid_char}'"
808                    )));
809                }
810
811                if v.is_empty() {
812                    return Err(E::custom("empty hostname"));
813                }
814
815                // hostname(7): "A hostname may not start with a hyphen."
816                if v.starts_with('-') {
817                    return Err(E::custom("hostname begins with a '-' character"));
818                }
819
820                // hostname(7): "Each element of the hostname must be from 1 to 63 characters long
821                // and the entire hostname, including the dots, can be at most 253 characters long."
822                if v.len() > 253 {
823                    return Err(E::custom("hostname exceeds 253 characters"));
824                }
825
826                Ok(HostName(v))
827            }
828
829            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
830            where
831                E: serde::de::Error,
832            {
833                // serde::de::Visitor: "It is never correct to implement `visit_string` without
834                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
835                self.visit_string(v.to_string())
836            }
837        }
838
839        deserializer.deserialize_string(HostNameVisitor)
840    }
841}
842
843impl std::ops::Deref for HostName {
844    type Target = String;
845
846    fn deref(&self) -> &Self::Target {
847        &self.0
848    }
849}
850
851impl From<HostName> for String {
852    fn from(name: HostName) -> Self {
853        name.0
854    }
855}
856
857impl std::fmt::Display for HostName {
858    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
859        self.0.fmt(f)
860    }
861}
862
// An environment variable name. The inner string is private; `EnvName::new`
// (also used by the `Deserialize` impl) rejects names containing '='.
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
pub struct EnvName(String);
865
866impl EnvName {
867    pub fn new(name: impl Into<String>) -> Option<Self> {
868        let name = name.into();
869
870        // an environment variable name cannot contain a '=' character
871        if name.contains('=') {
872            return None;
873        }
874
875        Some(Self(name))
876    }
877}
878
879impl<'de> serde::Deserialize<'de> for EnvName {
880    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
881        struct EnvNameVisitor;
882
883        impl serde::de::Visitor<'_> for EnvNameVisitor {
884            type Value = EnvName;
885
886            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
887                formatter.write_str("a string")
888            }
889
890            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
891            where
892                E: serde::de::Error,
893            {
894                let Some(name) = EnvName::new(v) else {
895                    let e = "environment variable name contains a '=' character";
896                    return Err(E::custom(e));
897                };
898
899                Ok(name)
900            }
901
902            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
903            where
904                E: serde::de::Error,
905            {
906                // serde::de::Visitor: "It is never correct to implement `visit_string` without
907                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
908                self.visit_string(v.to_string())
909            }
910        }
911
912        deserializer.deserialize_string(EnvNameVisitor)
913    }
914}
915
916impl std::ops::Deref for EnvName {
917    type Target = String;
918
919    fn deref(&self) -> &Self::Target {
920        &self.0
921    }
922}
923
924impl From<EnvName> for String {
925    fn from(name: EnvName) -> Self {
926        name.0
927    }
928}
929
930impl std::fmt::Display for EnvName {
931    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
932        self.0.fmt(f)
933    }
934}
935
// Scheduler choices. They (de)serialize in kebab-case: `thread-per-host`,
// `thread-per-core`.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum Scheduler {
    ThreadPerHost,
    ThreadPerCore,
}
942
943impl FromStr for Scheduler {
944    type Err = serde_yaml::Error;
945
946    fn from_str(s: &str) -> Result<Self, Self::Err> {
947        serde_yaml::from_str(s)
948    }
949}
950
/// Helper function for serde default `Some("shadow.data")` values.
fn default_data_directory() -> Option<String> {
    let dir = String::from("shadow.data");
    Some(dir)
}
954
/// Parse a string as a comma-delimited set of `T` values.
///
/// Each comma-separated piece is trimmed and then parsed with `T::from_str`. The first piece
/// that fails to parse aborts the whole operation and its error is returned. Duplicate values
/// collapse into a single set entry.
fn parse_set<T>(s: &str) -> Result<HashSet<T>, <T as FromStr>::Err>
where
    T: std::cmp::Eq + std::hash::Hash + FromStr,
{
    let mut values = HashSet::new();
    for piece in s.split(',') {
        let value: T = piece.trim().parse()?;
        values.insert(value);
    }
    Ok(values)
}
962
963/// Parse a string as a comma-delimited set of `String` values.
964fn parse_set_str(s: &str) -> Result<HashSet<String>, <String as FromStr>::Err> {
965    parse_set(s)
966}
967
// Queuing discipline modes. They (de)serialize in kebab-case: `fifo`,
// `round-robin`.
// NOTE(review): `repr(C)` suggests the value is shared with C code — confirm at the use sites.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
#[repr(C)]
pub enum QDiscMode {
    Fifo,
    RoundRobin,
}
975
976impl FromStr for QDiscMode {
977    type Err = serde_yaml::Error;
978
979    fn from_str(s: &str) -> Result<Self, Self::Err> {
980        serde_yaml::from_str(s)
981    }
982}
983
// Compression formats that can be named for a `FileSource`. The single
// variant (de)serializes as `xz`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum Compression {
    Xz,
}
989
// A file reference with an optional compression format. Unknown keys are
// rejected when deserializing because of `deny_unknown_fields`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct FileSource {
    /// The path to the file
    // kept as a plain string; per the module docs, paths are not checked while parsing
    pub path: String,
    /// The file's compression format
    pub compression: Option<Compression>,
}
998
// Where a graph is read from: a file (`file`) or a string embedded directly in
// the configuration (`inline`). Variant names are (de)serialized in kebab-case.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum GraphSource {
    File(FileSource),
    Inline(String),
}
1005
// The network graph selection. The enum is internally tagged by a `type` key
// whose value is `gml` or `1_gbit_switch`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
// we use "kebab-case" for other shadow options, but are leaving this as "snake_case" for backwards
// compatibility
#[serde(tag = "type", rename_all = "snake_case")]
pub enum GraphOptions {
    // a graph taken from the given source
    Gml(GraphSource),
    // explicit rename, since "snake_case" alone would not produce this tag value
    #[serde(rename = "1_gbit_switch")]
    OneGbitSwitch,
}
1015
// Process arguments, given either as a list of strings or as a single string.
// Only `Serialize` is derived (untagged, so each variant serializes as its
// inner value); `Deserialize` is implemented by hand for this type.
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum ProcessArgs {
    List(Vec<String>),
    Str(String),
}
1022
1023/// Serde doesn't provide good deserialization error messages for untagged enums, so we implement
1024/// our own. For example, if serde finds a yaml value such as 4 for the process arguments, it won't
1025/// deserialize it to the string "4" and the yaml parsing will fail. The serde-generated error
1026/// message will say something like "data did not match any variant of untagged enum ProcessArgs at
1027/// line X column Y" which isn't very helpful to the user, so here we try to give a better error
1028/// message.
1029impl<'de> serde::Deserialize<'de> for ProcessArgs {
1030    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1031        struct ProcessArgsVisitor;
1032
1033        impl<'de> serde::de::Visitor<'de> for ProcessArgsVisitor {
1034            type Value = ProcessArgs;
1035
1036            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1037                formatter.write_str("a string or a sequence of strings")
1038            }
1039
1040            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1041            where
1042                E: serde::de::Error,
1043            {
1044                Ok(Self::Value::Str(v.to_owned()))
1045            }
1046
1047            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
1048            where
1049                A: serde::de::SeqAccess<'de>,
1050            {
1051                let mut v = vec![];
1052
1053                while let Some(val) = seq.next_element()? {
1054                    v.push(val);
1055                }
1056
1057                Ok(Self::Value::List(v))
1058            }
1059        }
1060
1061        deserializer.deserialize_any(ProcessArgsVisitor)
1062    }
1063}
1064
// TODO: use linux_api's Signal internally, which we control and which supports
// realtime signals. We need to implement conversion to and from strings to do
// so, while being careful that the conversion is compatible with nix's so as
// not to be a breaking change to our configuration format.
/// A signal, stored as nix's `Signal`. This newtype exists so that this module
/// can provide its own serde and `JsonSchema` implementations for it.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Signal(nix::sys::signal::Signal);
1071
1072impl From<nix::sys::signal::Signal> for Signal {
1073    fn from(value: nix::sys::signal::Signal) -> Self {
1074        Self(value)
1075    }
1076}
1077
1078impl TryFrom<linux_api::signal::Signal> for Signal {
1079    type Error = <nix::sys::signal::Signal as TryFrom<i32>>::Error;
1080    fn try_from(value: linux_api::signal::Signal) -> Result<Self, Self::Error> {
1081        let signal = nix::sys::signal::Signal::try_from(value.as_i32())?;
1082        Ok(Self(signal))
1083    }
1084}
1085
1086impl serde::Serialize for Signal {
1087    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1088    where
1089        S: serde::Serializer,
1090    {
1091        serializer.serialize_str(self.0.as_str())
1092    }
1093}
1094
1095impl<'de> serde::Deserialize<'de> for Signal {
1096    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1097        struct SignalVisitor;
1098
1099        impl serde::de::Visitor<'_> for SignalVisitor {
1100            type Value = Signal;
1101
1102            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1103                formatter.write_str("a signal string (e.g. \"SIGINT\") or integer")
1104            }
1105
1106            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1107            where
1108                E: serde::de::Error,
1109            {
1110                nix::sys::signal::Signal::from_str(v)
1111                    .map(Signal)
1112                    .map_err(|_e| E::custom(format!("Invalid signal string: {v}")))
1113            }
1114
1115            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
1116            where
1117                E: serde::de::Error,
1118            {
1119                let v = i32::try_from(v)
1120                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1121                nix::sys::signal::Signal::try_from(v)
1122                    .map(Signal)
1123                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))
1124            }
1125
1126            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
1127            where
1128                E: serde::de::Error,
1129            {
1130                let v = i64::try_from(v)
1131                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1132                self.visit_i64(v)
1133            }
1134        }
1135
1136        deserializer.deserialize_any(SignalVisitor)
1137    }
1138}
1139
1140impl std::fmt::Display for Signal {
1141    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1142        write!(f, "{}", self.0)
1143    }
1144}
1145
1146impl JsonSchema for Signal {
1147    fn schema_name() -> String {
1148        String::from("Signal")
1149    }
1150
1151    fn json_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
1152        // Use the "anything" schema. The Deserialize implementation does the
1153        // actual parsing and error handling.
1154        // TODO: Ideally we'd only accept strings or integers here. The
1155        // documentation isn't very clear about how to construct such a schema
1156        // though, and we currently only use the schemas for command-line-option
1157        // help strings. Since we don't currently take Signals in
1158        // command-line-options, it doesn't matter.
1159        schemars::schema::Schema::Bool(true)
1160    }
1161}
1162
1163impl std::ops::Deref for Signal {
1164    type Target = nix::sys::signal::Signal;
1165
1166    fn deref(&self) -> &Self::Target {
1167        &self.0
1168    }
1169}
1170
// Strace logging modes. They (de)serialize in kebab-case: `off`, `standard`,
// `deterministic`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum StraceLoggingMode {
    Off,
    Standard,
    Deterministic,
}
1178
1179impl FromStr for StraceLoggingMode {
1180    type Err = serde_yaml::Error;
1181
1182    fn from_str(s: &str) -> Result<Self, Self::Err> {
1183        serde_yaml::from_str(s)
1184    }
1185}
1186
/// This wrapper type allows cli options to specify "null" to overwrite a config file option with
/// `None`, and is intended to be used for options where "null" is a valid option value.
///
/// **Warning**: This may result in unexpected behaviour when wrapping string types. For example, if
/// this is used for a file path option, the value "null" will conflict with the valid filename
/// "null". So if the user specifies "null" for this option, Shadow will assume it means "no value"
/// rather than the filename "null".
///
/// ### Motivation
///
/// For configuration options, there are generally three states:
/// - set
/// - not set
/// - null
///
/// For serde, all three states are configurable:
/// - set: `runahead: 5ms`
/// - not set: (no `runahead` option used in yaml)
/// - null: `runahead: null`
///
/// For clap, there are only two states:
/// - set: `--runahead 5ms`
/// - not set: (no `--runahead` option used in command)
///
/// There is no way to set a "null" state for cli options with clap.
///
/// ### Configuration in Shadow
///
/// Shadow first parses the config file and cli options separately before merging them.
///
/// Parsing for serde:
/// - set: `runahead: 5ms` => runahead is set to `Some(5ms)`
/// - not set: (no `runahead` option used in yaml) => runahead is set to its default (either
///   `Some(..)` or `None`)
/// - null: `runahead: null` => runahead is set to `None`
///
/// Parsing for clap:
/// - set: `--runahead 5ms` => runahead is set to `Some(5ms)`
/// - not set: (no `--runahead` option used in command) => runahead is set to `None`
///
/// Then the options are merged such that any `Some(..)` options from the cli options will overwrite
/// any `Some` or `None` options from the config file.
///
/// The issue is that no clap option can overwrite a config file option of `Some` with a value of
/// `None`. For example if the config file specifies `runahead: 5ms`, then with clap you can only
/// use `--runahead 2ms` to change the runahead to a `Some(2ms)` value, or you can not set
/// `--runahead` at all to leave it as a `Some(5ms)` value. But there is no cli option to change the
/// runahead to a `None` value.
///
/// This `NullableOption` type is a wrapper to allow you to specify "null" on the command line to
/// overwrite the config file value with `None`. From the example above, you could now specify
/// "--runahead null" to overwrite the config file value (for example `Some(5ms)`) with a `None`
/// value.
#[derive(Debug, Copy, Clone, JsonSchema, Eq, PartialEq)]
pub enum NullableOption<T> {
    // an actual value was given
    Value(T),
    // the literal "null" was given (produced by the `FromStr` impl; the
    // `Deserialize` impl always yields `Value`)
    Null,
}
1245
1246impl<T> NullableOption<T> {
1247    pub fn as_ref(&self) -> NullableOption<&T> {
1248        match self {
1249            NullableOption::Value(x) => NullableOption::Value(x),
1250            NullableOption::Null => NullableOption::Null,
1251        }
1252    }
1253
1254    pub fn as_mut(&mut self) -> NullableOption<&mut T> {
1255        match self {
1256            NullableOption::Value(x) => NullableOption::Value(x),
1257            NullableOption::Null => NullableOption::Null,
1258        }
1259    }
1260
1261    /// Easier to use than `Into<Option<T>>` since `Option` has a lot of blanket `From`
1262    /// implementations, requiring a lot of type annotations.
1263    pub fn to_option(self) -> Option<T> {
1264        match self {
1265            NullableOption::Value(x) => Some(x),
1266            NullableOption::Null => None,
1267        }
1268    }
1269}
1270
1271impl<T: serde::Serialize> serde::Serialize for NullableOption<T> {
1272    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1273        match self {
1274            // use the inner type's serialize function
1275            Self::Value(x) => Ok(T::serialize(x, serializer)?),
1276            Self::Null => serializer.serialize_none(),
1277        }
1278    }
1279}
1280
1281impl<'de, T: serde::Deserialize<'de>> serde::Deserialize<'de> for NullableOption<T> {
1282    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1283        // always use the inner type's deserialize function
1284        Ok(Self::Value(T::deserialize(deserializer)?))
1285    }
1286}
1287
1288impl<T> FromStr for NullableOption<T>
1289where
1290    T: FromStr<Err: std::fmt::Debug + std::fmt::Display>,
1291{
1292    type Err = T::Err;
1293
1294    fn from_str(s: &str) -> Result<Self, Self::Err> {
1295        match s {
1296            // since we use serde-yaml, use "null" to match yaml's "null"
1297            "null" => Ok(Self::Null),
1298            x => Ok(Self::Value(FromStr::from_str(x)?)),
1299        }
1300    }
1301}
1302
/// A trait for `Option`-like types that can be flattened into a single `Option`.
pub trait Flatten<T> {
    /// Consumes `self` and returns the innermost value, if there is one.
    fn flatten(self) -> Option<T>;
    /// Like [`Flatten::flatten`], but borrows the innermost value instead of consuming `self`.
    fn flatten_ref(&self) -> Option<&T>;
}
1308
1309impl<T> Flatten<T> for Option<NullableOption<T>> {
1310    fn flatten(self) -> Option<T> {
1311        self.and_then(|x| x.to_option())
1312    }
1313
1314    fn flatten_ref(&self) -> Option<&T> {
1315        self.as_ref().and_then(|x| x.as_ref().to_option())
1316    }
1317}
1318
1319/// Helper function for serde default `ProcessArgs::Str("")` values.
1320fn default_args_empty() -> ProcessArgs {
1321    ProcessArgs::Str("".to_string())
1322}
1323
1324/// Helper function for serde default `Signal(Signal::SIGTERM)` values.
1325fn default_sigterm() -> Signal {
1326    Signal(nix::sys::signal::Signal::SIGTERM)
1327}
1328
1329/// Helper function for serde default `Some(0)` values.
1330fn default_some_time_0() -> Option<units::Time<units::TimePrefix>> {
1331    Some(units::Time::new(0, units::TimePrefix::Sec))
1332}
1333
/// Serde default provider for optional booleans that default to `Some(true)`.
fn default_some_true() -> Option<bool> {
    let enabled = true;
    Some(enabled)
}
1338
/// Serde default provider for optional booleans that default to `Some(false)`.
fn default_some_false() -> Option<bool> {
    let enabled = false;
    Some(enabled)
}
1343
/// Serde default provider for optional `u32` values that default to `Some(0)`.
fn default_some_0() -> Option<u32> {
    let value: u32 = 0;
    Some(value)
}
1348
/// Serde default provider for optional `u32` values that default to `Some(1)`.
fn default_some_1() -> Option<u32> {
    let value: u32 = 1;
    Some(value)
}
1353
1354/// Helper function for serde default `Some(NullableOption::Value(1 sec))` values.
1355fn default_some_nullable_time_1() -> Option<NullableOption<units::Time<units::TimePrefix>>> {
1356    let time = units::Time::new(1, units::TimePrefix::Sec);
1357    Some(NullableOption::Value(time))
1358}
1359
1360/// Helper function for serde default `Some(LogLevel::Info)` values.
1361fn default_some_info() -> Option<LogLevel> {
1362    Some(LogLevel::Info)
1363}
1364
/// Graph text with a single node (id 0, "1 Gbit" up and down host bandwidth) and one
/// edge from that node to itself ("1 ms" latency, no packet loss).
// NOTE(review): presumably this is the graph used for the `1_gbit_switch` graph type — confirm
// at the use site.
// when updating this graph, make sure to also update the copy in docs/shadow_config_spec.md
pub const ONE_GBIT_SWITCH_GRAPH: &str = r#"graph [
  directed 0
  node [
    id 0
    host_bandwidth_up "1 Gbit"
    host_bandwidth_down "1 Gbit"
  ]
  edge [
    source 0
    target 0
    latency "1 ms"
    packet_loss 0.0
  ]
]"#;
1380
1381/// Generate help strings for objects in a JSON schema, including the Serde defaults if available.
1382fn generate_help_strs(
1383    schema: schemars::schema::RootSchema,
1384) -> std::collections::HashMap<String, String> {
1385    let mut defaults = std::collections::HashMap::<String, String>::new();
1386    for (name, obj) in &schema.schema.object.as_ref().unwrap().properties {
1387        if let Some(meta) = obj.clone().into_object().metadata {
1388            let description = meta.description.unwrap_or_default();
1389            let space = if !description.is_empty() { " " } else { "" };
1390            match meta.default {
1391                Some(default) => defaults.insert(
1392                    name.clone(),
1393                    format!("{}{}[default: {}]", description, space, default),
1394                ),
1395                None => defaults.insert(name.clone(), description.to_string()),
1396            };
1397        }
1398    }
1399    defaults
1400}
1401
1402/// Parses a string as a list of arguments following the shell's parsing rules. This
1403/// uses `g_shell_parse_argv()` for parsing.
1404pub fn parse_string_as_args(args_str: &OsStr) -> Result<Vec<OsString>, String> {
1405    if args_str.is_empty() {
1406        return Ok(Vec::new());
1407    }
1408
1409    let args_str = CString::new(args_str.as_bytes()).unwrap();
1410
1411    // parse the argument string
1412    let mut argc: libc::c_int = 0;
1413    let mut argv: *mut *mut libc::c_char = std::ptr::null_mut();
1414    let mut error: *mut libc::c_char = std::ptr::null_mut();
1415    let rv = unsafe { c::process_parseArgStr(args_str.as_ptr(), &mut argc, &mut argv, &mut error) };
1416
1417    // if there was an error, return a copy of the error string
1418    if !rv {
1419        let error_message = match error.is_null() {
1420            false => unsafe { CStr::from_ptr(error) }.to_str().unwrap(),
1421            true => "Unknown parsing error",
1422        }
1423        .to_string();
1424
1425        unsafe { c::process_parseArgStrFree(argv, error) };
1426        return Err(error_message);
1427    }
1428
1429    assert!(!argv.is_null());
1430
1431    // copy the arg strings
1432    let args: Vec<_> = (0..argc)
1433        .map(|x| unsafe {
1434            let arg_ptr = *argv.add(x as usize);
1435            assert!(!arg_ptr.is_null());
1436            OsStr::from_bytes(CStr::from_ptr(arg_ptr).to_bytes()).to_os_string()
1437        })
1438        .collect();
1439
1440    unsafe { c::process_parseArgStrFree(argv, error) };
1441    Ok(args)
1442}
1443
1444#[cfg(test)]
1445mod tests {
1446    use super::*;
1447
1448    #[test]
1449    // can't call foreign function: process_parseArgStr
1450    #[cfg_attr(miri, ignore)]
1451    fn test_parse_args() {
1452        let arg_str = r#"the quick brown fox "jumped over" the "\"lazy\" dog""#;
1453        let expected_args = &[
1454            "the",
1455            "quick",
1456            "brown",
1457            "fox",
1458            "jumped over",
1459            "the",
1460            "\"lazy\" dog",
1461        ];
1462
1463        let arg_str: OsString = arg_str.into();
1464        let args = parse_string_as_args(&arg_str).unwrap();
1465
1466        assert_eq!(args, expected_args);
1467    }
1468
1469    #[test]
1470    // can't call foreign function: process_parseArgStr
1471    #[cfg_attr(miri, ignore)]
1472    fn test_parse_args_empty() {
1473        let arg_str = "";
1474        let expected_args: &[&str] = &[];
1475
1476        let arg_str: OsString = arg_str.into();
1477        let args = parse_string_as_args(&arg_str).unwrap();
1478
1479        assert_eq!(args, expected_args);
1480    }
1481
1482    #[test]
1483    // can't call foreign function: process_parseArgStr
1484    #[cfg_attr(miri, ignore)]
1485    fn test_parse_args_error() {
1486        let arg_str = r#"hello "world"#;
1487
1488        let arg_str: OsString = arg_str.into();
1489        let err_str = parse_string_as_args(&arg_str).unwrap_err();
1490
1491        assert!(!err_str.is_empty());
1492    }
1493
1494    #[test]
1495    // can't call foreign function: process_parseArgStr
1496    #[cfg_attr(miri, ignore)]
1497    fn test_nullable_option() {
1498        // format the yaml with an optional general option
1499        let yaml_fmt_fn = |option| {
1500            format!(
1501                r#"
1502                general:
1503                  stop_time: 1 min
1504                  {}
1505                network:
1506                  graph:
1507                    type: 1_gbit_switch
1508                hosts:
1509                  myhost:
1510                    network_node_id: 0
1511                    processes:
1512                    - path: /bin/true
1513                "#,
1514                option
1515            )
1516        };
1517
1518        let time_1_sec = units::Time::new(1, units::TimePrefix::Sec);
1519        let time_5_sec = units::Time::new(5, units::TimePrefix::Sec);
1520
1521        // "heartbeat_interval: null" with no cli option => None
1522        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1523        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1524        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1525
1526        let merged = ConfigOptions::new(config_file, cli);
1527        assert_eq!(merged.general.heartbeat_interval, None);
1528
1529        // "heartbeat_interval: null" with "--heartbeat-interval 5s" => 5s
1530        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1531        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1532        let cli: CliOptions =
1533            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1534
1535        let merged = ConfigOptions::new(config_file, cli);
1536        assert_eq!(
1537            merged.general.heartbeat_interval,
1538            Some(NullableOption::Value(time_5_sec))
1539        );
1540
1541        // "heartbeat_interval: null" with "--heartbeat-interval null" => NullableOption::Null
1542        let yaml = yaml_fmt_fn("heartbeat_interval: null");
1543        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1544        let cli: CliOptions =
1545            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1546
1547        let merged = ConfigOptions::new(config_file, cli);
1548        assert_eq!(
1549            merged.general.heartbeat_interval,
1550            Some(NullableOption::Null)
1551        );
1552
1553        // "heartbeat_interval: 5s" with no cli option => 5s
1554        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1555        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1556        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1557
1558        let merged = ConfigOptions::new(config_file, cli);
1559        assert_eq!(
1560            merged.general.heartbeat_interval,
1561            Some(NullableOption::Value(time_5_sec))
1562        );
1563
1564        // "heartbeat_interval: 5s" with "--heartbeat-interval 5s" => 5s
1565        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1566        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1567        let cli: CliOptions =
1568            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1569
1570        let merged = ConfigOptions::new(config_file, cli);
1571        assert_eq!(
1572            merged.general.heartbeat_interval,
1573            Some(NullableOption::Value(time_5_sec))
1574        );
1575
1576        // "heartbeat_interval: 5s" with "--heartbeat-interval null" => NullableOption::Null
1577        let yaml = yaml_fmt_fn("heartbeat_interval: 5s");
1578        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1579        let cli: CliOptions =
1580            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1581
1582        let merged = ConfigOptions::new(config_file, cli);
1583        assert_eq!(
1584            merged.general.heartbeat_interval,
1585            Some(NullableOption::Null)
1586        );
1587
1588        // no config option with no cli option => 1s (default)
1589        let yaml = yaml_fmt_fn("");
1590        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1591        let cli: CliOptions = CliOptions::try_parse_from(["shadow", "-"]).unwrap();
1592
1593        let merged = ConfigOptions::new(config_file, cli);
1594        assert_eq!(
1595            merged.general.heartbeat_interval,
1596            Some(NullableOption::Value(time_1_sec))
1597        );
1598
1599        // no config option with "--heartbeat-interval 5s" => 5s
1600        let yaml = yaml_fmt_fn("");
1601        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1602        let cli: CliOptions =
1603            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "5s", "-"]).unwrap();
1604
1605        let merged = ConfigOptions::new(config_file, cli);
1606        assert_eq!(
1607            merged.general.heartbeat_interval,
1608            Some(NullableOption::Value(time_5_sec))
1609        );
1610
1611        // no config option with "--heartbeat-interval null" => NullableOption::Null
1612        let yaml = yaml_fmt_fn("");
1613        let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
1614        let cli: CliOptions =
1615            CliOptions::try_parse_from(["shadow", "--heartbeat-interval", "null", "-"]).unwrap();
1616
1617        let merged = ConfigOptions::new(config_file, cli);
1618        assert_eq!(
1619            merged.general.heartbeat_interval,
1620            Some(NullableOption::Null)
1621        );
1622    }
1623}