shadow_rs/core/
configuration.rs

1//! Shadow's configuration and cli parsing code using [serde] and [clap]. This contains all of
2//! Shadow's configuration options, some of which are also exposed as CLI options.
3//!
4//! Shadow uses [schemars] to get the option description (its doc comment) and default value so that
5//! it can be shown in the CLI help text.
6//!
7//! This code should be careful about validating or interpreting values. It should be focused on
8//! parsing and checking that the format is correct, and not validating the values. For example for
9//! options that take paths, this code should not verify that the path actually exists or perform
10//! any path canonicalization. That should be left to other code outside of this module. This is so
11//! that the configuration parsing does not become environment-dependent. If a configuration file
12//! parses on one system, it should parse successfully on other systems as well.
13
14use std::collections::{BTreeMap, HashSet};
15use std::ffi::{CStr, CString, OsStr, OsString};
16use std::os::unix::ffi::OsStrExt;
17use std::str::FromStr;
18
19use clap::Parser;
20use logger as c_log;
21use merge::Merge;
22use once_cell::sync::Lazy;
23use schemars::{JsonSchema, schema_for};
24use serde::{Deserialize, Serialize};
25use shadow_shim_helper_rs::simulation_time::SimulationTime;
26
27use crate::cshadow as c;
28use crate::host::syscall::formatter::FmtOptions;
29use crate::utility::units::{self, Unit};
30
/// Text passed to clap as the `about` string of [`CliOptions`].
const START_HELP_TEXT: &str = "\
    Run real applications over simulated networks.\n\n\
    For documentation, visit https://shadow.github.io/docs/guide";

/// Text passed to clap as the `after_help` string of [`CliOptions`]. It explains how values with
/// units are interpreted.
const END_HELP_TEXT: &str = "\
    If units are not specified, all values are assumed to be given in their base \
    unit (seconds, bytes, bits, etc). Units can optionally be specified (for \
    example: '1024 B', '1024 bytes', '1 KiB', '1 kibibyte', etc) and are \
    case-sensitive.";

// clap requires a 'static str for the version
/// Version string passed to clap; built lazily on first use by `crate::shadow::version`.
static VERSION: Lazy<String> = Lazy::new(crate::shadow::version);
43
// Options accepted on the command line, parsed by clap.
//
// NOTE: the `///` doc comments on the fields below are picked up by clap as the help text of the
// corresponding argument, so they are user-facing strings rather than plain documentation.
#[derive(Debug, Clone, Parser)]
#[clap(name = "Shadow", about = START_HELP_TEXT, after_help = END_HELP_TEXT)]
#[clap(version = VERSION.as_str())]
#[clap(next_display_order = None)]
// clap only shows the possible values for bool options (unless we add support for the other
// non-bool options in the future), which isn't very helpful
#[clap(hide_possible_values = true)]
pub struct CliOptions {
    /// Path to the Shadow configuration file. Use '-' to read from stdin
    // this is only optional when one of the "exit after ..." flags listed here is given
    #[clap(required_unless_present_any(&["show_build_info", "shm_cleanup"]))]
    pub config: Option<String>,

    /// Pause to allow gdb to attach
    #[clap(long, short = 'g')]
    pub gdb: bool,

    /// Pause after starting any processes on the comma-delimited list of hostnames
    // the raw argument string is converted to a set by `parse_set_str`
    #[clap(value_parser = parse_set_str)]
    #[clap(long, value_name = "hostnames")]
    pub debug_hosts: Option<HashSet<String>>,

    /// Exit after running shared memory cleanup routine
    // `exclusive` means that this flag cannot be combined with any other argument
    #[clap(long, exclusive(true))]
    pub shm_cleanup: bool,

    /// Exit after printing build information
    // `exclusive` means that this flag cannot be combined with any other argument
    #[clap(long, exclusive(true))]
    pub show_build_info: bool,

    /// Exit after printing the final configuration
    #[clap(long)]
    pub show_config: bool,

    // the arguments of the following option groups are flattened into this command, so they
    // appear as regular top-level arguments
    #[clap(flatten)]
    pub general: GeneralOptions,

    #[clap(flatten)]
    pub network: NetworkOptions,

    #[clap(flatten)]
    pub host_option_defaults: HostDefaultOptions,

    #[clap(flatten)]
    pub experimental: ExperimentalOptions,
}
89
/// Options contained in a configuration file.
///
/// Unknown top-level keys are rejected (`deny_unknown_fields`). The `general`, `network` and
/// `hosts` sections have no serde default and so must be present; the other sections are
/// optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ConfigFileOptions {
    /// General simulation options.
    pub general: GeneralOptions,

    /// Network options.
    pub network: NetworkOptions,

    /// Default values for the per-host options; uses `HostDefaultOptions::default()` if the
    /// section is missing.
    #[serde(default)]
    pub host_option_defaults: HostDefaultOptions,

    /// Experimental options; uses `ExperimentalOptions::default()` if the section is missing.
    #[serde(default)]
    pub experimental: ExperimentalOptions,

    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
    // since shadow parses to a serde_yaml::Value initially, we don't need to worry about duplicate
    // hostnames here
    /// The simulated hosts, keyed by hostname.
    pub hosts: BTreeMap<HostName, HostOptions>,
}
109
/// Shadow configuration options after processing command-line and configuration file options.
///
/// Unlike [`ConfigFileOptions`] there is no `host_option_defaults` field: [`ConfigOptions::new`]
/// merges those defaults into the `host_options` of every host.
#[derive(Debug, Clone, Serialize)]
pub struct ConfigOptions {
    /// General options, with command-line values taking precedence over the configuration file.
    pub general: GeneralOptions,

    /// Network options, with command-line values taking precedence over the configuration file.
    pub network: NetworkOptions,

    /// Experimental options, with command-line values taking precedence over the configuration
    /// file.
    pub experimental: ExperimentalOptions,

    // we use a BTreeMap so that the hosts are sorted by their hostname (useful for determinism)
    /// The simulated hosts, keyed by hostname.
    pub hosts: BTreeMap<HostName, HostOptions>,
}
122
123impl ConfigOptions {
124    pub fn new(mut config_file: ConfigFileOptions, options: CliOptions) -> Self {
125        // the `HostDefaultOptions::default` contains only `None` values, so we must first merge the
126        // config file with the real defaults from `HostDefaultOptions::new_with_defaults`
127        config_file.host_option_defaults = config_file
128            .host_option_defaults
129            .with_defaults(HostDefaultOptions::new_with_defaults());
130
131        // override config options with command line options
132        config_file.general = options.general.with_defaults(config_file.general);
133        config_file.network = options.network.with_defaults(config_file.network);
134        config_file.host_option_defaults = options
135            .host_option_defaults
136            .with_defaults(config_file.host_option_defaults);
137        config_file.experimental = options.experimental.with_defaults(config_file.experimental);
138
139        // copy the host defaults to all of the hosts
140        for host in config_file.hosts.values_mut() {
141            host.host_options = host
142                .host_options
143                .clone()
144                .with_defaults(config_file.host_option_defaults.clone());
145        }
146
147        Self {
148            general: config_file.general,
149            network: config_file.network,
150            experimental: config_file.experimental,
151            hosts: config_file.hosts,
152        }
153    }
154
155    pub fn model_unblocked_syscall_latency(&self) -> bool {
156        self.general.model_unblocked_syscall_latency.unwrap()
157    }
158
159    pub fn max_unapplied_cpu_latency(&self) -> SimulationTime {
160        let nanos = self.experimental.max_unapplied_cpu_latency.unwrap();
161        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
162        SimulationTime::from_nanos(nanos)
163    }
164
165    pub fn unblocked_syscall_latency(&self) -> SimulationTime {
166        let nanos = self.experimental.unblocked_syscall_latency.unwrap();
167        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
168        SimulationTime::from_nanos(nanos)
169    }
170
171    pub fn unblocked_vdso_latency(&self) -> SimulationTime {
172        let nanos = self.experimental.unblocked_vdso_latency.unwrap();
173        let nanos = nanos.convert(units::TimePrefix::Nano).unwrap().value();
174        SimulationTime::from_nanos(nanos)
175    }
176
177    pub fn native_preemption_enabled(&self) -> bool {
178        self.experimental.native_preemption_enabled.unwrap()
179    }
180
181    pub fn native_preemption_native_interval(
182        &self,
183    ) -> anyhow::Result<linux_api::time::kernel_old_timeval> {
184        let t = self.experimental.native_preemption_native_interval.unwrap();
185        let t = core::time::Duration::from(t);
186        // TODO: Would be a little nicer to surface this error when we parse the
187        // config. I think ideally we'd update the type such that some bounds
188        // can be enforced at parse time.
189        if t < core::time::Duration::from_micros(1) {
190            return Err(anyhow::anyhow!(
191                "native_preemption_native_interval must be >= 1 microsecond. Got {t:?}."
192            ));
193        }
194        let rv = linux_api::time::kernel_old_timeval {
195            tv_sec: t.as_secs().try_into().unwrap(),
196            tv_usec: t.subsec_micros().into(),
197        };
198        assert!(!(rv.tv_sec == 0 && rv.tv_usec == 0));
199        Ok(rv)
200    }
201
202    pub fn native_preemption_sim_interval(&self) -> SimulationTime {
203        let t = self.experimental.native_preemption_sim_interval.unwrap();
204        let nanos = t.convert(units::TimePrefix::Nano).unwrap().value();
205        SimulationTime::from_nanos(nanos)
206    }
207
208    pub fn strace_logging_mode(&self) -> Option<FmtOptions> {
209        match self.experimental.strace_logging_mode.as_ref().unwrap() {
210            StraceLoggingMode::Standard => Some(FmtOptions::Standard),
211            StraceLoggingMode::Deterministic => Some(FmtOptions::Deterministic),
212            StraceLoggingMode::Off => None,
213        }
214    }
215}
216
217/// Help messages used by Clap for command line arguments, combining the doc string with
218/// the Serde default.
219static GENERAL_HELP: Lazy<std::collections::HashMap<String, String>> =
220    Lazy::new(|| generate_help_strs(schema_for!(GeneralOptions)));
221
// General simulation options, available both in the configuration file and on the command line.
//
// these must all be Option types since they aren't required by the CLI, even if they're
// required in the configuration file
//
// NOTE: the `///` doc comment of each field is the option description that is read back through
// the schemars schema (see `GENERAL_HELP`) and shown in the CLI help, so it is user-facing text.
// The `#[serde(default = "...")]` functions supply the value used when the option is missing from
// the configuration file.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "General (Override configuration file options)")]
#[clap(next_display_order = None)]
#[serde(deny_unknown_fields)]
pub struct GeneralOptions {
    /// The simulated time at which simulated processes are sent a SIGKILL signal
    // the only option in this struct without a serde default
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("stop_time").unwrap().as_str())]
    pub stop_time: Option<units::Time<units::TimePrefix>>,

    /// Initialize randomness using seed N
    #[clap(long, value_name = "N")]
    #[clap(help = GENERAL_HELP.get("seed").unwrap().as_str())]
    #[serde(default = "default_some_1")]
    pub seed: Option<u32>,

    /// How many parallel threads to use to run the simulation. A value of 0 will allow Shadow to
    /// choose the number of threads.
    #[clap(long, short = 'p', value_name = "cores")]
    #[clap(help = GENERAL_HELP.get("parallelism").unwrap().as_str())]
    #[serde(default = "default_some_0")]
    pub parallelism: Option<u32>,

    /// The simulated time that ends Shadow's high network bandwidth/reliability bootstrap period
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("bootstrap_end_time").unwrap().as_str())]
    #[serde(default = "default_some_time_0")]
    pub bootstrap_end_time: Option<units::Time<units::TimePrefix>>,

    /// Log level of output written on stdout. If Shadow was built in release mode, then log
    /// messages at level 'trace' will always be dropped
    #[clap(long, short = 'l', value_name = "level")]
    #[clap(help = GENERAL_HELP.get("log_level").unwrap().as_str())]
    #[serde(default = "default_some_info")]
    pub log_level: Option<LogLevel>,

    /// Interval at which to print heartbeat messages
    #[clap(long, value_name = "seconds")]
    #[clap(help = GENERAL_HELP.get("heartbeat_interval").unwrap().as_str())]
    #[serde(default = "default_some_nullable_time_1")]
    pub heartbeat_interval: Option<NullableOption<units::Time<units::TimePrefix>>>,

    /// Path to store simulation output
    #[clap(long, short = 'd', value_name = "path")]
    #[clap(help = GENERAL_HELP.get("data_directory").unwrap().as_str())]
    #[serde(default = "default_data_directory")]
    pub data_directory: Option<String>,

    /// Path to recursively copy during startup and use as the data-directory
    #[clap(long, short = 'e', value_name = "path")]
    #[clap(help = GENERAL_HELP.get("template_directory").unwrap().as_str())]
    #[serde(default)]
    pub template_directory: Option<NullableOption<String>>,

    /// Show the simulation progress on stderr
    #[clap(long, value_name = "bool")]
    #[clap(help = GENERAL_HELP.get("progress").unwrap().as_str())]
    #[serde(default = "default_some_false")]
    pub progress: Option<bool>,

    /// Model syscalls and VDSO functions that don't block as having some
    /// latency. This should have minimal effect on typical simulations, but
    /// can be helpful for programs with "busy loops" that otherwise deadlock
    /// under Shadow.
    #[clap(long, value_name = "bool")]
    #[clap(help = GENERAL_HELP.get("model_unblocked_syscall_latency").unwrap().as_str())]
    #[serde(default = "default_some_false")]
    pub model_unblocked_syscall_latency: Option<bool>,
}
293
294impl GeneralOptions {
295    /// Replace unset (`None`) values of `base` with values from `default`.
296    pub fn with_defaults(mut self, default: Self) -> Self {
297        self.merge(default);
298        self
299    }
300}
301
302/// Help messages used by Clap for command line arguments, combining the doc string with
303/// the Serde default.
304static NETWORK_HELP: Lazy<std::collections::HashMap<String, String>> =
305    Lazy::new(|| generate_help_strs(schema_for!(NetworkOptions)));
306
// Network options, available both in the configuration file and (except for the graph) on the
// command line.
//
// these must all be Option types since they aren't required by the CLI, even if they're
// required in the configuration file
//
// NOTE: the `///` doc comment of each field is the option description that is read back through
// the schemars schema (see `NETWORK_HELP`) and shown in the CLI help, so it is user-facing text.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "Network (Override network options)")]
#[clap(next_display_order = None)]
#[serde(deny_unknown_fields)]
pub struct NetworkOptions {
    /// The network topology graph
    // not exposed as a command line argument
    #[clap(skip)]
    pub graph: Option<GraphOptions>,

    /// When routing packets, follow the shortest path rather than following a direct
    /// edge between nodes. If false, the network graph is required to be complete.
    #[serde(default = "default_some_true")]
    #[clap(long, value_name = "bool")]
    #[clap(help = NETWORK_HELP.get("use_shortest_path").unwrap().as_str())]
    pub use_shortest_path: Option<bool>,
}
325
326impl NetworkOptions {
327    /// Replace unset (`None`) values of `base` with values from `default`.
328    pub fn with_defaults(mut self, default: Self) -> Self {
329        self.merge(default);
330        self
331    }
332}
333
334/// Help messages used by Clap for command line arguments, combining the doc string with
335/// the Serde default.
336static EXP_HELP: Lazy<std::collections::HashMap<String, String>> =
337    Lazy::new(|| generate_help_strs(schema_for!(ExperimentalOptions)));
338
// Experimental options, available both in the configuration file and on the command line.
//
// The container-level `#[serde(default)]` means that options missing from the configuration file
// take their value from the `Default` implementation of this struct.
//
// NOTE: the `///` doc comment of each field is the option description that is read back through
// the schemars schema (see `EXP_HELP`) and shown in the CLI help, so it is user-facing text. All
// of these arguments are hidden from the short help (`hide_short_help`).
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(
    next_help_heading = "Experimental (Unstable and may change or be removed at any time, regardless of Shadow version)"
)]
#[clap(next_display_order = None)]
#[serde(default, deny_unknown_fields)]
pub struct ExperimentalOptions {
    /// Use the SCHED_FIFO scheduler. Requires CAP_SYS_NICE. See sched(7), capabilities(7)
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_sched_fifo").unwrap().as_str())]
    pub use_sched_fifo: Option<bool>,

    /// Count the number of occurrences for individual syscalls
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_syscall_counters").unwrap().as_str())]
    pub use_syscall_counters: Option<bool>,

    /// Count object allocations and deallocations. If disabled, we will not be able to detect object memory leaks
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_object_counters").unwrap().as_str())]
    pub use_object_counters: Option<bool>,

    /// Preload our libc library for all managed processes for fast syscall interposition when possible.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_libc").unwrap().as_str())]
    pub use_preload_libc: Option<bool>,

    /// Preload our OpenSSL RNG library for all managed processes to mitigate non-deterministic use of OpenSSL.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_openssl_rng").unwrap().as_str())]
    pub use_preload_openssl_rng: Option<bool>,

    /// Preload our OpenSSL crypto library for all managed processes to skip some crypto operations
    /// (may speed up simulation if your CPU lacks AES-NI support, but can cause bugs so do not use
    /// unless you know what you're doing).
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_preload_openssl_crypto").unwrap().as_str())]
    pub use_preload_openssl_crypto: Option<bool>,

    /// Use the MemoryManager in memory-mapping mode. This can improve
    /// performance, but disables support for dynamically spawning processes
    /// inside the simulation (e.g. the `fork` syscall).
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_memory_manager").unwrap().as_str())]
    pub use_memory_manager: Option<bool>,

    /// Pin each thread and any processes it executes to the same logical CPU Core to improve cache affinity
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_cpu_pinning").unwrap().as_str())]
    pub use_cpu_pinning: Option<bool>,

    /// Each worker thread will spin in a `sched_yield` loop while waiting for a new task. This is
    /// ignored if not using the thread-per-core scheduler.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_worker_spinning").unwrap().as_str())]
    pub use_worker_spinning: Option<bool>,

    /// If set, overrides the automatically calculated minimum time workers may run ahead when sending events between nodes
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("runahead").unwrap().as_str())]
    pub runahead: Option<NullableOption<units::Time<units::TimePrefix>>>,

    /// Update the minimum runahead dynamically throughout the simulation.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_dynamic_runahead").unwrap().as_str())]
    pub use_dynamic_runahead: Option<bool>,

    /// Initial size of the socket's send buffer
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bytes")]
    #[clap(help = EXP_HELP.get("socket_send_buffer").unwrap().as_str())]
    pub socket_send_buffer: Option<units::Bytes<units::SiPrefixUpper>>,

    /// Enable send window autotuning
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("socket_send_autotune").unwrap().as_str())]
    pub socket_send_autotune: Option<bool>,

    /// Initial size of the socket's receive buffer
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bytes")]
    #[clap(help = EXP_HELP.get("socket_recv_buffer").unwrap().as_str())]
    pub socket_recv_buffer: Option<units::Bytes<units::SiPrefixUpper>>,

    /// Enable receive window autotuning
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("socket_recv_autotune").unwrap().as_str())]
    pub socket_recv_autotune: Option<bool>,

    /// The queueing discipline to use at the network interface
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "mode")]
    #[clap(help = EXP_HELP.get("interface_qdisc").unwrap().as_str())]
    pub interface_qdisc: Option<QDiscMode>,

    /// Log the syscalls for each process to individual "strace" files
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "mode")]
    #[clap(help = EXP_HELP.get("strace_logging_mode").unwrap().as_str())]
    pub strace_logging_mode: Option<StraceLoggingMode>,

    /// Max amount of execution-time latency allowed to accumulate before the
    /// clock is moved forward. Moving the clock forward is a potentially
    /// expensive operation, so larger values reduce simulation overhead, at the
    /// cost of coarser time jumps. Note also that accumulated-but-unapplied
    /// latency is discarded when a thread is blocked on a syscall.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("max_unapplied_cpu_latency").unwrap().as_str())]
    pub max_unapplied_cpu_latency: Option<units::Time<units::TimePrefix>>,

    /// Simulated latency of an unblocked syscall. For efficiency Shadow only
    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
    /// reached.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("unblocked_syscall_latency").unwrap().as_str())]
    pub unblocked_syscall_latency: Option<units::Time<units::TimePrefix>>,

    /// Simulated latency of a vdso "syscall". For efficiency Shadow only
    /// actually adds this latency if and when `max_unapplied_cpu_latency` is
    /// reached.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("unblocked_vdso_latency").unwrap().as_str())]
    pub unblocked_vdso_latency: Option<units::Time<units::TimePrefix>>,

    /// The host scheduler implementation, which decides how to assign hosts to threads and threads
    /// to CPU cores
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "name")]
    #[clap(help = EXP_HELP.get("scheduler").unwrap().as_str())]
    pub scheduler: Option<Scheduler>,

    /// When true, report error-level messages to stderr in addition to logging to stdout.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("report_errors_to_stderr").unwrap().as_str())]
    pub report_errors_to_stderr: Option<bool>,

    /// Use the rust TCP implementation
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("use_new_tcp").unwrap().as_str())]
    pub use_new_tcp: Option<bool>,

    /// When true, and when managed code runs for an extended time without
    /// returning control to shadow (e.g. by making a syscall), shadow preempts
    /// the managed code and moves simulated time forward. This can be used to
    /// escape "pure-CPU busy-loops", but isn't usually needed, breaks
    /// simulation determinism, and significantly affects simulation
    /// performance.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "bool")]
    #[clap(help = EXP_HELP.get("native_preemption_enabled").unwrap().as_str())]
    pub native_preemption_enabled: Option<bool>,

    /// When `native_preemption_enabled` is true, amount of native CPU-time to
    /// wait before preempting managed code that hasn't returned control to
    /// shadow. Only supports microsecond granularity, and values below 1 microsecond
    /// are rejected.
    // the lower bound is checked in `ConfigOptions::native_preemption_native_interval`, not at
    // parse time
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("native_preemption_native_interval").unwrap().as_str())]
    pub native_preemption_native_interval: Option<units::Time<units::TimePrefix>>,

    /// When `native_preemption_enabled` is true, amount of simulated time to
    /// consume after `native_preemption_native_interval` has elapsed without
    /// returning control to shadow.
    #[clap(hide_short_help = true)]
    #[clap(long, value_name = "seconds")]
    #[clap(help = EXP_HELP.get("native_preemption_sim_interval").unwrap().as_str())]
    pub native_preemption_sim_interval: Option<units::Time<units::TimePrefix>>,
}
526
527impl ExperimentalOptions {
528    /// Replace unset (`None`) values of `base` with values from `default`.
529    pub fn with_defaults(mut self, default: Self) -> Self {
530        self.merge(default);
531        self
532    }
533}
534
535impl Default for ExperimentalOptions {
536    fn default() -> Self {
537        Self {
538            use_sched_fifo: Some(false),
539            use_syscall_counters: Some(true),
540            use_object_counters: Some(true),
541            use_preload_libc: Some(true),
542            use_preload_openssl_rng: Some(true),
543            use_preload_openssl_crypto: Some(false),
544            max_unapplied_cpu_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
545            // 1-2 microseconds is a ballpark estimate of the minimal latency for
546            // context switching to the kernel and back on modern machines.
547            // Default to the lower end to minimize effect in simualations without busy loops.
548            unblocked_syscall_latency: Some(units::Time::new(1, units::TimePrefix::Micro)),
549            // Actual latencies vary from ~40 to ~400 CPU cycles. https://stackoverflow.com/a/13096917
550            // Default to the lower end to minimize effect in simualations without busy loops.
551            unblocked_vdso_latency: Some(units::Time::new(10, units::TimePrefix::Nano)),
552            use_memory_manager: Some(false),
553            use_cpu_pinning: Some(true),
554            use_worker_spinning: Some(true),
555            runahead: Some(NullableOption::Value(units::Time::new(
556                1,
557                units::TimePrefix::Milli,
558            ))),
559            use_dynamic_runahead: Some(false),
560            socket_send_buffer: Some(units::Bytes::new(131_072, units::SiPrefixUpper::Base)),
561            socket_send_autotune: Some(true),
562            socket_recv_buffer: Some(units::Bytes::new(174_760, units::SiPrefixUpper::Base)),
563            socket_recv_autotune: Some(true),
564            interface_qdisc: Some(QDiscMode::Fifo),
565            strace_logging_mode: Some(StraceLoggingMode::Off),
566            scheduler: Some(Scheduler::ThreadPerCore),
567            report_errors_to_stderr: Some(true),
568            use_new_tcp: Some(false),
569            native_preemption_enabled: Some(false),
570            native_preemption_native_interval: Some(units::Time::new(
571                100,
572                units::TimePrefix::Milli,
573            )),
574            native_preemption_sim_interval: Some(units::Time::new(10, units::TimePrefix::Milli)),
575        }
576    }
577}
578
579/// Help messages used by Clap for command line arguments, combining the doc string with
580/// the Serde default.
581static HOST_HELP: Lazy<std::collections::HashMap<String, String>> =
582    Lazy::new(|| generate_help_strs(schema_for!(HostDefaultOptions)));
583
// Host options that can be set globally (as `host_option_defaults`, in the configuration file or
// on the command line) and overridden per host (as `host_options`).
//
// NOTE: the `///` doc comment of each field is the option description that is read back through
// the schemars schema (see `HOST_HELP`) and shown in the CLI help, so it is user-facing text.
#[derive(Debug, Clone, Parser, Serialize, Deserialize, Merge, JsonSchema)]
#[clap(next_help_heading = "Host Defaults (Default options for hosts)")]
#[clap(next_display_order = None)]
#[serde(default, deny_unknown_fields)]
// serde will default all fields to `None`, but in the cli help we want the actual defaults
#[schemars(default = "HostDefaultOptions::new_with_defaults")]
pub struct HostDefaultOptions {
    /// Log level at which to print node messages
    // the flag is named `--host-log-level` rather than being derived from the field name
    #[clap(long = "host-log-level", name = "host-log-level")]
    #[clap(value_name = "level")]
    #[clap(help = HOST_HELP.get("log_level").unwrap().as_str())]
    pub log_level: Option<NullableOption<LogLevel>>,

    /// Should shadow generate pcap files?
    #[clap(long, value_name = "bool")]
    #[clap(help = HOST_HELP.get("pcap_enabled").unwrap().as_str())]
    pub pcap_enabled: Option<bool>,

    /// How much data to capture per packet (header and payload) if pcap logging is enabled
    #[clap(long, value_name = "bytes")]
    #[clap(help = HOST_HELP.get("pcap_capture_size").unwrap().as_str())]
    pub pcap_capture_size: Option<units::Bytes<units::SiPrefixUpper>>,
}
607
608impl HostDefaultOptions {
609    pub fn new_with_defaults() -> Self {
610        Self {
611            log_level: None,
612            pcap_enabled: Some(false),
613            // From pcap(3): "A value of 65535 should be sufficient, on most if not all networks, to
614            // capture all the data available from the packet". The maximum length of an IP packet
615            // (including the header) is 65535 bytes.
616            pcap_capture_size: Some(units::Bytes::new(65535, units::SiPrefixUpper::Base)),
617        }
618    }
619
620    /// Replace unset (`None`) values of `base` with values from `default`.
621    pub fn with_defaults(mut self, default: Self) -> Self {
622        self.merge(default);
623        self
624    }
625}
626
627#[allow(clippy::derivable_impls)]
628impl Default for HostDefaultOptions {
629    fn default() -> Self {
630        // Our config fields would typically be initialized with their real defaults here in the
631        // `Default::default` implementation, but we need to handle the host options differently
632        // because the global `host_option_defaults` can be overridden by host-specific
633        // `host_options`. So instead we use defaults of `None` here and set the real defaults with
634        // `Self::new_with_defaults` in `ConfigOptions::new`.
635        Self {
636            log_level: None,
637            pcap_enabled: None,
638            pcap_capture_size: None,
639        }
640    }
641}
642
// Single-variant helper enum used by `ProcessFinalState::Running`. With the kebab-case renaming
// its only variant is (de)serialized as the string `running`.
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug, Copy, Clone, JsonSchema)]
#[serde(rename_all = "kebab-case")]
pub enum RunningVal {
    Running,
}
648
/// The enum variants here have an extra level of indirection to get the
/// serde serialization that we want.
// Because the enum is untagged, the variant name itself never appears in the serialized form:
// `Exited` and `Signaled` are written as a map with the single key `exited` / `signaled`, and
// `Running` is written as whatever `RunningVal` serializes to.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum ProcessFinalState {
    // NOTE(review): `exited` is presumably the process's exit status — confirm against the code
    // that compares the expected and actual states
    Exited { exited: i32 },
    // NOTE(review): `signaled` is presumably the signal that terminated the process — confirm
    Signaled { signaled: Signal },
    Running(RunningVal),
}
658
659impl Default for ProcessFinalState {
660    fn default() -> Self {
661        Self::Exited { exited: 0 }
662    }
663}
664
665impl std::fmt::Display for ProcessFinalState {
666    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
667        // We use the yaml serialization here so that when reporting that an
668        // expected state didn't match the actual state, it's clear how to set
669        // the expected state in the config file to match the actual state if
670        // desired.
671        //
672        // The current enum works OK for this since there are no internal
673        // newlines in the serialization; if there are some later we might wand
674        // to serialize to json instead, which can always be put on a single
675        // line and should also be valid yaml.
676        let s = serde_yaml::to_string(self).or(Err(std::fmt::Error))?;
677        write!(f, "{}", s.trim())
678    }
679}
680
// Options for a single entry of a host's `processes` list. Unknown keys are rejected, and only
// `path` has no serde default.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ProcessOptions {
    // NOTE(review): presumably the path of the program to run for this process — confirm against
    // the code that consumes it. Per the module documentation, this path is not validated or
    // canonicalized during parsing.
    pub path: std::path::PathBuf,

    /// Process arguments
    #[serde(default = "default_args_empty")]
    pub args: ProcessArgs,

    /// Environment variables passed when executing this process
    #[serde(default)]
    pub environment: BTreeMap<EnvName, String>,

    /// The simulated time at which to execute the process
    #[serde(default)]
    pub start_time: units::Time<units::TimePrefix>,

    /// The simulated time at which to send a `shutdown_signal` signal to the process
    #[serde(default)]
    pub shutdown_time: Option<units::Time<units::TimePrefix>>,

    /// The signal that will be sent to the process at `shutdown_time`
    #[serde(default = "default_sigterm")]
    pub shutdown_signal: Signal,

    /// The expected final state of the process. Shadow will report an error
    /// if the actual state doesn't match.
    // defaults to `ProcessFinalState::default()`, i.e. `Exited { exited: 0 }`
    #[serde(default)]
    pub expected_final_state: ProcessFinalState,
}
711
712#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
713#[serde(deny_unknown_fields)]
714pub struct HostOptions {
715    /// Network graph node ID to assign the host to
716    pub network_node_id: u32,
717
718    pub processes: Vec<ProcessOptions>,
719
720    /// IP address to assign to the host
721    #[serde(default)]
722    pub ip_addr: Option<std::net::Ipv4Addr>,
723
724    /// Downstream bandwidth capacity of the host
725    #[serde(default)]
726    pub bandwidth_down: Option<units::BitsPerSec<units::SiPrefixUpper>>,
727
728    /// Upstream bandwidth capacity of the host
729    #[serde(default)]
730    pub bandwidth_up: Option<units::BitsPerSec<units::SiPrefixUpper>>,
731
732    #[serde(default)]
733    pub host_options: HostDefaultOptions,
734}
735
736#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
737#[serde(rename_all = "kebab-case")]
738pub enum LogLevel {
739    Error,
740    Warning,
741    Info,
742    Debug,
743    Trace,
744}
745
746impl FromStr for LogLevel {
747    type Err = serde_yaml::Error;
748
749    fn from_str(s: &str) -> Result<Self, Self::Err> {
750        serde_yaml::from_str(s)
751    }
752}
753
754impl LogLevel {
755    pub fn to_c_loglevel(&self) -> c_log::LogLevel {
756        match self {
757            Self::Error => c_log::_LogLevel_LOGLEVEL_ERROR,
758            Self::Warning => c_log::_LogLevel_LOGLEVEL_WARNING,
759            Self::Info => c_log::_LogLevel_LOGLEVEL_INFO,
760            Self::Debug => c_log::_LogLevel_LOGLEVEL_DEBUG,
761            Self::Trace => c_log::_LogLevel_LOGLEVEL_TRACE,
762        }
763    }
764}
765
766impl From<LogLevel> for log::Level {
767    fn from(level: LogLevel) -> Self {
768        match level {
769            LogLevel::Error => log::Level::Error,
770            LogLevel::Warning => log::Level::Warn,
771            LogLevel::Info => log::Level::Info,
772            LogLevel::Debug => log::Level::Debug,
773            LogLevel::Trace => log::Level::Trace,
774        }
775    }
776}
777
778#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
779pub struct HostName(String);
780
781impl<'de> serde::Deserialize<'de> for HostName {
782    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
783        struct HostNameVisitor;
784
785        impl serde::de::Visitor<'_> for HostNameVisitor {
786            type Value = HostName;
787
788            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
789                formatter.write_str("a string")
790            }
791
792            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
793            where
794                E: serde::de::Error,
795            {
796                // hostname(7): "Valid characters for hostnames are ASCII(7) letters from a to z,
797                // the digits from 0 to 9, and the hyphen (-)."
798                fn is_allowed(c: char) -> bool {
799                    c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.'
800                }
801                if let Some(invalid_char) = v.chars().find(|x| !is_allowed(*x)) {
802                    return Err(E::custom(format!(
803                        "invalid hostname character: '{invalid_char}'"
804                    )));
805                }
806
807                if v.is_empty() {
808                    return Err(E::custom("empty hostname"));
809                }
810
811                // hostname(7): "A hostname may not start with a hyphen."
812                if v.starts_with('-') {
813                    return Err(E::custom("hostname begins with a '-' character"));
814                }
815
816                // hostname(7): "Each element of the hostname must be from 1 to 63 characters long
817                // and the entire hostname, including the dots, can be at most 253 characters long."
818                if v.len() > 253 {
819                    return Err(E::custom("hostname exceeds 253 characters"));
820                }
821
822                Ok(HostName(v))
823            }
824
825            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
826            where
827                E: serde::de::Error,
828            {
829                // serde::de::Visitor: "It is never correct to implement `visit_string` without
830                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
831                self.visit_string(v.to_string())
832            }
833        }
834
835        deserializer.deserialize_string(HostNameVisitor)
836    }
837}
838
839impl std::ops::Deref for HostName {
840    type Target = String;
841
842    fn deref(&self) -> &Self::Target {
843        &self.0
844    }
845}
846
847impl From<HostName> for String {
848    fn from(name: HostName) -> Self {
849        name.0
850    }
851}
852
853impl std::fmt::Display for HostName {
854    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
855        self.0.fmt(f)
856    }
857}
858
859#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Serialize, JsonSchema)]
860pub struct EnvName(String);
861
862impl EnvName {
863    pub fn new(name: impl Into<String>) -> Option<Self> {
864        let name = name.into();
865
866        // an environment variable name cannot contain a '=' character
867        if name.contains('=') {
868            return None;
869        }
870
871        Some(Self(name))
872    }
873}
874
875impl<'de> serde::Deserialize<'de> for EnvName {
876    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
877        struct EnvNameVisitor;
878
879        impl serde::de::Visitor<'_> for EnvNameVisitor {
880            type Value = EnvName;
881
882            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
883                formatter.write_str("a string")
884            }
885
886            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
887            where
888                E: serde::de::Error,
889            {
890                let Some(name) = EnvName::new(v) else {
891                    let e = "environment variable name contains a '=' character";
892                    return Err(E::custom(e));
893                };
894
895                Ok(name)
896            }
897
898            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
899            where
900                E: serde::de::Error,
901            {
902                // serde::de::Visitor: "It is never correct to implement `visit_string` without
903                // implementing `visit_str`. Implement neither, both, or just `visit_str`.'
904                self.visit_string(v.to_string())
905            }
906        }
907
908        deserializer.deserialize_string(EnvNameVisitor)
909    }
910}
911
912impl std::ops::Deref for EnvName {
913    type Target = String;
914
915    fn deref(&self) -> &Self::Target {
916        &self.0
917    }
918}
919
920impl From<EnvName> for String {
921    fn from(name: EnvName) -> Self {
922        name.0
923    }
924}
925
926impl std::fmt::Display for EnvName {
927    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
928        self.0.fmt(f)
929    }
930}
931
932#[derive(Debug, Copy, Clone, Serialize, Deserialize, JsonSchema)]
933#[serde(rename_all = "kebab-case")]
934pub enum Scheduler {
935    ThreadPerHost,
936    ThreadPerCore,
937}
938
939impl FromStr for Scheduler {
940    type Err = serde_yaml::Error;
941
942    fn from_str(s: &str) -> Result<Self, Self::Err> {
943        serde_yaml::from_str(s)
944    }
945}
946
/// Helper function for serde default `Some("shadow.data")` values.
fn default_data_directory() -> Option<String> {
    Some(String::from("shadow.data"))
}
950
/// Parse a string as a comma-delimited set of `T` values.
///
/// Each comma-separated item is trimmed of surrounding whitespace before being parsed. The
/// error of the first item that fails to parse is returned.
fn parse_set<T>(s: &str) -> Result<HashSet<T>, <T as FromStr>::Err>
where
    T: std::cmp::Eq + std::hash::Hash + FromStr,
{
    let mut values = HashSet::new();
    for item in s.split(',') {
        values.insert(item.trim().parse::<T>()?);
    }
    Ok(values)
}
958
959/// Parse a string as a comma-delimited set of `String` values.
960fn parse_set_str(s: &str) -> Result<HashSet<String>, <String as FromStr>::Err> {
961    parse_set(s)
962}
963
964#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
965#[serde(rename_all = "kebab-case")]
966#[repr(C)]
967pub enum QDiscMode {
968    Fifo,
969    RoundRobin,
970}
971
972impl FromStr for QDiscMode {
973    type Err = serde_yaml::Error;
974
975    fn from_str(s: &str) -> Result<Self, Self::Err> {
976        serde_yaml::from_str(s)
977    }
978}
979
980#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
981#[serde(rename_all = "kebab-case")]
982pub enum Compression {
983    Xz,
984}
985
986#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
987#[serde(deny_unknown_fields)]
988pub struct FileSource {
989    /// The path to the file
990    pub path: String,
991    /// The file's compression format
992    pub compression: Option<Compression>,
993}
994
995#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
996#[serde(rename_all = "kebab-case")]
997pub enum GraphSource {
998    File(FileSource),
999    Inline(String),
1000}
1001
1002#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
1003// we use "kebab-case" for other shadow options, but are leaving this as "snake_case" for backwards
1004// compatibility
1005#[serde(tag = "type", rename_all = "snake_case")]
1006pub enum GraphOptions {
1007    Gml(GraphSource),
1008    #[serde(rename = "1_gbit_switch")]
1009    OneGbitSwitch,
1010}
1011
1012#[derive(Debug, Clone, Serialize, JsonSchema)]
1013#[serde(untagged)]
1014pub enum ProcessArgs {
1015    List(Vec<String>),
1016    Str(String),
1017}
1018
1019/// Serde doesn't provide good deserialization error messages for untagged enums, so we implement
1020/// our own. For example, if serde finds a yaml value such as 4 for the process arguments, it won't
1021/// deserialize it to the string "4" and the yaml parsing will fail. The serde-generated error
1022/// message will say something like "data did not match any variant of untagged enum ProcessArgs at
1023/// line X column Y" which isn't very helpful to the user, so here we try to give a better error
1024/// message.
1025impl<'de> serde::Deserialize<'de> for ProcessArgs {
1026    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1027        struct ProcessArgsVisitor;
1028
1029        impl<'de> serde::de::Visitor<'de> for ProcessArgsVisitor {
1030            type Value = ProcessArgs;
1031
1032            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1033                formatter.write_str("a string or a sequence of strings")
1034            }
1035
1036            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1037            where
1038                E: serde::de::Error,
1039            {
1040                Ok(Self::Value::Str(v.to_owned()))
1041            }
1042
1043            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
1044            where
1045                A: serde::de::SeqAccess<'de>,
1046            {
1047                let mut v = vec![];
1048
1049                while let Some(val) = seq.next_element()? {
1050                    v.push(val);
1051                }
1052
1053                Ok(Self::Value::List(v))
1054            }
1055        }
1056
1057        deserializer.deserialize_any(ProcessArgsVisitor)
1058    }
1059}
1060
1061// TODO: use linux_api's Signal internally, which we control and which supports
1062// realtime signals. We need to implement conversion to and from strings to do
1063// so, while being careful that the conversion is compatible with nix's so as
1064// not to be a breaking change to our configuration format.
1065#[derive(Debug, Copy, Clone, Eq, PartialEq)]
1066pub struct Signal(nix::sys::signal::Signal);
1067
1068impl From<nix::sys::signal::Signal> for Signal {
1069    fn from(value: nix::sys::signal::Signal) -> Self {
1070        Self(value)
1071    }
1072}
1073
1074impl TryFrom<linux_api::signal::Signal> for Signal {
1075    type Error = <nix::sys::signal::Signal as TryFrom<i32>>::Error;
1076    fn try_from(value: linux_api::signal::Signal) -> Result<Self, Self::Error> {
1077        let signal = nix::sys::signal::Signal::try_from(value.as_i32())?;
1078        Ok(Self(signal))
1079    }
1080}
1081
1082impl serde::Serialize for Signal {
1083    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1084    where
1085        S: serde::Serializer,
1086    {
1087        serializer.serialize_str(self.0.as_str())
1088    }
1089}
1090
1091impl<'de> serde::Deserialize<'de> for Signal {
1092    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1093        struct SignalVisitor;
1094
1095        impl serde::de::Visitor<'_> for SignalVisitor {
1096            type Value = Signal;
1097
1098            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1099                formatter.write_str("a signal string (e.g. \"SIGINT\") or integer")
1100            }
1101
1102            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1103            where
1104                E: serde::de::Error,
1105            {
1106                nix::sys::signal::Signal::from_str(v)
1107                    .map(Signal)
1108                    .map_err(|_e| E::custom(format!("Invalid signal string: {v}")))
1109            }
1110
1111            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
1112            where
1113                E: serde::de::Error,
1114            {
1115                let v = i32::try_from(v)
1116                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1117                nix::sys::signal::Signal::try_from(v)
1118                    .map(Signal)
1119                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))
1120            }
1121
1122            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
1123            where
1124                E: serde::de::Error,
1125            {
1126                let v = i64::try_from(v)
1127                    .map_err(|_e| E::custom(format!("Invalid signal number: {v}")))?;
1128                self.visit_i64(v)
1129            }
1130        }
1131
1132        deserializer.deserialize_any(SignalVisitor)
1133    }
1134}
1135
1136impl std::fmt::Display for Signal {
1137    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1138        write!(f, "{}", self.0)
1139    }
1140}
1141
1142impl JsonSchema for Signal {
1143    fn schema_name() -> String {
1144        String::from("Signal")
1145    }
1146
1147    fn json_schema(_gen: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
1148        // Use the "anything" schema. The Deserialize implementation does the
1149        // actual parsing and error handling.
1150        // TODO: Ideally we'd only accept strings or integers here. The
1151        // documentation isn't very clear about how to construct such a schema
1152        // though, and we currently only use the schemas for command-line-option
1153        // help strings. Since we don't currently take Signals in
1154        // command-line-options, it doesn't matter.
1155        schemars::schema::Schema::Bool(true)
1156    }
1157}
1158
1159impl std::ops::Deref for Signal {
1160    type Target = nix::sys::signal::Signal;
1161
1162    fn deref(&self) -> &Self::Target {
1163        &self.0
1164    }
1165}
1166
1167#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
1168#[serde(rename_all = "kebab-case")]
1169pub enum StraceLoggingMode {
1170    Off,
1171    Standard,
1172    Deterministic,
1173}
1174
1175impl FromStr for StraceLoggingMode {
1176    type Err = serde_yaml::Error;
1177
1178    fn from_str(s: &str) -> Result<Self, Self::Err> {
1179        serde_yaml::from_str(s)
1180    }
1181}
1182
1183/// This wrapper type allows cli options to specify "null" to overwrite a config file option with
1184/// `None`, and is intended to be used for options where "null" is a valid option value.
1185///
1186/// **Warning**: This may result in unexpected behaviour when wrapping string types. For example, if
1187/// this is used for a file path option, the value "null" will conflict with the valid filename
1188/// "null". So if the user specifies "null" for this option, Shadow will assume it means "no value"
1189/// rather than the filename "null".
1190///
1191/// ### Motivation
1192///
1193/// For configuration options, there are generally three states:
1194/// - set
1195/// - not set
1196/// - null
1197///
1198/// For serde, all three states are configurable:
1199/// - set: `runahead: 5ms`
1200/// - not set: (no `runahead` option used in yaml)
1201/// - null: `runahead: null`
1202///
1203/// For clap, there are only two states:
1204/// - set: `--runahead 5ms`
1205/// - not set: (no `--runahead` option used in command)
1206///
1207/// There is no way to set a "null" state for cli options with clap.
1208///
1209/// ### Configuration in Shadow
1210///
1211/// Shadow first parses the config file and cli options separately before merging them.
1212///
1213/// Parsing for serde:
1214/// - set: `runahead: 5ms` => runahead is set to `Some(5ms)`
1215/// - not set: (no `runahead` option used in yaml) => runahead is set to its default (either
1216///   `Some(..)` or `None`)
1217/// - null: `runahead: null` => runahead is set to `None`
1218///
1219/// Parsing for clap:
1220/// - set: `--runahead 5ms` => runahead is set to `Some(5ms)`
1221/// - not set: (no `--runahead` option used in command) => runahead is set to `None`
1222///
1223/// Then the options are merged such that any `Some(..)` options from the cli options will overwrite
1224/// any `Some` or `None` options from the config file.
1225///
1226/// The issue is that no clap option can overwrite a config file option of `Some` with a value of
1227/// `None`. For example if the config file specifies `runahead: 5ms`, then with clap you can only
1228/// use `--runahead 2ms` to change the runahead to a `Some(2ms)` value, or you can not set
1229/// `--runahead` at all to leave it as a `Some(5ms)` value. But there is no cli option to change the
1230/// runahead to a `None` value.
1231///
1232/// This `NullableOption` type is a wrapper to allow you to specify "null" on the command line to
1233/// overwrite the config file value with `None`. From the example above, you could now specify
1234/// "--runahead null" to overwrite the config file value (for example `Some(5ms)`) with a `None`
1235/// value.
1236#[derive(Debug, Copy, Clone, JsonSchema, Eq, PartialEq)]
1237pub enum NullableOption<T> {
1238    Value(T),
1239    Null,
1240}
1241
1242impl<T> NullableOption<T> {
1243    pub fn as_ref(&self) -> NullableOption<&T> {
1244        match self {
1245            NullableOption::Value(x) => NullableOption::Value(x),
1246            NullableOption::Null => NullableOption::Null,
1247        }
1248    }
1249
1250    pub fn as_mut(&mut self) -> NullableOption<&mut T> {
1251        match self {
1252            NullableOption::Value(x) => NullableOption::Value(x),
1253            NullableOption::Null => NullableOption::Null,
1254        }
1255    }
1256
1257    /// Easier to use than `Into<Option<T>>` since `Option` has a lot of blanket `From`
1258    /// implementations, requiring a lot of type annotations.
1259    pub fn to_option(self) -> Option<T> {
1260        match self {
1261            NullableOption::Value(x) => Some(x),
1262            NullableOption::Null => None,
1263        }
1264    }
1265}
1266
1267impl<T: serde::Serialize> serde::Serialize for NullableOption<T> {
1268    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
1269        match self {
1270            // use the inner type's serialize function
1271            Self::Value(x) => Ok(T::serialize(x, serializer)?),
1272            Self::Null => serializer.serialize_none(),
1273        }
1274    }
1275}
1276
1277impl<'de, T: serde::Deserialize<'de>> serde::Deserialize<'de> for NullableOption<T> {
1278    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
1279        // always use the inner type's deserialize function
1280        Ok(Self::Value(T::deserialize(deserializer)?))
1281    }
1282}
1283
1284impl<T> FromStr for NullableOption<T>
1285where
1286    T: FromStr<Err: std::fmt::Debug + std::fmt::Display>,
1287{
1288    type Err = T::Err;
1289
1290    fn from_str(s: &str) -> Result<Self, Self::Err> {
1291        match s {
1292            // since we use serde-yaml, use "null" to match yaml's "null"
1293            "null" => Ok(Self::Null),
1294            x => Ok(Self::Value(FromStr::from_str(x)?)),
1295        }
1296    }
1297}
1298
/// A trait for `Option`-like types that can be flattened into a single `Option`.
pub trait Flatten<T> {
    /// Consumes `self` and returns the inner value, if there is one.
    fn flatten(self) -> Option<T>;

    /// Returns a reference to the inner value, if there is one.
    fn flatten_ref(&self) -> Option<&T>;
}
1304
1305impl<T> Flatten<T> for Option<NullableOption<T>> {
1306    fn flatten(self) -> Option<T> {
1307        self.and_then(|x| x.to_option())
1308    }
1309
1310    fn flatten_ref(&self) -> Option<&T> {
1311        self.as_ref().and_then(|x| x.as_ref().to_option())
1312    }
1313}
1314
1315/// Helper function for serde default `ProcessArgs::Str("")` values.
1316fn default_args_empty() -> ProcessArgs {
1317    ProcessArgs::Str("".to_string())
1318}
1319
1320/// Helper function for serde default `Signal(Signal::SIGTERM)` values.
1321fn default_sigterm() -> Signal {
1322    Signal(nix::sys::signal::Signal::SIGTERM)
1323}
1324
1325/// Helper function for serde default `Some(0)` values.
1326fn default_some_time_0() -> Option<units::Time<units::TimePrefix>> {
1327    Some(units::Time::new(0, units::TimePrefix::Sec))
1328}
1329
/// Serde default helper: `Some(true)`.
fn default_some_true() -> Option<bool> {
    Option::Some(true)
}
1334
/// Serde default helper: `Some(false)`.
fn default_some_false() -> Option<bool> {
    Option::Some(false)
}
1339
/// Serde default helper: `Some(0)`.
fn default_some_0() -> Option<u32> {
    Option::Some(0_u32)
}
1344
/// Serde default helper: `Some(1)`.
fn default_some_1() -> Option<u32> {
    Option::Some(1_u32)
}
1349
1350/// Helper function for serde default `Some(NullableOption::Value(1 sec))` values.
1351fn default_some_nullable_time_1() -> Option<NullableOption<units::Time<units::TimePrefix>>> {
1352    let time = units::Time::new(1, units::TimePrefix::Sec);
1353    Some(NullableOption::Value(time))
1354}
1355
1356/// Helper function for serde default `Some(LogLevel::Info)` values.
1357fn default_some_info() -> Option<LogLevel> {
1358    Some(LogLevel::Info)
1359}
1360
/// A graph description with a single node (id 0) that has 1 Gbit of upstream and downstream
/// host bandwidth, and a single self-loop edge with a latency of 1 ms and no packet loss.
///
/// Presumably this is the graph used for the `1_gbit_switch` graph type (see
/// `GraphOptions::OneGbitSwitch`); confirm against the code that loads the graph.
// when updating this graph, make sure to also update the copy in docs/shadow_config_spec.md
pub const ONE_GBIT_SWITCH_GRAPH: &str = r#"graph [
  directed 0
  node [
    id 0
    host_bandwidth_up "1 Gbit"
    host_bandwidth_down "1 Gbit"
  ]
  edge [
    source 0
    target 0
    latency "1 ms"
    packet_loss 0.0
  ]
]"#;
1376
1377/// Generate help strings for objects in a JSON schema, including the Serde defaults if available.
1378fn generate_help_strs(
1379    schema: schemars::schema::RootSchema,
1380) -> std::collections::HashMap<String, String> {
1381    let mut defaults = std::collections::HashMap::<String, String>::new();
1382    for (name, obj) in &schema.schema.object.as_ref().unwrap().properties {
1383        if let Some(meta) = obj.clone().into_object().metadata {
1384            let description = meta.description.unwrap_or_default();
1385            let space = if !description.is_empty() { " " } else { "" };
1386            match meta.default {
1387                Some(default) => defaults.insert(
1388                    name.clone(),
1389                    format!("{}{}[default: {}]", description, space, default),
1390                ),
1391                None => defaults.insert(name.clone(), description.to_string()),
1392            };
1393        }
1394    }
1395    defaults
1396}
1397
1398/// Parses a string as a list of arguments following the shell's parsing rules. This
1399/// uses `g_shell_parse_argv()` for parsing.
1400pub fn parse_string_as_args(args_str: &OsStr) -> Result<Vec<OsString>, String> {
1401    if args_str.is_empty() {
1402        return Ok(Vec::new());
1403    }
1404
1405    let args_str = CString::new(args_str.as_bytes()).unwrap();
1406
1407    // parse the argument string
1408    let mut argc: libc::c_int = 0;
1409    let mut argv: *mut *mut libc::c_char = std::ptr::null_mut();
1410    let mut error: *mut libc::c_char = std::ptr::null_mut();
1411    let rv = unsafe { c::process_parseArgStr(args_str.as_ptr(), &mut argc, &mut argv, &mut error) };
1412
1413    // if there was an error, return a copy of the error string
1414    if !rv {
1415        let error_message = match error.is_null() {
1416            false => unsafe { CStr::from_ptr(error) }.to_str().unwrap(),
1417            true => "Unknown parsing error",
1418        }
1419        .to_string();
1420
1421        unsafe { c::process_parseArgStrFree(argv, error) };
1422        return Err(error_message);
1423    }
1424
1425    assert!(!argv.is_null());
1426
1427    // copy the arg strings
1428    let args: Vec<_> = (0..argc)
1429        .map(|x| unsafe {
1430            let arg_ptr = *argv.add(x as usize);
1431            assert!(!arg_ptr.is_null());
1432            OsStr::from_bytes(CStr::from_ptr(arg_ptr).to_bytes()).to_os_string()
1433        })
1434        .collect();
1435
1436    unsafe { c::process_parseArgStrFree(argv, error) };
1437    Ok(args)
1438}
1439
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_string_as_args()` on a `&str` argument string.
    fn parse(arg_str: &str) -> Result<Vec<OsString>, String> {
        parse_string_as_args(OsStr::new(arg_str))
    }

    #[test]
    // can't call foreign function: process_parseArgStr
    #[cfg_attr(miri, ignore)]
    fn test_parse_args() {
        let args = parse(r#"the quick brown fox "jumped over" the "\"lazy\" dog""#).unwrap();

        // quoted sections stay together and escaped quotes are kept
        let expected_args = &[
            "the",
            "quick",
            "brown",
            "fox",
            "jumped over",
            "the",
            "\"lazy\" dog",
        ];

        assert_eq!(args, expected_args);
    }

    #[test]
    // can't call foreign function: process_parseArgStr
    #[cfg_attr(miri, ignore)]
    fn test_parse_args_empty() {
        let expected_args: &[&str] = &[];

        assert_eq!(parse("").unwrap(), expected_args);
    }

    #[test]
    // can't call foreign function: process_parseArgStr
    #[cfg_attr(miri, ignore)]
    fn test_parse_args_error() {
        // the quote is never closed
        let err_str = parse(r#"hello "world"#).unwrap_err();

        assert!(!err_str.is_empty());
    }

    #[test]
    // can't call foreign function: process_parseArgStr
    #[cfg_attr(miri, ignore)]
    fn test_nullable_option() {
        // Parses a config file with `general_option` added to its 'general' section, parses the
        // given command line, merges the two, and returns the resulting heartbeat interval.
        let merged_interval = |general_option: &str, cli_args: &[&str]| {
            let yaml = format!(
                r#"
                general:
                  stop_time: 1 min
                  {}
                network:
                  graph:
                    type: 1_gbit_switch
                hosts:
                  myhost:
                    network_node_id: 0
                    processes:
                    - path: /bin/true
                "#,
                general_option
            );

            let config_file: ConfigFileOptions = serde_yaml::from_str(&yaml).unwrap();
            let cli: CliOptions = CliOptions::try_parse_from(cli_args.iter().copied()).unwrap();

            ConfigOptions::new(config_file, cli)
                .general
                .heartbeat_interval
        };

        // the three command lines that are combined with each config file variant
        let cli_unset: &[&str] = &["shadow", "-"];
        let cli_5s: &[&str] = &["shadow", "--heartbeat-interval", "5s", "-"];
        let cli_null: &[&str] = &["shadow", "--heartbeat-interval", "null", "-"];

        let time_1_sec = units::Time::new(1, units::TimePrefix::Sec);
        let time_5_sec = units::Time::new(5, units::TimePrefix::Sec);

        // "heartbeat_interval: null" with no cli option => None
        assert_eq!(merged_interval("heartbeat_interval: null", cli_unset), None);

        // "heartbeat_interval: null" with "--heartbeat-interval 5s" => 5s
        assert_eq!(
            merged_interval("heartbeat_interval: null", cli_5s),
            Some(NullableOption::Value(time_5_sec))
        );

        // "heartbeat_interval: null" with "--heartbeat-interval null" => NullableOption::Null
        assert_eq!(
            merged_interval("heartbeat_interval: null", cli_null),
            Some(NullableOption::Null)
        );

        // "heartbeat_interval: 5s" with no cli option => 5s
        assert_eq!(
            merged_interval("heartbeat_interval: 5s", cli_unset),
            Some(NullableOption::Value(time_5_sec))
        );

        // "heartbeat_interval: 5s" with "--heartbeat-interval 5s" => 5s
        assert_eq!(
            merged_interval("heartbeat_interval: 5s", cli_5s),
            Some(NullableOption::Value(time_5_sec))
        );

        // "heartbeat_interval: 5s" with "--heartbeat-interval null" => NullableOption::Null
        assert_eq!(
            merged_interval("heartbeat_interval: 5s", cli_null),
            Some(NullableOption::Null)
        );

        // no config option with no cli option => 1s (default)
        assert_eq!(
            merged_interval("", cli_unset),
            Some(NullableOption::Value(time_1_sec))
        );

        // no config option with "--heartbeat-interval 5s" => 5s
        assert_eq!(
            merged_interval("", cli_5s),
            Some(NullableOption::Value(time_5_sec))
        );

        // no config option with "--heartbeat-interval null" => NullableOption::Null
        assert_eq!(
            merged_interval("", cli_null),
            Some(NullableOption::Null)
        );
    }
}