Compare commits

...

13 Commits

Author SHA1 Message Date
3eb978d27c
Bump version 2023-11-16 21:45:27 -08:00
fc24a5db72
Improve README 2023-11-16 21:35:30 -08:00
948e30ce00
Remove unnecessary archive formats from release CI 2023-11-16 21:35:29 -08:00
67047ba9fc
Update bench results 2023-11-16 21:35:29 -08:00
bbbc9da2ca
Add bench for small runs / startup time 2023-11-16 21:35:29 -08:00
09da703b20
Specialize on truncate arg for ~5% speedup in truncate case
Use a generic param to specialize on the truncate arg to avoid needing
to check it and/or host bits if we'll end up accepting it regardless
when truncate is enabled.

Refactor host bits check into iputils.
2023-11-16 21:33:59 -08:00
a75fdadcf8
Switch to doc comments instead of macro params for cli help 2023-11-16 19:04:30 -08:00
56ad01e74c
Remove unnecessary allocations in error handling 2023-11-16 18:59:29 -08:00
28bf3b5e10
Update benchmark results 2023-11-15 16:53:45 -08:00
914f5ea1a6
Improve formatting 2023-11-15 16:53:45 -08:00
ceaf503407
Improve bench graph outputs 2023-11-15 16:53:45 -08:00
caf0bbdbe3
Add benches and plot generation 2023-11-15 16:53:45 -08:00
037f9e9f6e
tests: Test against aggregate6 output ordering behaviour 2023-11-15 16:53:31 -08:00
19 changed files with 2631132 additions and 1507549 deletions

View File

@ -16,7 +16,7 @@ jobs:
- target: x86_64-pc-windows-gnu
archive: zip
- target: x86_64-unknown-linux-musl
archive: tar.gz tar.xz tar.zst
archive: tar.gz
- target: x86_64-apple-darwin
archive: zip
steps:

790
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package]
name = "rs-aggregate"
version = "0.3.0"
version = "0.3.1"
authors = ["Keenan Tims <ktims@gotroot.ca>"]
edition = "2021"
description = "Aggregate a list of IP prefixes into their minimum equivalent representation"
@ -8,11 +8,7 @@ readme = "README.md"
repository = "https://github.com/ktims/rs-aggregate"
license = "MIT"
categories = ["network-programming"]
exclude = [
".github/*",
"doc/*",
"test-data/*",
]
exclude = [".github/*", "doc/*", "test-data/*"]
[dependencies]
clap = { version = "4.4.6", features = ["derive"] }
@ -25,6 +21,15 @@ assert_fs = "1.0.12"
predicates = "3.0.1"
rstest = "0.16.0"
glob = "0.3.1"
tempfile = "3.8.1"
json = "0.12.4"
plotters = "0.3.5"
rand_chacha = "0.3.1"
rand = "0.8.5"
[[bin]]
name = "rs-aggregate"
[[bench]]
name = "perf"
harness = false

View File

@ -5,20 +5,44 @@ Intended to be a drop-in replacement for [aggregate6](https://github.com/job/agg
Takes a list of whitespace-separated IPs or IP networks and aggregates them to their minimal representation.
## Known discrepancies with `aggregate6`
## Installation
* `rs-aggregate` accepts subnet and wildcard mask formats in addition to CIDR, i.e. all these are valid and equivalent:
* `1.1.1.0/255.255.255.0`
* `1.1.1.0/0.0.0.255`
* `1.1.1.0/24`
* `-m/--max-prefixlen` supports different maximums for each address family as ipv4,ipv6 format
`rs-aggregate` is built statically. CI-built binaries can be found in the GitHub
releases for most common platforms. Simply download the appropriate binary and
place it in your path.
It can also be installed via some software management tools:
### FreeBSD
```
pkg install rs-aggregate
```
### Cargo
```
cargo install rs-aggregate
```
## Known differences from `aggregate6`
* `-m/--max-prefixlen` supports different maximums for each address family as
ipv4,ipv6 format. A single value is also supported and has the same behaviour
as `aggregate6` (apply the same maximum to both address families).
* `-v` verbose dump is not supported
* Truncation errors (when host bits are set without the `-t` flag) are printed
based on the parsed address, ie. always in CIDR format, whereas `aggregate6`
prints errors based on the input.
## Performance
Performance comparison of `rs-aggregate` vs `aggregate6`. A speedup of >100x is achieved on DFZ data.
Full DFZ (1154968 total, 202729 aggregates):
### Full DFZ (1154968 total, 202729 aggregates):
![dfz perf comparison](doc/perfcomp_all.png)
IPv4 DFZ (968520 total, 154061 aggregates):
![ipv4 dfz perf comparison](doc/perfcomp_v4.png)
### IPv4 DFZ (968520 total, 154061 aggregates):
![ipv4 dfz perf comparison](doc/perfcomp_v4.png)
### 1024 random prefixes (startup time):
![startup time comparison](doc/perfcomp_startup.png)

282
benches/perf.rs Normal file
View File

@ -0,0 +1,282 @@
use ipnet::Ipv4Net;
use json::JsonValue;
use plotters::backend::BitMapBackend;
use plotters::chart::ChartBuilder;
use plotters::coord::ranged1d::{IntoSegmentedCoord, SegmentValue};
use plotters::drawing::IntoDrawingArea;
use plotters::element::{EmptyElement, Text};
use plotters::series::{Histogram, PointSeries};
use plotters::style::full_palette::GREY;
use plotters::style::text_anchor::{HPos, Pos, VPos};
use plotters::style::{Color, IntoFont, RGBColor, ShapeStyle, BLACK, WHITE};
use rand::prelude::*;
use rand_chacha::ChaChaRng;
use std::ffi::OsStr;
use std::io::{Read, Write};
use std::process::Stdio;
use tempfile::NamedTempFile;
// Fill colour for the histogram bars (#4285F4).
const BAR_COLOUR: RGBColor = RGBColor(66, 133, 244);
/// One benchmark target: the command line to run and the label shown on plots.
#[derive(Clone, Debug)]
struct TestDefinition {
    cmd: String,  // full command line handed to hyperfine
    name: String, // including version
}
/// Summary statistics parsed from one hyperfine result object
/// (hyperfine reports these in seconds).
#[derive(Clone, Debug)]
struct TestResult {
    mean: f64,
    stddev: f64,
    median: f64,
    min: f64,
    max: f64,
}
impl From<JsonValue> for TestResult {
    /// Pull the summary statistics out of a hyperfine result object.
    /// Panics if any expected field is missing or non-numeric.
    fn from(value: JsonValue) -> Self {
        let stat = |key: &str| value[key].as_f64().unwrap();
        Self {
            mean: stat("mean"),
            stddev: stat("stddev"),
            median: stat("median"),
            min: stat("min"),
            max: stat("max"),
        }
    }
}
/// Build the baseline benchmark set: our binary plus `aggregate6` run under
/// python3, each labelled with its version string.
fn make_tests(input_path: &str) -> Vec<TestDefinition> {
    // Run `prog args...` capturing stdout; panic with the supplied messages
    // if the process cannot be spawned or its output collected.
    fn captured_stdout(prog: &str, args: &[&str], spawn_msg: &str, output_msg: &str) -> Vec<u8> {
        std::process::Command::new(prog)
            .args(args)
            .stdout(Stdio::piped())
            .spawn()
            .expect(spawn_msg)
            .wait_with_output()
            .expect(output_msg)
            .stdout
    }

    let our_version = format!("rs-aggregate {}", env!("CARGO_PKG_VERSION"));
    let our_path = env!("CARGO_BIN_EXE_rs-aggregate");

    let python_version_raw = captured_stdout(
        "python3",
        &["--version"],
        "Unable to run python3",
        "Couldn't get python3 output",
    );
    let python_version = String::from_utf8_lossy(&python_version_raw);

    let agg6_version_raw = captured_stdout(
        "python3",
        &["-m", "aggregate6", "-V"],
        "Unable to run aggregate6",
        "Couldn't get aggregate6 output",
    );
    let agg6_version = String::from_utf8_lossy(&agg6_version_raw);

    vec![
        TestDefinition {
            cmd: format!("{} {}", our_path, input_path),
            name: our_version.into(),
        },
        TestDefinition {
            cmd: format!("python3 -m aggregate6 {}", input_path),
            name: format!("{} ({})", agg6_version.trim(), python_version.trim()),
        },
    ]
}
/// Build the IPv4-only benchmark set: the baseline tests plus `iprange`,
/// labelled with the first line of its `--version` banner.
fn make_v4_tests(input_path: &str) -> Vec<TestDefinition> {
    let mut all_tests = make_tests(input_path);

    let iprange_version_raw = std::process::Command::new("iprange")
        .arg("--version")
        .stdout(Stdio::piped())
        .spawn()
        .expect("Unable to run iprange")
        .wait_with_output()
        .expect("Couldn't get iprange output")
        .stdout;
    let iprange_version = String::from_utf8_lossy(&iprange_version_raw);

    all_tests.push(TestDefinition {
        cmd: format!("iprange --optimize {}", input_path),
        // `.next()` is the idiomatic form of `.nth(0)` (clippy::iter_nth_zero),
        // and `expect` gives a useful message if iprange printed nothing.
        name: iprange_version
            .lines()
            .next()
            .expect("Empty iprange version output")
            .into(),
    });
    all_tests
}
// We don't really care if aggregation will actually be possible, but we'll only
// generate prefixes with length 8->24 so some should be possible.
/// Generate one random IPv4 prefix with length in 8..=24 and host bits zero.
fn make_random_prefix(rng: &mut impl Rng) -> Ipv4Net {
    let prefix_len: u8 = rng.gen_range(8..25);
    // Pick a random network number, then shift it into the high bits.
    // Parentheses make the shift amount explicit (same parse as before,
    // since binary `-` binds tighter than `<<`; silences clippy::precedence).
    let netaddr: u32 = rng.gen_range(0..(1u32 << prefix_len)) << (32 - prefix_len);
    Ipv4Net::new(netaddr.into(), prefix_len).unwrap()
}
/// Generate 1024 random v4 addresses as a startup time test.
///
/// Returns the tempfile together with the test definitions: the caller must
/// keep the tempfile alive, because dropping it deletes the input file
/// before the benches run.
fn make_startup_tests() -> (NamedTempFile, Vec<TestDefinition>) {
    let mut rng = ChaChaRng::seed_from_u64(0); // use a repeatable rng with custom seed
    let addresses = std::iter::repeat_with(|| make_random_prefix(&mut rng)).take(1024);

    let mut outfile = NamedTempFile::new().unwrap();
    let mut outfile_f = outfile.as_file();
    for addr in addresses {
        // `writeln!` is the idiomatic spelling of
        // `write_fmt(format_args!("{}\n", ...))` and produces the same bytes.
        writeln!(outfile_f, "{}", addr).unwrap();
    }
    outfile.flush().unwrap();

    let outpath = outfile.path().as_os_str().to_string_lossy().to_string();
    // outfile needs to live on so destructor doesn't delete it before we run the benches
    (outfile, make_v4_tests(outpath.as_str()))
}
/// Benchmark `cmd` under hyperfine (at least 10 runs, no intermediate shell)
/// and parse the summary statistics from its JSON export.
///
/// Returns an error if hyperfine exits unsuccessfully; panics if hyperfine
/// cannot be spawned or the result file cannot be read/parsed.
fn hyperfine_harness<S>(cmd: S) -> Result<TestResult, Box<dyn std::error::Error>>
where
    S: AsRef<OsStr>,
{
    let resultfile = NamedTempFile::new().expect("Unable to create tempfile");
    let mut process = std::process::Command::new("hyperfine")
        .arg("--export-json")
        .arg(resultfile.path())
        .arg("--min-runs")
        .arg("10")
        .arg("-N") // run the command directly, without a shell
        .arg("--")
        .arg(&cmd)
        .stdout(Stdio::null())
        .spawn()
        .expect("unable to run command");
    let status = process.wait().expect("unable to wait on process");
    // Previously the exit status was discarded, so a failed hyperfine run
    // surfaced only as a confusing JSON parse error on an empty result file.
    if !status.success() {
        return Err(format!(
            "hyperfine failed ({}) for command `{}`",
            status,
            cmd.as_ref().to_string_lossy()
        )
        .into());
    }

    let mut raw_result_buf = Vec::new();
    resultfile
        .as_file()
        .read_to_end(&mut raw_result_buf)
        .expect("Can't read results");
    resultfile.close().unwrap();

    // `unwrap_or_else` avoids building the panic message on the happy path
    // (clippy::expect_fun_call).
    let hf_result = json::parse(&String::from_utf8_lossy(&raw_result_buf)).unwrap_or_else(|_| {
        panic!(
            "Can't parse hyperfine json results from command `{}`",
            cmd.as_ref().to_string_lossy()
        )
    });
    let final_result = &hf_result["results"][0];
    Ok((final_result.clone()).into())
}
/// Render a bar chart of benchmark speedups to `outfile` (640x480 PNG).
///
/// Each bar shows `baseline_mean / result_mean`, where the baseline is the
/// SECOND entry of `results` (assumed to be aggregate6). Panics if `results`
/// has fewer than two entries.
fn plot_results(
    results: &Vec<(TestDefinition, TestResult)>,
    caption: &str,
    outfile: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    // Second result is our baseline
    let norm_numerator = results[1].1.mean;
    // Upper y-bound: the speedup of the fastest (smallest-mean) result.
    let max_result = norm_numerator / results.iter().map(|x| x.1.mean).reduce(f64::min).unwrap();

    let drawing = BitMapBackend::new(outfile, (640, 480)).into_drawing_area();
    drawing.fill(&WHITE)?;
    let mut chart = ChartBuilder::on(&drawing)
        .x_label_area_size(40)
        .y_label_area_size(40)
        .caption(caption, ("Roboto", 24).into_font())
        // Segmented x-axis: one segment per result, bars centered in segments.
        .build_cartesian_2d((0..results.len() - 1).into_segmented(), 0.0..max_result)?;

    chart
        .configure_mesh()
        .y_desc("Speedup vs aggregate6")
        .y_labels(5)
        .y_label_formatter(&|x| std::fmt::format(format_args!("{:.0}", *x)))
        .light_line_style(WHITE)
        .bold_line_style(GREY)
        .disable_x_mesh()
        .x_label_style(("Roboto", 18).into_font())
        // Label each x segment with the corresponding test's display name.
        .x_label_formatter(&|x| match x {
            SegmentValue::Exact(val) => results[*val].0.name.clone(),
            SegmentValue::CenterOf(val) => results[*val].0.name.clone(),
            SegmentValue::Last => String::new(),
        })
        .draw()?;

    // Bars: one per result, height = speedup relative to the baseline.
    chart.draw_series(
        Histogram::vertical(&chart)
            .style(BAR_COLOUR.filled())
            .margin(10)
            .data(
                results
                    .iter()
                    .enumerate()
                    .map(|(x, y)| (x, norm_numerator / y.1.mean)),
            ),
    )?;

    // Numeric speedup labels at each bar tip. For short bars (< 25.0) the
    // label is offset one way in the bar colour, for tall bars the other way
    // in white — presumably to keep the text legible against the background
    // vs. the bar fill.
    chart.draw_series(PointSeries::of_element(
        results
            .iter()
            .enumerate()
            .map(|(x, y)| (SegmentValue::CenterOf(x), norm_numerator / y.1.mean)),
        5,
        ShapeStyle::from(&BLACK).filled(),
        &|coord, _size, _style| {
            let (target_y, target_colour) = if coord.1 < 25.0 {
                (-25, BAR_COLOUR)
            } else {
                (25, WHITE)
            };
            EmptyElement::at(coord.clone())
                + Text::new(
                    format!("{:.1} x", coord.1),
                    (0, target_y),
                    ("Roboto", 18)
                        .into_font()
                        .color(&target_colour)
                        .pos(Pos::new(HPos::Center, VPos::Center)),
                )
        },
    ))?;
    Ok(())
}
/// Entry point: run each benchmark suite in turn and emit its comparison plot.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dfz_tests = make_tests("test-data/dfz_combined/input");
    run_and_plot(dfz_tests, "doc/perfcomp_all.png", "IPv4 & IPv6 Full DFZ")?;

    let v4_tests = make_v4_tests("test-data/dfz_v4/input");
    run_and_plot(v4_tests, "doc/perfcomp_v4.png", "IPv4 Full DFZ")?;

    // Need to hold on to tmpfile so it doesn't get deleted before we can bench
    let (_tmpfile, startup_tests) = make_startup_tests();
    run_and_plot(
        startup_tests,
        "doc/perfcomp_startup.png",
        "1024 Random IPv4 Prefixes",
    )?;

    Ok(())
}
/// Run every test through the hyperfine harness, then plot the collected
/// results to `filename`. Stops at the first harness error.
fn run_and_plot(
    tests: Vec<TestDefinition>,
    filename: &str,
    caption: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let results = tests
        .into_iter()
        .map(|test| {
            println!("Running bench: {:?}", test);
            hyperfine_harness(&test.cmd).map(|result| (test, result))
        })
        .collect::<Result<Vec<_>, _>>()?;
    plot_results(&results, caption, filename)?;
    Ok(())
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 24 KiB

BIN
doc/perfcomp_startup.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -63,7 +63,7 @@ pub struct IpOrNet(IpNet);
#[derive(Debug, Clone)]
pub struct NetParseError {
#[allow(dead_code)]
msg: String,
msg: &'static str,
}
impl Display for NetParseError {
@ -87,7 +87,7 @@ impl IpOrNet {
Ok(lead_ones.try_into()?)
} else {
Err(Box::new(NetParseError {
msg: "Invalid subnet mask".to_owned(),
msg: "Invalid subnet mask",
}))
}
} else {
@ -96,7 +96,7 @@ impl IpOrNet {
Ok(lead_zeros.try_into()?)
} else {
Err(Box::new(NetParseError {
msg: "Invalid wildcard mask".to_owned(),
msg: "Invalid wildcard mask",
}))
}
}
@ -113,7 +113,7 @@ impl IpOrNet {
Ok(IpNet::new(ip, IpOrNet::parse_mask(pfxlen)?)?.into())
} else {
Err(Box::new(NetParseError {
msg: "Mask form is not valid for IPv6 address".to_owned(),
msg: "Mask form is not valid for IPv6 address",
}))
}
}
@ -140,6 +140,9 @@ impl IpOrNet {
pub fn network(&self) -> IpAddr {
self.0.network()
}
pub fn has_host_bits(&self) -> bool {
self.0.addr() != self.0.network()
}
}
impl FromStr for IpOrNet {
@ -209,7 +212,7 @@ impl Default for PrefixlenPair {
impl Display for PrefixlenPair {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(format!("{},{}", self.v4, self.v6).as_str())
f.write_fmt(format_args!("{},{}", self.v4, self.v6))
}
}
@ -263,12 +266,12 @@ impl PartialOrd<IpOrNet> for PrefixlenPair {
#[derive(Debug)]
pub struct ParsePrefixlenError {
msg: String,
msg: &'static str,
}
impl Display for ParsePrefixlenError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.msg.as_str())
f.write_str(self.msg)
}
}
@ -280,25 +283,25 @@ impl FromStr for PrefixlenPair {
match s.split_once(',') {
Some(pair) => {
let v4 = u8::from_str(pair.0).or(Err(ParsePrefixlenError {
msg: "Unable to parse integer".to_owned(),
msg: "Unable to parse integer",
}))?;
let v6 = u8::from_str(pair.1).or(Err(ParsePrefixlenError {
msg: "Unable to parse integer".to_owned(),
msg: "Unable to parse integer",
}))?;
if v4 > 32 || v6 > 128 {
return Err(ParsePrefixlenError {
msg: "Invalid prefix length".to_owned(),
msg: "Invalid prefix length",
});
}
Ok(PrefixlenPair { v4, v6 })
}
None => {
let len = u8::from_str(s).or(Err(ParsePrefixlenError {
msg: "Unable to parse integer".to_owned(),
msg: "Unable to parse integer",
}))?;
if len > 128 {
return Err(ParsePrefixlenError {
msg: "Invalid prefix length".to_owned(),
msg: "Invalid prefix length",
});
}
Ok(PrefixlenPair { v4: len, v6: len })

View File

@ -1,44 +1,33 @@
extern crate ipnet;
use std::{process::exit, io};
use std::{io, process::exit};
mod iputils;
use iputils::{IpBothRange, IpOrNet, PrefixlenPair};
use clio::*;
use std::io::{Write, BufRead};
use std::io::{BufRead, Write};
use clap::Parser;
const WRITER_BUFSIZE: usize = 32 * 1024;
const WRITER_BUFSIZE: usize = 1 * 1024;
#[derive(Parser)]
#[command(author, version, about, long_about=None)]
#[command(author, version, about)]
struct Args {
#[clap(value_parser, default_value = "-")]
input: Vec<Input>,
#[structopt(
short,
long,
default_value = "32,128",
help = "Maximum prefix length for prefixes read. Single value applies to IPv4 and IPv6, comma-separated [IPv4],[IPv6]."
)]
/// Maximum prefix length for prefixes read. Single value applies to IPv4 and IPv6, comma-separated [IPv4],[IPv6].
#[structopt(short, long, default_value = "32,128")]
max_prefixlen: PrefixlenPair,
#[arg(short, long, help = "truncate IP/mask to network/mask (else ignore)")]
/// Truncate IP/mask to network/mask (else ignore)
#[arg(short, long)]
truncate: bool,
#[arg(
id = "4",
short,
help = "Only output IPv4 prefixes",
conflicts_with("6")
)]
/// Only output IPv4 prefixes
#[arg(id = "4", short, conflicts_with("6"))]
only_v4: bool,
#[arg(
id = "6",
short,
help = "Only output IPv6 prefixes",
conflicts_with("4")
)]
/// Only output IPv6 prefixes
#[arg(id = "6", short, conflicts_with("4"))]
only_v6: bool,
}
@ -71,16 +60,17 @@ struct App {
}
impl App {
fn add_prefix(&mut self, pfx: IpOrNet) {
fn add_prefix<const TRUNCATE: bool>(&mut self, pfx: IpOrNet) {
// Parser accepts host bits set, so detect that case and error if not truncate mode
// Note: aggregate6 errors in this case regardless of -4, -6 so do the same
if !self.args.truncate {
if pfx.addr() != pfx.network() {
eprintln!("ERROR: '{}' is not a valid IP network, ignoring.", pfx);
return;
}
if !TRUNCATE && pfx.has_host_bits() {
// We don't have the original string any more so our error
// differs from `aggregate6` in that it prints the pfxlen as
// parsed, not as in the source.
eprintln!("ERROR: '{}' is not a valid IP network, ignoring.", pfx);
return;
}
// Don't bother saving if we won't display.
if self.args.only_v4 && pfx.is_ipv6() {
return;
} else if self.args.only_v6 && pfx.is_ipv4() {
@ -90,14 +80,14 @@ impl App {
self.prefixes.add(pfx);
}
}
fn consume_input(&mut self, input: &mut Input) {
fn consume_input<const TRUNCATE: bool>(&mut self, input: &mut Input) {
for line in input.lock().lines() {
match line {
Ok(line) => {
for net in line.split_whitespace() {
for net in line.split_ascii_whitespace() {
let pnet = net.parse::<IpOrNet>();
match pnet {
Ok(pnet) => self.add_prefix(pnet),
Ok(pnet) => self.add_prefix::<TRUNCATE>(pnet),
Err(_e) => {
eprintln!("ERROR: '{}' is not a valid IP network, ignoring.", net);
}
@ -114,7 +104,10 @@ impl App {
fn simplify_inputs(&mut self) {
let inputs = self.args.input.to_owned();
for mut input in inputs {
self.consume_input(&mut input);
match self.args.truncate {
true => self.consume_input::<true>(&mut input),
false => self.consume_input::<false>(&mut input),
}
}
self.prefixes.simplify();
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

154061
test-data/dfz_v4/expected Normal file

File diff suppressed because it is too large Load Diff

968520
test-data/dfz_v4/input Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -43,14 +43,24 @@ impl Predicate<[u8]> for SortedEquals {
impl PredicateReflection for SortedEquals {}
// Really should normalize the data (lex sort) before comparison
/// Compare the output with pre-prepared expected outputs. When functionality is
/// matching, we generate expected outputs with `aggregate6`, and expect byte-for-byte
/// output consistency, including ordering. When our functionality and `aggregate6`'s
/// diverge, we generate expected outputs ourselves, and expect output sorted by numeric
/// value of the address.
///
/// Normalization is available for future test cases.
#[rstest]
#[case("test-data/dfz_combined", "")] // Basic aggregation test
#[case("test-data/max_pfxlen", "-m 20")] // Filter on prefix length
#[case("test-data/max_pfxlen_split", "-m 20,32")] // Filter on prefix length (split v4/v6)
#[case("test-data/v4_only", "-4")] // Filter v4 only
#[case("test-data/v6_only", "-6")] // Filter v4 only
fn dfz_test(#[case] path: &str, #[case] args: &str) -> Result<(), Box<dyn Error>> {
#[case::dfz_combined("test-data/dfz_combined", "", false)] // Basic aggregation test
#[case::max_pfxlen("test-data/max_pfxlen", "-m 20", false)] // Filter on prefix length
#[case::max_pfxlen_split("test-data/max_pfxlen_split", "-m 20,32", false)] // Filter on prefix length (split v4/v6)
#[case::v4_only("test-data/v4_only", "-4", false)] // Filter v4 only
#[case::v6_only("test-data/v6_only", "-6", false)] // Filter v6 only
fn dfz_test(
#[case] path: &str,
#[case] args: &str,
#[case] normalize_data: bool,
) -> Result<(), Box<dyn Error>> {
let mut cmd = Command::cargo_bin("rs-aggregate")?;
let in_path = Path::new(path).join("input");
let expect_path = Path::new(path).join("expected");
@ -65,10 +75,17 @@ fn dfz_test(#[case] path: &str, #[case] args: &str) -> Result<(), Box<dyn Error>
.timeout(std::time::Duration::from_secs(30))
.assert();
assert
.success()
.stdout(SortedEquals::new(&expect_data))
.stderr(predicate::str::is_empty());
if normalize_data {
assert
.success()
.stdout(SortedEquals::new(&expect_data))
.stderr(predicate::str::is_empty());
} else {
assert
.success()
.stdout(predicate::eq(expect_data))
.stderr(predicate::str::is_empty());
}
Ok(())
}