commit | f8d8c1fff8696ed8026943e2142d164c956032df | [log] [tgz] |
---|---|---|
author | Danny Guo <dguo@users.noreply.github.com> | Tue Dec 27 03:54:32 2016 |
committer | GitHub <noreply@github.com> | Tue Dec 27 03:54:32 2016 |
tree | 1465349bd14487b4cf5411d14d6394cc4398c831 | |
parent | b2fc338541fef33104ec53c9821e61af13b1ff45 [diff] |
Fix Damerau-Levenshtein (#12) Rename the original implementation to osa_distance, and implement the actual metric.
Rust implementations of string similarity metrics:
# Cargo.toml [dependencies] strsim = "0.6.0"
You can change the version in the URL to see the documentation for an older version listed in the changelog.
extern crate strsim; use strsim::{hamming, levenshtein, osa_distance, damerau_levenshtein, jaro, jaro_winkler, levenshtein_against_vec, osa_distance_against_vec, damerau_levenshtein_against_vec, jaro_against_vec, jaro_winkler_against_vec}; fn main() { match hamming("hamming", "hammers") { Ok(distance) => assert_eq!(3, distance), Err(why) => panic!("{:?}", why) } assert_eq!(3, levenshtein("kitten", "sitting")); assert_eq!(3, osa_distance("ac", "cba")); assert_eq!(2, damerau_levenshtein("ac", "cba")); assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() < 0.001); assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < 0.001); // get vectors of values back let v = vec!["test", "test1", "test12", "test123", "", "tset", "tsvet"]; assert_eq!(levenshtein_against_vec("test", &v), vec![0, 1, 2, 3, 4, 2, 3]); assert_eq!(osa_distance_against_vec("test", &v), vec![0, 1, 2, 3, 4, 1, 3]); assert_eq!(damerau_levenshtein_against_vec("test", &v), vec![0, 1, 2, 3, 4, 1, 2]); let jaro_distances = jaro_against_vec("test", &v); let jaro_expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667]; let jaro_delta: f64 = jaro_distances.iter() .zip(jaro_expected.iter()) .map(|(x, y)| (x - y).abs() as f64) .fold(0.0, |x, y| x + y as f64); assert!(jaro_delta < 0.0001); let jaro_winkler_distances = jaro_winkler_against_vec("test", &v); let jaro_winkler_expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925]; let jaro_winkler_delta = jaro_winkler_distances.iter() .zip(jaro_winkler_expected.iter()) .map(|(x, y)| (x - y).abs() as f64) .fold(0.0, |x, y| x + y as f64); assert!(jaro_winkler_delta < 0.0001); }
If you don't want to install Rust itself, you can install Docker and run `./dev`. This should bring up a temporary container from which you can run cargo commands.