141 lines
No EOL
3.6 KiB
Rust
141 lines
No EOL
3.6 KiB
Rust
use std::collections::{HashMap, HashSet};
|
|
use std::str::FromStr;
|
|
use csv::Reader;
|
|
use crate::board::Word;
|
|
|
|
pub trait Dictionary {
|
|
|
|
fn create_from_reader<T: std::io::Read>(reader: Reader<T>) -> Self;
|
|
fn create_from_path(path: &str) -> Self;
|
|
fn create_from_str(data: &str) -> Self;
|
|
fn filter_to_sub_dictionary(&self, proportion: f64) -> Self;
|
|
fn substring_set(&self) -> HashSet<&str>;
|
|
fn is_word_valid(&self, word: &Word) -> bool;
|
|
}
|
|
/// Concrete dictionary representation: each key is a word and each value is its `f64` score.
pub type DictionaryImpl = HashMap<String, f64>;
|
|
|
|
impl Dictionary for DictionaryImpl{
|
|
|
|
fn create_from_reader<T: std::io::Read>(mut reader: Reader<T>) -> Self {
|
|
let mut map = HashMap::new();
|
|
|
|
for result in reader.records() {
|
|
let record = result.unwrap();
|
|
let word = record.get(0).unwrap().to_string();
|
|
|
|
let score = record.get(1).unwrap();
|
|
let score = f64::from_str(score).unwrap();
|
|
|
|
map.insert(word, score);
|
|
|
|
}
|
|
|
|
map
|
|
}
|
|
|
|
fn create_from_path(path: &str) -> Self {
|
|
let reader = csv::Reader::from_path(path).unwrap();
|
|
|
|
DictionaryImpl::create_from_reader(reader)
|
|
}
|
|
|
|
fn create_from_str(data: &str) -> Self {
|
|
let reader = csv::ReaderBuilder::new()
|
|
.has_headers(true)
|
|
.from_reader(data.as_bytes());
|
|
|
|
DictionaryImpl::create_from_reader(reader)
|
|
}
|
|
|
|
fn filter_to_sub_dictionary(&self, proportion: f64) -> Self {
|
|
let mut map = HashMap::new();
|
|
|
|
for (word, score) in self.iter() {
|
|
if *score >= proportion {
|
|
map.insert(word.clone(), *score);
|
|
}
|
|
}
|
|
|
|
map
|
|
|
|
}
|
|
|
|
fn substring_set(&self) -> HashSet<&str> {
|
|
let mut set = HashSet::new();
|
|
|
|
for (word, _score) in self.iter() {
|
|
for j in 0..word.len() {
|
|
for k in (j+1)..(word.len()+1) {
|
|
set.insert(&word[j..k]);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
set
|
|
}
|
|
|
|
fn is_word_valid(&self, word: &Word) -> bool {
|
|
let text = word.to_string();
|
|
self.contains_key(&text)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the CSV resource and spot-checks its size and one known entry.
    #[test]
    fn test_dictionary() {
        let loaded = DictionaryImpl::create_from_path("resources/dictionary.csv");

        assert_eq!(loaded.len(), 279429);

        for word in &["AA", "AARDVARK"] {
            assert!(loaded.contains_key(*word));
        }

        let aardvark_score = *loaded.get("AARDVARK").unwrap();
        assert!((aardvark_score - 0.5798372).abs() < 0.0001);
    }

    /// Checks score filtering and the substrings derived from the surviving words.
    #[test]
    fn test_dictionary_sets() {
        let full: DictionaryImpl = [("JOEL", 0.7), ("JOHN", 0.5), ("XYZ", 0.1)]
            .iter()
            .map(|&(word, score)| (word.to_string(), score))
            .collect();

        // Only the entries scoring at least 0.3 should remain.
        let filtered = full.filter_to_sub_dictionary(0.3);
        assert_eq!(filtered.len(), 2);
        for kept in &["JOEL", "JOHN"] {
            assert!(filtered.contains_key(*kept));
        }

        let substrings = filtered.substring_set();

        // Contiguous pieces of the surviving words, grouped by length.
        let present = [
            "J", "O", "E", "L", "H", "N",
            "JO", "OE", "EL", "OH", "HN",
            "JOE", "OEL", "JOH", "OHN",
        ];
        for piece in present.iter() {
            assert!(substrings.contains(*piece));
        }

        // Pieces of the filtered-out word, and non-contiguous letter pairs.
        let absent = ["XY", "JH", "JE"];
        for piece in absent.iter() {
            assert!(!substrings.contains(*piece));
        }
    }
}