WordGrid/src/dictionary.rs

141 lines
No EOL
3.6 KiB
Rust

use std::collections::{HashMap, HashSet};
use std::str::FromStr;
use csv::Reader;
use crate::board::Word;
/// Construction and query interface for a scored word list.
///
/// The constructors return `Self` directly rather than a `Result`, so an
/// implementation has no way to report a load failure to the caller other
/// than panicking (the `DictionaryImpl` implementation in this file does so).
pub trait Dictionary {
/// Builds a dictionary from an already-configured `csv` reader.
fn create_from_reader<T: std::io::Read>(reader: Reader<T>) -> Self;
/// Builds a dictionary from the CSV file located at `path`.
fn create_from_path(path: &str) -> Self;
/// Builds a dictionary from CSV text held in memory.
fn create_from_str(data: &str) -> Self;
/// Returns a new dictionary derived from this one using `proportion`.
///
/// In the `DictionaryImpl` implementation, `proportion` acts as a minimum
/// score: entries whose score is at least `proportion` are kept.
fn filter_to_sub_dictionary(&self, proportion: f64) -> Self;
/// Returns a set of string slices borrowed from this dictionary.
///
/// In the `DictionaryImpl` implementation, this is every non-empty
/// contiguous substring of every word.
fn substring_set(&self) -> HashSet<&str>;
/// Reports whether `word` is accepted by this dictionary.
fn is_word_valid(&self, word: &Word) -> bool;
}
/// Dictionary stored as a map from each word to its `f64` score.
pub type DictionaryImpl = HashMap<String, f64>;
impl Dictionary for DictionaryImpl{
fn create_from_reader<T: std::io::Read>(mut reader: Reader<T>) -> Self {
let mut map = HashMap::new();
for result in reader.records() {
let record = result.unwrap();
let word = record.get(0).unwrap().to_string();
let score = record.get(1).unwrap();
let score = f64::from_str(score).unwrap();
map.insert(word, score);
}
map
}
fn create_from_path(path: &str) -> Self {
let reader = csv::Reader::from_path(path).unwrap();
DictionaryImpl::create_from_reader(reader)
}
fn create_from_str(data: &str) -> Self {
let reader = csv::ReaderBuilder::new()
.has_headers(true)
.from_reader(data.as_bytes());
DictionaryImpl::create_from_reader(reader)
}
fn filter_to_sub_dictionary(&self, proportion: f64) -> Self {
let mut map = HashMap::new();
for (word, score) in self.iter() {
if *score >= proportion {
map.insert(word.clone(), *score);
}
}
map
}
fn substring_set(&self) -> HashSet<&str> {
let mut set = HashSet::new();
for (word, _score) in self.iter() {
for j in 0..word.len() {
for k in (j+1)..(word.len()+1) {
set.insert(&word[j..k]);
}
}
}
set
}
fn is_word_valid(&self, word: &Word) -> bool {
let text = word.to_string();
self.contains_key(&text)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the bundled CSV word list and spot-checks its size, two known
    /// entries and one known score.
    #[test]
    fn test_dictionary() {
        let loaded = HashMap::create_from_path("resources/dictionary.csv");
        assert_eq!(loaded.len(), 279429);

        for expected_word in &["AA", "AARDVARK"] {
            assert!(
                loaded.contains_key(*expected_word),
                "expected word {:?} to be in the dictionary",
                expected_word
            );
        }

        let aardvark_score: f64 = *loaded.get("AARDVARK").unwrap();
        assert!((aardvark_score - 0.5798372).abs() < 0.0001);
    }

    /// Filters a small hand-built dictionary by score, then checks which
    /// substrings of the surviving words are (and are not) reported.
    #[test]
    fn test_dictionary_sets() {
        let unfiltered: HashMap<String, f64> = [("JOEL", 0.7), ("JOHN", 0.5), ("XYZ", 0.1)]
            .iter()
            .map(|&(word, score)| (word.to_string(), score))
            .collect();

        let filtered = unfiltered.filter_to_sub_dictionary(0.3);
        assert_eq!(filtered.len(), 2);
        for kept in &["JOEL", "JOHN"] {
            assert!(
                filtered.contains_key(*kept),
                "expected {:?} to survive filtering",
                kept
            );
        }

        let substrings = filtered.substring_set();

        let present = [
            "J", "O", "E", "L", "H", "N", "JO", "OE", "EL", "OH", "HN", "JOE", "OEL", "JOH",
            "OHN",
        ];
        for fragment in &present {
            assert!(
                substrings.contains(*fragment),
                "expected substring {:?} to be present",
                fragment
            );
        }

        // "XY" belongs only to the filtered-out word; "JH" and "JE" are not
        // contiguous in either remaining word.
        let absent = ["XY", "JH", "JE"];
        for fragment in &absent {
            assert!(
                !substrings.contains(*fragment),
                "expected substring {:?} to be absent",
                fragment
            );
        }
    }
}