Add dictionary

This commit is contained in:
Joel Therrien 2023-07-24 19:50:40 -07:00
parent 4e4912ac3f
commit c842119254
3 changed files with 279544 additions and 0 deletions

View file

@ -6,3 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"

279430
resources/dictionary.csv Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,7 @@
use std::collections::HashSet;
use std::fmt;
use std::fmt::{Formatter, Write};
use std::str::FromStr;
/// Length of the grid, in cells.
/// NOTE(review): presumably the board is `GRID_LENGTH` x `GRID_LENGTH` cells — confirm against `Board`.
pub const GRID_LENGTH: u8 = 15;
/// Length of a tray.
/// NOTE(review): presumably the number of letter slots a player holds — confirm against its users.
pub const TRAY_LENGTH: u8 = 7;
@ -32,6 +34,63 @@ pub struct Board {
cells: Vec<Cell>,
}
/// A word list in which every word carries a numeric score.
///
/// `words` and `scores` are parallel vectors: `scores[i]` is the score of `words[i]`,
/// so both vectors are expected to have the same length.
#[derive(Debug, Clone, PartialEq)]
pub struct Dictionary {
    /// The words, in the order they were loaded or supplied.
    words: Vec<String>,
    /// Score of the word at the same index in `words`.
    scores: Vec<f64>,
}
impl Dictionary {
    /// Loads the dictionary from `resources/dictionary.csv`.
    ///
    /// Field 0 of every record is stored as the word and field 1 is parsed as its `f64`
    /// score. The reader is created with the `csv` crate's default configuration.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened, if a record cannot be read, if a record has
    /// fewer than two fields, or if the score field is not a valid `f64`.
    fn new() -> Self {
        let mut reader = csv::Reader::from_path("resources/dictionary.csv")
            .expect("failed to open resources/dictionary.csv");
        let mut words: Vec<String> = Vec::new();
        let mut scores: Vec<f64> = Vec::new();

        for result in reader.records() {
            let record = result.expect("failed to read a record from resources/dictionary.csv");
            let word = record
                .get(0)
                .expect("dictionary record is missing the word field");
            let score = record
                .get(1)
                .expect("dictionary record is missing the score field");

            words.push(word.to_string());
            scores.push(f64::from_str(score).expect("dictionary score is not a valid f64"));
        }

        Dictionary { words, scores }
    }

    /// Builds a new dictionary holding only the entries whose score is at least `proportion`.
    ///
    /// The relative order of the surviving entries is preserved and `self` is left untouched.
    fn filter_to_sub_dictionary(&self, proportion: f64) -> Self {
        let (words, scores): (Vec<String>, Vec<f64>) = self
            .words
            .iter()
            .zip(self.scores.iter())
            .filter(|(_, score)| **score >= proportion)
            .map(|(word, score)| (word.clone(), *score))
            .unzip();

        Dictionary { words, scores }
    }

    /// Returns every non-empty contiguous substring of every word in the dictionary.
    ///
    /// The returned slices borrow from `self`, so no string data is copied.
    fn substring_set(&self) -> HashSet<&str> {
        let mut set = HashSet::new();

        for word in &self.words {
            // Slice only at character boundaries: cutting a `str` at an arbitrary byte offset
            // panics when that offset falls inside a multi-byte UTF-8 character.
            let boundaries: Vec<usize> = word
                .char_indices()
                .map(|(offset, _)| offset)
                .chain(std::iter::once(word.len()))
                .collect();

            for (position, &start) in boundaries.iter().enumerate() {
                for &end in &boundaries[position + 1..] {
                    set.insert(&word[start..end]);
                }
            }
        }

        set
    }
}
impl Board {
pub fn new() -> Self {
@ -191,4 +250,57 @@ mod tests {
assert!(matches!(board.get_cell(8, 6).unwrap().cell_type, CellType::DoubleLetter));
assert!(matches!(board.get_cell(5, 9).unwrap().cell_type, CellType::TripleLetter));
}
#[test]
fn test_dictionary() {
let dictionary = Dictionary::new();
assert_eq!(dictionary.words.len(), dictionary.scores.len());
assert_eq!(dictionary.words.len(), 279429);
assert_eq!(dictionary.words.get(0).unwrap(), "AA");
assert_eq!(dictionary.words.get(9).unwrap(), "AARDVARK");
assert!((dictionary.scores.get(9).unwrap() - 0.5798372).abs() < 0.0001)
}
#[test]
fn test_dictionary_sets() {
let dictionary = Dictionary {
words: vec!["JOEL".to_string(), "JOHN".to_string(), "XYZ".to_string()],
scores: vec![0.7, 0.5, 0.1],
};
let dictionary = dictionary.filter_to_sub_dictionary(0.3);
assert_eq!(dictionary.words.len(), 2);
assert_eq!(dictionary.words.get(0).unwrap(), "JOEL");
assert_eq!(dictionary.words.get(1).unwrap(), "JOHN");
let set = dictionary.substring_set();
assert!(set.contains("J"));
assert!(set.contains("O"));
assert!(set.contains("E"));
assert!(set.contains("L"));
assert!(set.contains("H"));
assert!(set.contains("N"));
assert!(set.contains("JO"));
assert!(set.contains("OE"));
assert!(set.contains("EL"));
assert!(set.contains("OH"));
assert!(set.contains("HN"));
assert!(set.contains("JOE"));
assert!(set.contains("OEL"));
assert!(set.contains("JOH"));
assert!(set.contains("OHN"));
assert!(!set.contains("XY"));
assert!(!set.contains("JH"));
assert!(!set.contains("JE"));
}
}