This repository has been archived by the owner on Aug 14, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
747588b
commit 762302c
Showing
9 changed files
with
168 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
/target/ | ||
**/*.rs.bk | ||
Cargo.lock | ||
.idea/ | ||
/test/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ version = "0.1.0" | |
authors = ["Emmanuel Keller <[email protected]>"] | ||
|
||
[dependencies] | ||
fst = "0.2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
pub mod document { | ||
use terms::terms::Terms; | ||
use std::collections::HashMap; | ||
|
||
pub struct Document { pub fields: HashMap<String, Terms> } | ||
|
||
impl Document { | ||
pub fn new() -> Document { | ||
return Document { fields: HashMap::new() }; | ||
} | ||
|
||
pub fn field(&mut self, field_name: &str) -> &mut Terms { | ||
return self.fields.entry(field_name.to_string()).or_insert(Terms::new()); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pub mod field {
    /// A field identified by its name.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    pub struct Field {
        // Kept private; read it through `Field::name`.
        name: String,
    }

    impl Field {
        /// Creates a field called `name`.
        pub fn new(name: &str) -> Field {
            Field { name: name.to_string() }
        }

        /// Returns the name of the field.
        pub fn name(&self) -> &str {
            &self.name
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
pub mod index { | ||
use std::collections::HashMap; | ||
use segment::segment::Segment; | ||
use terms::terms::Terms; | ||
use document::document::Document; | ||
use std::io; | ||
use std::path::Path; | ||
use std::path::PathBuf; | ||
use std::fs; | ||
use std::fs::File; | ||
use fst::{IntoStreamer, Streamer, Map, MapBuilder, Result}; | ||
|
||
pub struct Index { | ||
pub path: String, | ||
segments: HashMap<String, Segment> | ||
} | ||
|
||
impl Index { | ||
/// Open an existing index, or create a new one. | ||
pub fn new(index_path: &str) -> io::Result<Index> { | ||
let p = Path::new(index_path); | ||
if !p.exists() { | ||
fs::create_dir(p)? | ||
} | ||
// Read the existing segments | ||
let mut segments = HashMap::new(); | ||
for entry in fs::read_dir(p)? { | ||
let dir_entry = entry?; | ||
if dir_entry.file_type()?.is_dir() { | ||
let dir_name = dir_entry.file_name().into_string().unwrap(); | ||
segments.insert(dir_name.to_string(), Segment::new(dir_name.as_ref())?); | ||
} | ||
} | ||
return Ok(Index { path: index_path.to_string(), segments: segments }); | ||
} | ||
|
||
pub fn insert(&self, documents: Vec<Document>) -> Result<()> { | ||
//TODO get next segment number | ||
for document in documents { self.insert_document(document); } | ||
return Ok({}); | ||
} | ||
|
||
fn insert_document(&self, document: Document) -> Result<()> { | ||
for (field, terms) in document.fields { | ||
self.insert_field(1, field.as_ref(), terms)?; | ||
} | ||
return Ok({}); | ||
} | ||
|
||
fn insert_field(&self, segment_number: u64, field: &str, terms: Terms) -> Result<()> { | ||
let field_fst = field.to_string() + ".fst"; | ||
let field_path: PathBuf = [&self.path, &field_fst].iter().collect(); | ||
let mut wtr = io::BufWriter::new(try!(File::create(field_path))); | ||
let mut build = try!(MapBuilder::new(wtr)); | ||
let mut pos = 0; | ||
for (term, positions) in terms.term_positions { | ||
build.insert(term, pos)?; | ||
pos = pos + 1; | ||
} | ||
try!(build.finish()); | ||
return Ok({}); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,14 @@ | ||
extern crate fst; | ||
|
||
use std::collections::HashMap; | ||
|
||
pub mod field; | ||
pub mod segment; | ||
pub mod index; | ||
pub mod terms; | ||
pub mod document; | ||
|
||
use index::index::Index; | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
#[test] | ||
fn it_works() { | ||
} | ||
} | ||
mod tests; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
pub mod segment { | ||
use std::collections::HashMap; | ||
use std::io; | ||
use field::field::Field; | ||
|
||
pub struct Segment { | ||
name: String, | ||
fields: HashMap<String, Field> | ||
} | ||
|
||
impl Segment { | ||
pub fn new(name: &str) -> io::Result<Segment> { | ||
//TODO load fields | ||
return Ok(Segment { name: name.to_string(), fields: HashMap::new() }); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
pub mod terms {
    use std::collections::BTreeMap;

    /// The terms of one field, each with the positions at which it occurs.
    #[derive(Debug, Clone, Default, PartialEq)]
    pub struct Terms {
        /// Positions recorded for each term, in insertion order per term.
        /// The map itself keeps the terms sorted.
        pub term_positions: BTreeMap<String, Vec<i32>>,
    }

    impl Terms {
        /// Creates an empty set of terms.
        pub fn new() -> Terms {
            Terms::default()
        }

        /// Records that `term` occurs at `position`.
        ///
        /// The position is appended to the ones already recorded for that
        /// term. Returns `self` so that calls can be chained.
        pub fn term(&mut self, term: &str, position: i32) -> &mut Terms {
            self.term_positions
                .entry(term.to_string())
                .or_insert_with(Vec::new)
                .push(position);
            self
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use index::index::Index;
    use document::document::Document;

    /// Opens (or creates) the index in `target/test` and inserts two
    /// documents, each with an `id` and a `title` field.
    #[test]
    fn create_index() {
        let index = Index::new("target/test").unwrap();
        assert_eq!(index.path, "target/test");

        let mut first = Document::new();
        first.field("id").term("id1", 0);
        first.field("title").term("my", 0).term("title", 1);

        let mut second = Document::new();
        second.field("id").term("id1", 1);
        second
            .field("title")
            .term("my", 0)
            .term("second", 1)
            .term("title", 2)
            .term("titles", 2);

        let documents = vec![first, second];
        assert!(index.insert(documents).is_ok());
    }

    /// Opening an index at `target/test/test/test` is expected to fail.
    // NOTE(review): presumably because the parent directory does not exist;
    // confirm that nothing creates `target/test/test` beforehand.
    #[test]
    fn fail_on_create_index_sub_directory() {
        let outcome = Index::new("target/test/test/test");
        assert!(outcome.is_err());
    }
}