Search Engines Written in Rust
Recipe | Crates | Categories |
---|---|---|
meilisearch | ||
tantivy |
meilisearch
meilisearch⮳ is a fast search API that fits into your apps, websites, and workflow.
use std::env; use meilisearch_sdk::client::Client; use meilisearch_sdk::indexes::Index; use meilisearch_sdk::search::SearchResults; use meilisearch_sdk::task_info::TaskInfo; use meilisearch_sdk::tasks::Task; use serde::Deserialize; use serde::Serialize; /// A document that can be indexed in MeiliSearch. #[derive(Debug, Serialize, Deserialize)] struct MyDocument { id: usize, title: String, content: String, } /// This is an example of how to use the MeiliSearch Rust SDK to index and /// search documents. /// /// It requires a running MeiliSearch instance. #[tokio::main(flavor = "current_thread")] async fn main() -> anyhow::Result<()> { let meilisearch_url = env::var("MEILISEARCH_URL").unwrap_or("http://localhost:7700".into()); let meilisearch_api_key: Option<String> = env::var("MEILI_MASTER_KEY").ok(); // Create a client and connect to MeiliSearch. let client = Client::new(meilisearch_url, meilisearch_api_key).unwrap(); // Create an index. let index: Index = client.index("my_index"); // Define a list of documents to index. let docs = vec![ MyDocument { id: 1, title: "First Document".to_string(), content: "This is the first document content.".to_string(), }, MyDocument { id: 2, title: "Second Document".to_string(), content: "This is the second document content.".to_string(), }, MyDocument { id: 3, title: "Rust Programming".to_string(), content: "Learning the Rust programming language.".to_string(), }, ]; // Index the documents. // If the index does not exist, Meilisearch creates it when you first add // the documents. let task: TaskInfo = index.add_documents(&docs, Some("id")).await?; println!("Indexing task: {:?}", task); // Wait for the indexing task to complete. let status = index.wait_for_task(task, None, None).await?; println!("Indexing status: {:?}", status); assert!(matches!(status, Task::Succeeded { .. })); // Perform a search query. let query = "Rust"; let search_results: SearchResults<MyDocument> = index .search() .with_query(query) .execute::<MyDocument>() .await?; // Print the search results. println!("Search results for '{}':", query); for hit in search_results.hits { println!("{:?}", hit.result); } Ok(()) }
tantivy
tantivy⮳ is a full-text search engine library inspired by Apache Lucene.
// use tantivy::DocAddress; // use tantivy::Index; // use tantivy::ReloadPolicy; // use tantivy::Score; // use tantivy::doc; // use tantivy::schema::Document as TantivyDocument; // use tantivy::schema::STORED; // use tantivy::schema::Schema; // use tantivy::schema::TEXT; // /// `tantivy` is a Lucene-like full-text search engine library written in // /// Rust. // /// This example will demonstrate how to create an index, add documents, // /// and perform a search. // fn main() -> tantivy::Result<()> { // // Define the schema for your documents // // Tantivy has a very strict schema. You need to specify in advance, // // whether a field is indexed or not, stored or not, and RAM-based or // // not: `let mut schema_builder = Schema::builder();` // schema_builder.add_text_field("title", TEXT | STORED); // // `TEXT` means the field should be tokenized and indexed, // // along with its term frequency and term positions. // // `STORED` means that the field will also be saved // // in a compressed, row-oriented key-value store. // // This store is useful to reconstruct the // // documents that were selected during the search phase. // schema_builder.add_text_field("body", TEXT); // // You may also use: // // add_u64_field, add_bool_field, add_date_field, add_ip_addr_field, // // add_facet_field, add_bytes_field, add_json_field... e.g. // // let num_stars_options = // // NumericOptions::default().set_stored().set_indexed(); // // schema_builder.add_u64_field("num_stars", num_stars_options); // // Or simpler: schema_builder.add_u64_field("num_stars", INDEXED | // // STORED); // let schema = schema_builder.build(); // // Create a new index in the specified directory // // This index will be allocated in anonymous memory. This is useful for // // indexing small set of documents for testing or for a temporary // // in-memory index. // let index = Index::create_in_ram(schema.clone()); // // OR: let index = Index::create_in_dir(index_path, schema.clone())?; // // Create a multithreaded index writer, specify a buffer size in bytes // let mut index_writer = index.writer(50_000_000)?; // // Add documents to the index // index_writer.add_document(doc!( // schema.get_field("title").unwrap() => "Document 1", // schema.get_field("body").unwrap() => "This is the body of document // 1", ))?; index_writer.add_document(doc!( // schema.get_field("title").unwrap() => "Document 2", // schema.get_field("body").unwrap() => "This is the body of document // 2", ))?; // // Commit the changes // index_writer.commit()?; // // We need to call .commit() explicitly to force the // // index_writer to finish processing the documents in the queue, // // flush the current index to the disk, and advertise // // the existence of new documents. // // Create a reader // let reader = index // .reader_builder() // .reload_policy(ReloadPolicy::OnCommitWithDelay) // The index is // reloaded within milliseconds after a new commit is available. // .try_into()?; // // OR: let reader = index.reader()?; // // A searcher points to a snapshotted, immutable version of the index. // // You will typically create one reader for the entire lifetime of your // // program, and acquire a new searcher for every single request. // let searcher = reader.searcher(); // // Define a query parser that can interpret human queries // let query_parser = tantivy::query::QueryParser::for_index( // &index, // vec![schema.get_field("body").unwrap()], /* Set of default fields // * used to search if no // * field is specifically // * defined in the query. */ // ); // // Parse the query coming e.g. from the search bar. // let query = query_parser.parse_query("body:document")?; // // Search for documents that match the query // let top_docs: Vec<(Score, DocAddress)> = searcher // .search(&query, &tantivy::collector::TopDocs::with_limit(10))?; // // Print the search results // for (score, doc_address) in top_docs { // // Retrieve the actual content of documents given its `doc_address`. // let retrieved_doc = searcher.doc(doc_address)?; // println!( // "Document found: {:?}, score: {}", // retrieved_doc.to_json(&schema), // score // ); // // We can also get an explanation to understand how a found document // got // its score. // let explanation = query.explain(&searcher, doc_address)?; // println!("{}", explanation.to_pretty_json()); // } // // Delete all documents // index_writer.delete_all_documents()?; // index_writer.commit()?; // Ok(()) // } // // Adapted from https://docs.rs/tantivy/0.22.0/ // // See also examples e.g. https://tantivy-search.github.io/examples/basic_search.html
Related Topics
Topic | Rust crate(s) | Notes |
---|---|---|
Full-Text Search | elastic (for Elasticsearch) | |
Fuzzy Searching | fuzzy-matcher ⮳, strsim | strsim ⮳ provides string similarity metrics. See Text Processing. |
Regular Expressions | regex ⮳ | See Regex. |
String Search (Multiple Patterns at Once) | aho-corasick ⮳ | See Text Processing. |
Vector Search | qdrant ⮳, faiss-rs (bindings to FAISS) |