Search engines written in Rust

| Recipe | Crates | Categories |
|---|---|---|
| meilisearch | meilisearch | cat-database-implementations |
| tantivy | tantivy | cat-data-structures cat-database-implementations |

meilisearch

meilisearch meilisearch-crates.io meilisearch-github meilisearch-lib.rs

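meilisearch⮳ is a fast search API that fits into your apps, websites, and workflow. The example below talks to a running Meilisearch server through the `meilisearch_sdk` client crate; the server URL is read from `MEILISEARCH_URL` (default `http://localhost:7700`) and the optional API key from `MEILI_MASTER_KEY`.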

use std::env;

use meilisearch_sdk::client::Client;
use meilisearch_sdk::indexes::Index;
use meilisearch_sdk::search::SearchResults;
use meilisearch_sdk::task_info::TaskInfo;
use meilisearch_sdk::tasks::Task;
use serde::Deserialize;
use serde::Serialize;

/// A sample document stored in the search index.
///
/// It derives `Serialize` so it can be passed to `add_documents`, and
/// `Deserialize` so search hits can be decoded back into it.
#[derive(Debug, Serialize, Deserialize)]
struct MyDocument {
    /// Unique identifier; `main` passes `"id"` as the primary key when
    /// adding documents.
    id: usize,
    /// Title of the document.
    title: String,
    /// Body text of the document.
    content: String,
}

/// Indexes a few sample documents in Meilisearch, then searches them.
///
/// The server URL is read from the `MEILISEARCH_URL` environment variable
/// (default: `http://localhost:7700`) and the optional API key from
/// `MEILI_MASTER_KEY`.
///
/// # Errors
///
/// Returns an error if the client cannot be created, if a request to the
/// server fails, or if the indexing task does not end in the `Succeeded`
/// state.
#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
    // Build the default URL lazily, only when the variable is not set.
    let meilisearch_url = env::var("MEILISEARCH_URL")
        .unwrap_or_else(|_| "http://localhost:7700".into());
    let meilisearch_api_key: Option<String> = env::var("MEILI_MASTER_KEY").ok();

    // Create a client for the Meilisearch server.
    // Propagate a construction failure instead of panicking.
    let client = Client::new(meilisearch_url, meilisearch_api_key)?;

    // Get a handle to the index
    let index: Index = client.index("my_index");

    // Define a list of documents to index
    let docs = vec![
        MyDocument {
            id: 1,
            title: "First Document".to_string(),
            content: "This is the first document content.".to_string(),
        },
        MyDocument {
            id: 2,
            title: "Second Document".to_string(),
            content: "This is the second document content.".to_string(),
        },
        MyDocument {
            id: 3,
            title: "Rust Programming".to_string(),
            content: "Learning the Rust programming language.".to_string(),
        },
    ];

    // Index the documents
    // If the index does not exist, Meilisearch creates it when you first add
    // the documents.
    let task: TaskInfo = index.add_documents(&docs, Some("id")).await?;
    println!("Indexing task: {:?}", task);

    // Wait for the indexing task to complete
    let status = index.wait_for_task(task, None, None).await?;
    println!("Indexing status: {:?}", status);
    // A failed task is a recoverable, server-side condition: report it as an
    // error rather than panicking.
    anyhow::ensure!(
        matches!(status, Task::Succeeded { .. }),
        "indexing task did not succeed: {:?}",
        status
    );

    // Perform a search query
    let query = "Rust";
    let search_results: SearchResults<MyDocument> = index
        .search()
        .with_query(query)
        .execute::<MyDocument>()
        .await?;

    // Print the search results
    println!("Search results for '{}':", query);
    for hit in search_results.hits {
        println!("{:?}", hit.result);
    }

    Ok(())
}

tantivy

tantivy tantivy-crates.io tantivy-github tantivy-lib.rs cat-data-structures cat-database-implementations

tantivy⮳ is a full-text search engine library inspired by Apache Lucene.

use tantivy::DocAddress;
use tantivy::Index;
use tantivy::ReloadPolicy;
use tantivy::Score;
use tantivy::doc;
use tantivy::schema::*;

// `tantivy` is a Lucene-like full-text search engine library written in Rust.
// This example will demonstrate how to create an index, add documents,
// and perform a search.

/// Builds an in-memory `tantivy` index, adds two documents, runs a query
/// against the `body` field, prints each hit with its score and score
/// explanation, and finally deletes all documents.
///
/// # Errors
///
/// Propagates any `tantivy` error raised while creating the writer or
/// reader, adding or committing documents, parsing the query, searching,
/// or retrieving documents.
fn main() -> tantivy::Result<()> {
    // Define the schema for your documents
    // Tantivy has a very strict schema. You need to specify in advance, whether
    // a field is indexed or not, stored or not, and RAM-based or not.
    let mut schema_builder = Schema::builder();

    // `add_text_field` returns the `Field` handle; keep it so that the fields
    // never have to be looked up again by name (a lookup that can fail).
    //
    // `TEXT` means the field should be tokenized and indexed,
    // along with its term frequency and term positions.
    // `STORED` means that the field will also be saved
    // in a compressed, row-oriented key-value store.
    // This store is useful to reconstruct the
    // documents that were selected during the search phase.
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let body = schema_builder.add_text_field("body", TEXT);

    // You may also use:
    // add_u64_field, add_bool_field, add_date_field, add_ip_addr_field,
    // add_facet_field, add_bytes_field, add_json_field... e.g.
    // let num_stars_options =
    //     NumericOptions::default().set_stored().set_indexed();
    // schema_builder.add_u64_field("num_stars", num_stars_options);
    // Or simpler: schema_builder.add_u64_field("num_stars", INDEXED | STORED);

    let schema = schema_builder.build();

    // Create a new index.
    // This index will be allocated in anonymous memory. This is useful for
    // indexing small set of documents for testing or for a temporary
    // in-memory index.
    let index = Index::create_in_ram(schema.clone());
    // OR: let index = Index::create_in_dir(index_path, schema.clone())?;

    // Create a multithreaded index writer, specify a buffer size in bytes
    let mut index_writer = index.writer(50_000_000)?;

    // Add documents to the index
    index_writer.add_document(doc!(
        title => "Document 1",
        body => "This is the body of document 1",
    ))?;
    index_writer.add_document(doc!(
        title => "Document 2",
        body => "This is the body of document 2",
    ))?;

    // Commit the changes
    // We need to call .commit() explicitly to force the
    // index_writer to finish processing the documents in the queue,
    // flush the current index to the disk, and advertise
    // the existence of new documents.
    index_writer.commit()?;

    // Create a reader
    // With `OnCommitWithDelay`, the index is reloaded within milliseconds
    // after a new commit is available.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::OnCommitWithDelay)
        .try_into()?;
    // OR: let reader = index.reader()?;

    // A searcher points to a snapshotted, immutable version of the index.
    // You will typically create one reader for the entire lifetime of your
    // program, and acquire a new searcher for every single request.
    let searcher = reader.searcher();

    // Define a query parser that can interpret human queries.
    // The second argument is the set of default fields used to search if no
    // field is specifically defined in the query.
    let query_parser =
        tantivy::query::QueryParser::for_index(&index, vec![body]);

    // Parse the query coming e.g. from the search bar.
    let query = query_parser.parse_query("body:document")?;

    // Search for documents that match the query
    let top_docs: Vec<(Score, DocAddress)> = searcher
        .search(&query, &tantivy::collector::TopDocs::with_limit(10))?;

    // Print the search results
    for (score, doc_address) in top_docs {
        // Retrieve the actual content of documents given its `doc_address`.
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
        println!(
            "Document found: {:?}, score: {}",
            retrieved_doc.to_json(&schema),
            score
        );
        // We can also get an explanation to understand how a found document got
        // its score.
        let explanation = query.explain(&searcher, doc_address)?;
        println!("{}", explanation.to_pretty_json());
    }

    // Delete all documents
    index_writer.delete_all_documents()?;
    index_writer.commit()?;

    Ok(())
}
// Adapted from https://docs.rs/tantivy/0.22.0/
// See also examples e.g. https://tantivy-search.github.io/examples/basic_search.html