> I'd be curious to see the same test in C, if someone(TM) had the time.

My C is a bit rusty, but I gave Claude a whirl and it generated something that 
did the job perfectly. The difference was not quite as stark, but thread 
queries were still ~50% slower.

I've attached the code Claude generated for the curious — run as 
./notmuch-bench-something ~/.notmuch-db "date:2025"

> I agree twice as slow is not reasonable. I do expect it to be slower
> because of extra work that notmuch is doing (outside xapian) to
> construct the thread structures (parenting nodes turns out to be a bit
> non-trivial). I did also spend some effort tuning the thread:{}
> queries; the others I think have not looked at as closely.

This isn't even using thread:{}. The C version just makes sequential queries
for each individual thread, while the Python version concatenates all thread
IDs into one massive "thread: or thread: or thread:..." query.

At least for the C version, the performance gap narrows the more threads there 
are, which makes sense given that each new thread causes a new query in this 
very unoptimized version. But altogether, retrieving threads is substantially 
slower than messages.

Cheers,

Lars
#include <notmuch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Growable array of heap-allocated tag strings.
typedef struct {
    char **tags;      // pointer array; entries [0, count) hold strdup'd strings
    size_t count;     // number of entries currently stored
    size_t capacity;  // number of pointer slots allocated for `tags`
} tag_array_t;

// Growable array of heap-allocated thread ID strings.
typedef struct {
    char **thread_ids;  // pointer array; entries [0, count) hold strdup'd strings
    size_t count;       // number of entries currently stored
    size_t capacity;    // number of pointer slots allocated for `thread_ids`
} thread_id_array_t;

// Set up `arr` as an empty tag array with room for `initial_capacity` entries.
//
// A requested capacity of zero is raised to one, because the array grows by
// doubling and zero would never grow. Allocation failure is fatal: this
// function cannot report an error, so it prints a message and exits instead of
// leaving a NULL buffer behind for later writes.
static void init_tag_array(tag_array_t *arr, size_t initial_capacity) {
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    arr->count = 0;
    // calloc rejects a count * size product that would overflow size_t
    arr->tags = calloc(initial_capacity, sizeof *arr->tags);
    if (!arr->tags) {
        fprintf(stderr, "Out of memory allocating tag array\n");
        exit(EXIT_FAILURE);
    }
    arr->capacity = initial_capacity;
}

// Set up `arr` as an empty thread-ID array with room for `initial_capacity`
// entries.
//
// A requested capacity of zero is raised to one, because the array grows by
// doubling and zero would never grow. Allocation failure is fatal: this
// function cannot report an error, so it prints a message and exits instead of
// leaving a NULL buffer behind for later writes.
static void init_thread_id_array(thread_id_array_t *arr, size_t initial_capacity) {
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    arr->count = 0;
    // calloc rejects a count * size product that would overflow size_t
    arr->thread_ids = calloc(initial_capacity, sizeof *arr->thread_ids);
    if (!arr->thread_ids) {
        fprintf(stderr, "Out of memory allocating thread ID array\n");
        exit(EXIT_FAILURE);
    }
    arr->capacity = initial_capacity;
}

// Append a private copy of `tag` to `arr` unless an equal string is already
// stored. A NULL `tag` is ignored.
//
// The array doubles its capacity when full. Running out of memory is fatal:
// the function returns void, so it prints a message and exits rather than
// writing through a NULL buffer or storing a NULL entry that a later strcmp()
// would dereference.
static void add_unique_tag(tag_array_t *arr, const char *tag) {
    if (!tag) {
        return;
    }

    // Linear scan; an existing equal entry means there is nothing to add
    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->tags[i], tag) == 0) {
            return;
        }
    }

    // Grow if necessary
    if (arr->count >= arr->capacity) {
        // A capacity of zero cannot grow by doubling, so start from one
        size_t new_capacity = arr->capacity ? arr->capacity * 2 : 1;
        char **grown = NULL;

        // Only call realloc when neither the doubling nor the byte count wraps
        if (new_capacity > arr->capacity &&
            new_capacity <= (size_t)-1 / sizeof *grown) {
            // Keep the old pointer until realloc succeeds
            grown = realloc(arr->tags, new_capacity * sizeof *grown);
        }
        if (!grown) {
            fprintf(stderr, "Out of memory growing tag array\n");
            exit(EXIT_FAILURE);
        }
        arr->tags = grown;
        arr->capacity = new_capacity;
    }

    // Store an owned copy; the caller's string may not outlive this call
    char *copy = strdup(tag);
    if (!copy) {
        fprintf(stderr, "Out of memory copying tag\n");
        exit(EXIT_FAILURE);
    }
    arr->tags[arr->count] = copy;
    arr->count++;
}

// Append a private copy of `thread_id` to `arr` unless an equal string is
// already stored. A NULL `thread_id` is ignored.
//
// The array doubles its capacity when full. Running out of memory is fatal:
// the function returns void, so it prints a message and exits rather than
// writing through a NULL buffer or storing a NULL entry that a later strcmp()
// would dereference.
static void add_unique_thread_id(thread_id_array_t *arr, const char *thread_id) {
    if (!thread_id) {
        return;
    }

    // Linear scan; an existing equal entry means there is nothing to add
    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->thread_ids[i], thread_id) == 0) {
            return;
        }
    }

    // Grow if necessary
    if (arr->count >= arr->capacity) {
        // A capacity of zero cannot grow by doubling, so start from one
        size_t new_capacity = arr->capacity ? arr->capacity * 2 : 1;
        char **grown = NULL;

        // Only call realloc when neither the doubling nor the byte count wraps
        if (new_capacity > arr->capacity &&
            new_capacity <= (size_t)-1 / sizeof *grown) {
            // Keep the old pointer until realloc succeeds
            grown = realloc(arr->thread_ids, new_capacity * sizeof *grown);
        }
        if (!grown) {
            fprintf(stderr, "Out of memory growing thread ID array\n");
            exit(EXIT_FAILURE);
        }
        arr->thread_ids = grown;
        arr->capacity = new_capacity;
    }

    // Store an owned copy; the caller's string may not outlive this call
    char *copy = strdup(thread_id);
    if (!copy) {
        fprintf(stderr, "Out of memory copying thread ID\n");
        exit(EXIT_FAILURE);
    }
    arr->thread_ids[arr->count] = copy;
    arr->count++;
}

// Release every stored tag string and the pointer array itself, then reset
// `arr` to the empty state. The tag_array_t struct itself is not freed.
static void free_tag_array(tag_array_t *arr) {
    char **slot = arr->tags;

    for (size_t remaining = arr->count; remaining > 0; remaining--) {
        free(*slot);
        slot++;
    }

    free(arr->tags);
    arr->tags = NULL;
    arr->capacity = 0;
    arr->count = 0;
}

// Release every stored thread ID string and the pointer array itself, then
// reset `arr` to the empty state. The struct itself is not freed.
static void free_thread_id_array(thread_id_array_t *arr) {
    char **slot = arr->thread_ids;

    for (size_t remaining = arr->count; remaining > 0; remaining--) {
        free(*slot);
        slot++;
    }

    free(arr->thread_ids);
    arr->thread_ids = NULL;
    arr->capacity = 0;
    arr->count = 0;
}

// Walk the tags of `message` and add each one to `result`, skipping any that
// `result` already contains. The tag iterator is destroyed before returning.
static void process_message_tags(notmuch_message_t *message, tag_array_t *result) {
    notmuch_tags_t *iter = notmuch_message_get_tags(message);

    while (notmuch_tags_valid(iter)) {
        add_unique_tag(result, notmuch_tags_get(iter));
        notmuch_tags_move_to_next(iter);
    }

    notmuch_tags_destroy(iter);
}

tag_array_t *get_unique_tags(const char *db_path, const char *query_string) {
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_messages_t *messages = NULL;
    thread_id_array_t thread_ids;
    tag_array_t *result = malloc(sizeof(tag_array_t));
    
    init_tag_array(result, 16);
    init_thread_id_array(&thread_ids, 16);

    // Open the database
    notmuch_status_t status = notmuch_database_open(db_path, 
                                                   NOTMUCH_DATABASE_MODE_READ_ONLY, 
                                                   &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        free(result);
        return NULL;
    }

    // First query: get messages matching the search criteria
    query = notmuch_query_create(db, query_string);
    if (!query) {
        notmuch_database_destroy(db);
        free(result);
        return NULL;
    }

    // Get messages
    status = notmuch_query_search_messages(query, &messages);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        notmuch_query_destroy(query);
        notmuch_database_destroy(db);
        free(result);
        return NULL;
    }

    // Collect thread IDs from matching messages
    for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
        notmuch_message_t *message = notmuch_messages_get(messages);
        const char *thread_id = notmuch_message_get_thread_id(message);
        add_unique_thread_id(&thread_ids, thread_id);
        process_message_tags(message, result);
        notmuch_message_destroy(message);
    }
    notmuch_messages_destroy(messages);
    notmuch_query_destroy(query);

    // Second query: get all messages from collected thread IDs
    for (size_t i = 0; i < thread_ids.count; i++) {
        char *thread_query = malloc(strlen("thread:") + strlen(thread_ids.thread_ids[i]) + 1);
        sprintf(thread_query, "thread:%s", thread_ids.thread_ids[i]);

        query = notmuch_query_create(db, thread_query);
        if (query) {
            status = notmuch_query_search_messages(query, &messages);
            if (status == NOTMUCH_STATUS_SUCCESS) {
                for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
                    notmuch_message_t *message = notmuch_messages_get(messages);
                    process_message_tags(message, result);
                    notmuch_message_destroy(message);
                }
                notmuch_messages_destroy(messages);
            }
            notmuch_query_destroy(query);
        }
        free(thread_query);
    }

    // Cleanup
    free_thread_id_array(&thread_ids);
    notmuch_database_destroy(db);

    return result;
}

// Command-line driver. Expects exactly two arguments, <notmuch-db-path> and
// <query>, and prints how many unique tags get_unique_tags() found.
// Returns 0 on success and 1 on a usage error or lookup failure.
int main(int argc, char **argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
        return 1;
    }

    const char *db_path = argv[1];
    const char *search = argv[2];

    tag_array_t *unique = get_unique_tags(db_path, search);
    if (unique == NULL) {
        fputs("Failed to get tags\n", stderr);
        return 1;
    }

    printf("Found %zu unique tags:\n", unique->count);
    // The per-tag listing is commented out; only the count is printed.
    //for (size_t i = 0; i < unique->count; i++) {
        //printf("%s\n", unique->tags[i]);
    //}

    // Free the array contents first, then the struct itself
    free_tag_array(unique);
    free(unique);
    return 0;
}
#include <notmuch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Growable array of heap-allocated tag strings.
typedef struct {
    char **tags;      // pointer array; entries [0, count) hold strdup'd strings
    size_t count;     // number of entries currently stored
    size_t capacity;  // number of pointer slots allocated for `tags`
} tag_array_t;

// Set up `arr` as an empty tag array with room for `initial_capacity` entries.
//
// A requested capacity of zero is raised to one, because the array grows by
// doubling and zero would never grow. Allocation failure is fatal: this
// function cannot report an error, so it prints a message and exits instead of
// leaving a NULL buffer behind for later writes.
static void init_tag_array(tag_array_t *arr, size_t initial_capacity) {
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    arr->count = 0;
    // calloc rejects a count * size product that would overflow size_t
    arr->tags = calloc(initial_capacity, sizeof *arr->tags);
    if (!arr->tags) {
        fprintf(stderr, "Out of memory allocating tag array\n");
        exit(EXIT_FAILURE);
    }
    arr->capacity = initial_capacity;
}

// Append a private copy of `tag` to `arr` unless an equal string is already
// stored. A NULL `tag` is ignored.
//
// The array doubles its capacity when full. Running out of memory is fatal:
// the function returns void, so it prints a message and exits rather than
// writing through a NULL buffer or storing a NULL entry that a later strcmp()
// would dereference.
static void add_unique_tag(tag_array_t *arr, const char *tag) {
    if (!tag) {
        return;
    }

    // Linear scan; an existing equal entry means there is nothing to add
    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->tags[i], tag) == 0) {
            return;
        }
    }

    // Grow if necessary
    if (arr->count >= arr->capacity) {
        // A capacity of zero cannot grow by doubling, so start from one
        size_t new_capacity = arr->capacity ? arr->capacity * 2 : 1;
        char **grown = NULL;

        // Only call realloc when neither the doubling nor the byte count wraps
        if (new_capacity > arr->capacity &&
            new_capacity <= (size_t)-1 / sizeof *grown) {
            // Keep the old pointer until realloc succeeds
            grown = realloc(arr->tags, new_capacity * sizeof *grown);
        }
        if (!grown) {
            fprintf(stderr, "Out of memory growing tag array\n");
            exit(EXIT_FAILURE);
        }
        arr->tags = grown;
        arr->capacity = new_capacity;
    }

    // Store an owned copy; the caller's string may not outlive this call
    char *copy = strdup(tag);
    if (!copy) {
        fprintf(stderr, "Out of memory copying tag\n");
        exit(EXIT_FAILURE);
    }
    arr->tags[arr->count] = copy;
    arr->count++;
}

// Free each stored tag string, then the pointer array, and leave `arr` empty.
// The tag_array_t struct itself is not freed.
static void free_tag_array(tag_array_t *arr) {
    size_t idx = 0;

    while (idx < arr->count) {
        free(arr->tags[idx]);
        idx++;
    }

    free(arr->tags);
    arr->tags = NULL;
    arr->capacity = 0;
    arr->count = 0;
}

tag_array_t *get_unique_tags(const char *db_path, const char *query_string) {
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_threads_t *threads = NULL;
    notmuch_messages_t *messages = NULL;
    notmuch_tags_t *tags = NULL;
    tag_array_t *result = malloc(sizeof(tag_array_t));
    
    init_tag_array(result, 16);  // Start with space for 16 tags

    // Open the database
    notmuch_status_t status = notmuch_database_open(db_path, 
                                                   NOTMUCH_DATABASE_MODE_READ_ONLY, 
                                                   &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        free(result);
        return NULL;
    }

    // Create and execute query
    query = notmuch_query_create(db, query_string);
    if (!query) {
        notmuch_database_destroy(db);
        free(result);
        return NULL;
    }

    // Get threads
    status = notmuch_query_search_threads(query, &threads);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        notmuch_query_destroy(query);
        notmuch_database_destroy(db);
        free(result);
        return NULL;
    }

    // Iterate over threads
    for (; notmuch_threads_valid(threads); notmuch_threads_move_to_next(threads)) {
        notmuch_thread_t *thread = notmuch_threads_get(threads);
        
        // Get messages in thread
        messages = notmuch_thread_get_messages(thread);
        
        // Iterate over messages
        for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
            notmuch_message_t *message = notmuch_messages_get(messages);
            
            // Get tags for message
            tags = notmuch_message_get_tags(message);
            
            // Add each tag to our result array
            for (; notmuch_tags_valid(tags); notmuch_tags_move_to_next(tags)) {
                const char *tag = notmuch_tags_get(tags);
                add_unique_tag(result, tag);
            }
            
            notmuch_tags_destroy(tags);
            notmuch_message_destroy(message);
        }
        
        notmuch_messages_destroy(messages);
        notmuch_thread_destroy(thread);
    }

    // Cleanup
    notmuch_threads_destroy(threads);
    notmuch_query_destroy(query);
    notmuch_database_destroy(db);

    return result;
}

// Command-line driver. Expects exactly two arguments, <notmuch-db-path> and
// <query>, and prints how many unique tags get_unique_tags() found.
// Returns 0 on success and 1 on a usage error or lookup failure.
int main(int argc, char **argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
        return 1;
    }

    const char *db_path = argv[1];
    const char *search = argv[2];

    tag_array_t *unique = get_unique_tags(db_path, search);
    if (unique == NULL) {
        fputs("Failed to get tags\n", stderr);
        return 1;
    }

    printf("Found %zu unique tags:\n", unique->count);
    // The per-tag listing is commented out; only the count is printed.
    // for (size_t i = 0; i < unique->count; i++) {
        // printf("%s\n", unique->tags[i]);
    // }

    // Free the array contents first, then the struct itself
    free_tag_array(unique);
    free(unique);
    return 0;
}
_______________________________________________
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org

Reply via email to