> I'd be curious to see the same test in C, if someone(TM) had the time.
My C is a bit rusty, but I gave Claude a whirl and it generated something that did the job perfectly. The difference was not quite as stark, but thread queries were still ~50% slower. I've attached the code Claude generated for the curious — run it as: ./notmuch-bench-something ~/.notmuch-db "date:2025"

> I agree twice as slow is not reasonable. I do expect it to be slower
> because of extra work that notmuch is doing (outside xapian) to
> construct the thread structures (parenting nodes turns out to be a bit
> non-trivial). I did also spend some effort tuning the thread:{}
> queries; the others I think have not looked at as closely.

This isn't even using thread:{}; it's just making sequential queries for each individual thread (that is the C version; the Python version concatenates all thread IDs into one massive "thread: or thread: or thread:..." query). At least for the C version, the performance gap narrows the more threads there are, which makes sense given that each new thread causes a new query in this very unoptimized version. But altogether, retrieving threads is substantially slower than retrieving messages. Cheers, Lars
// Benchmark: collect the unique tags of every message that belongs to a
// thread containing at least one message matching a query. This variant runs
// one message query for the search string and then one "thread:<id>" message
// query per distinct thread.
#include <notmuch.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Slot count used for freshly created arrays and when growing an empty one.
enum { INITIAL_CAPACITY = 16 };

// Growable list of unique tag names. Every entry is a heap copy owned by the
// array; release with free_tag_array().
typedef struct {
    char **tags;
    size_t count;
    size_t capacity;
} tag_array_t;

// Growable list of unique thread IDs. Every entry is a heap copy owned by the
// array; release with free_thread_id_array().
typedef struct {
    char **thread_ids;
    size_t count;
    size_t capacity;
} thread_id_array_t;

// Allocation policy for this tool: running out of memory is fatal.
static void die_out_of_memory(void)
{
    fprintf(stderr, "Out of memory\n");
    exit(EXIT_FAILURE);
}

// malloc() that never returns NULL. Callers must pass size > 0.
static void *xmalloc(size_t size)
{
    void *block = malloc(size);
    if (!block) {
        die_out_of_memory();
    }
    return block;
}

// Portable replacement for POSIX strdup(); never returns NULL.
static char *xstrdup(const char *s)
{
    size_t size = strlen(s) + 1;
    char *copy = xmalloc(size);
    memcpy(copy, s, size);
    return copy;
}

// Allocate room for `capacity` string pointers (NULL when capacity is 0).
static char **alloc_string_array(size_t capacity)
{
    if (capacity == 0) {
        return NULL;
    }
    // calloc checks the capacity * element-size multiplication for overflow.
    char **items = calloc(capacity, sizeof *items);
    if (!items) {
        die_out_of_memory();
    }
    return items;
}

// Return non-zero when `s` is already present in items[0..count).
static int contains_string(char *const *items, size_t count, const char *s)
{
    for (size_t i = 0; i < count; i++) {
        if (strcmp(items[i], s) == 0) {
            return 1;
        }
    }
    return 0;
}

// Make sure items[count] is writable, doubling the allocation when full.
// Returns the (possibly moved) array and updates *capacity. Growth starts
// from INITIAL_CAPACITY when the current capacity is 0, because doubling 0
// would never make room.
static char **reserve_slot(char **items, size_t count, size_t *capacity)
{
    if (count < *capacity) {
        return items;
    }

    size_t grown_capacity = *capacity ? *capacity * 2 : INITIAL_CAPACITY;
    // Reject both wrap-around of the doubling and of the byte count.
    if (grown_capacity <= *capacity || grown_capacity > SIZE_MAX / sizeof *items) {
        die_out_of_memory();
    }

    // Keep the old pointer until realloc is known to have succeeded.
    char **grown = realloc(items, grown_capacity * sizeof *grown);
    if (!grown) {
        die_out_of_memory();
    }
    *capacity = grown_capacity;
    return grown;
}

// Release every string in items[0..count) and the array itself.
static void free_string_array(char **items, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        free(items[i]);
    }
    free(items);
}

// Prepare an empty tag array with room for `initial_capacity` entries.
static void init_tag_array(tag_array_t *arr, size_t initial_capacity)
{
    arr->tags = alloc_string_array(initial_capacity);
    arr->count = 0;
    arr->capacity = initial_capacity;
}

// Prepare an empty thread-ID array with room for `initial_capacity` entries.
static void init_thread_id_array(thread_id_array_t *arr, size_t initial_capacity)
{
    arr->thread_ids = alloc_string_array(initial_capacity);
    arr->count = 0;
    arr->capacity = initial_capacity;
}

// Append a copy of `tag` unless an equal string is already stored.
static void add_unique_tag(tag_array_t *arr, const char *tag)
{
    if (contains_string(arr->tags, arr->count, tag)) {
        return;
    }
    arr->tags = reserve_slot(arr->tags, arr->count, &arr->capacity);
    arr->tags[arr->count++] = xstrdup(tag);
}

// Append a copy of `thread_id` unless an equal string is already stored.
static void add_unique_thread_id(thread_id_array_t *arr, const char *thread_id)
{
    if (contains_string(arr->thread_ids, arr->count, thread_id)) {
        return;
    }
    arr->thread_ids = reserve_slot(arr->thread_ids, arr->count, &arr->capacity);
    arr->thread_ids[arr->count++] = xstrdup(thread_id);
}

// Free all stored tags and reset the array to the empty state.
static void free_tag_array(tag_array_t *arr)
{
    free_string_array(arr->tags, arr->count);
    arr->tags = NULL;
    arr->count = arr->capacity = 0;
}

// Free all stored thread IDs and reset the array to the empty state.
static void free_thread_id_array(thread_id_array_t *arr)
{
    free_string_array(arr->thread_ids, arr->count);
    arr->thread_ids = NULL;
    arr->count = arr->capacity = 0;
}

// Add every tag of `message` to `result`, skipping duplicates.
static void process_message_tags(notmuch_message_t *message, tag_array_t *result)
{
    notmuch_tags_t *tags = notmuch_message_get_tags(message);
    if (!tags) {
        // Defensive: nothing to iterate if libnotmuch handed back no iterator.
        return;
    }

    for (; notmuch_tags_valid(tags); notmuch_tags_move_to_next(tags)) {
        const char *tag = notmuch_tags_get(tags);
        if (tag) {
            add_unique_tag(result, tag);
        }
    }
    notmuch_tags_destroy(tags);
}

// Add the tags of every message matched by `thread_query` to `result`.
// A failing query is reported on stderr and skipped so that the remaining
// threads are still processed.
static void collect_tags_for_query(notmuch_database_t *db, const char *thread_query,
                                   tag_array_t *result)
{
    notmuch_query_t *query = notmuch_query_create(db, thread_query);
    if (!query) {
        fprintf(stderr, "Failed to create query: %s\n", thread_query);
        return;
    }

    notmuch_messages_t *messages = NULL;
    if (notmuch_query_search_messages(query, &messages) == NOTMUCH_STATUS_SUCCESS) {
        for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
            notmuch_message_t *message = notmuch_messages_get(messages);
            process_message_tags(message, result);
            notmuch_message_destroy(message);
        }
        notmuch_messages_destroy(messages);
    } else {
        fprintf(stderr, "Failed to run query: %s\n", thread_query);
    }
    notmuch_query_destroy(query);
}

// Collect the unique tags of all messages in all threads that contain a
// message matching `query_string` in the database at `db_path`.
//
// Returns a heap-allocated tag_array_t that the caller must release with
// free_tag_array() followed by free(), or NULL when the database cannot be
// opened or the initial query fails. Memory exhaustion terminates the process.
tag_array_t *get_unique_tags(const char *db_path, const char *query_string)
{
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_messages_t *messages = NULL;
    thread_id_array_t thread_ids;
    int succeeded = 0;

    tag_array_t *result = xmalloc(sizeof *result);
    init_tag_array(result, INITIAL_CAPACITY);
    init_thread_id_array(&thread_ids, INITIAL_CAPACITY);

    // Open the database
    notmuch_status_t status =
        notmuch_database_open(db_path, NOTMUCH_DATABASE_MODE_READ_ONLY, &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        db = NULL;  // do not trust the out-parameter after a failed open
        goto cleanup;
    }

    // First query: get messages matching the search criteria
    query = notmuch_query_create(db, query_string);
    if (!query) {
        goto cleanup;
    }

    status = notmuch_query_search_messages(query, &messages);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        goto cleanup;
    }

    // Collect thread IDs (and tags) from the matching messages
    for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
        notmuch_message_t *message = notmuch_messages_get(messages);
        const char *thread_id = notmuch_message_get_thread_id(message);
        if (thread_id) {
            add_unique_thread_id(&thread_ids, thread_id);
        }
        process_message_tags(message, result);
        notmuch_message_destroy(message);
    }
    notmuch_messages_destroy(messages);
    notmuch_query_destroy(query);
    query = NULL;  // already destroyed; keep cleanup from touching it again

    // Second pass: one "thread:<id>" query per collected thread ID
    for (size_t i = 0; i < thread_ids.count; i++) {
        const char *thread_id = thread_ids.thread_ids[i];
        size_t query_size = strlen("thread:") + strlen(thread_id) + 1;
        char *thread_query = xmalloc(query_size);

        snprintf(thread_query, query_size, "thread:%s", thread_id);
        collect_tags_for_query(db, thread_query, result);
        free(thread_query);
    }
    succeeded = 1;

cleanup:
    // Single exit path so that no partially built state is leaked.
    if (query) {
        notmuch_query_destroy(query);
    }
    if (db) {
        notmuch_database_destroy(db);
    }
    free_thread_id_array(&thread_ids);
    if (!succeeded) {
        free_tag_array(result);
        free(result);
        result = NULL;
    }
    return result;
}

// Example usage:
int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
        return 1;
    }

    tag_array_t *tags = get_unique_tags(argv[1], argv[2]);
    if (!tags) {
        fprintf(stderr, "Failed to get tags\n");
        return 1;
    }

    // Only the count is printed; listing every tag is left out on purpose so
    // that output does not dominate the timing.
    printf("Found %zu unique tags:\n", tags->count);

    free_tag_array(tags);
    free(tags);
    return 0;
}
// Benchmark: collect the unique tags of every message that belongs to a
// thread matching a query, using a single notmuch thread search.
#include <notmuch.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Slot count used for a fresh tag array and when growing an empty one.
enum { INITIAL_CAPACITY = 16 };

// Growable list of unique tag names. Every entry is a heap copy owned by the
// array; release with free_tag_array().
typedef struct {
    char **tags;
    size_t count;
    size_t capacity;
} tag_array_t;

// Allocation policy for this tool: running out of memory is fatal.
static void die_out_of_memory(void)
{
    fprintf(stderr, "Out of memory\n");
    exit(EXIT_FAILURE);
}

// Prepare an empty tag array with room for `initial_capacity` entries.
static void init_tag_array(tag_array_t *arr, size_t initial_capacity)
{
    arr->tags = NULL;
    arr->count = 0;
    arr->capacity = initial_capacity;

    if (initial_capacity > 0) {
        // calloc checks the capacity * element-size multiplication for overflow.
        arr->tags = calloc(initial_capacity, sizeof *arr->tags);
        if (!arr->tags) {
            die_out_of_memory();
        }
    }
}

// Append a copy of `tag` unless an equal string is already stored.
static void add_unique_tag(tag_array_t *arr, const char *tag)
{
    // Check if tag already exists
    for (size_t i = 0; i < arr->count; i++) {
        if (strcmp(arr->tags[i], tag) == 0) {
            return;
        }
    }

    // Resize if necessary. Growth starts from INITIAL_CAPACITY when the
    // current capacity is 0, because doubling 0 would never make room.
    if (arr->count >= arr->capacity) {
        size_t grown_capacity = arr->capacity ? arr->capacity * 2 : INITIAL_CAPACITY;
        if (grown_capacity <= arr->capacity ||
            grown_capacity > SIZE_MAX / sizeof *arr->tags) {
            die_out_of_memory();
        }

        // Keep the old pointer until realloc is known to have succeeded.
        char **grown = realloc(arr->tags, grown_capacity * sizeof *grown);
        if (!grown) {
            die_out_of_memory();
        }
        arr->tags = grown;
        arr->capacity = grown_capacity;
    }

    // Add new tag (manual copy: strdup() is POSIX, not ISO C11)
    size_t size = strlen(tag) + 1;
    char *copy = malloc(size);
    if (!copy) {
        die_out_of_memory();
    }
    memcpy(copy, tag, size);
    arr->tags[arr->count++] = copy;
}

// Free all stored tags and reset the array to the empty state.
static void free_tag_array(tag_array_t *arr)
{
    for (size_t i = 0; i < arr->count; i++) {
        free(arr->tags[i]);
    }
    free(arr->tags);
    arr->tags = NULL;
    arr->count = arr->capacity = 0;
}

// Add every tag of `message` to `result`, skipping duplicates.
static void collect_message_tags(notmuch_message_t *message, tag_array_t *result)
{
    notmuch_tags_t *tags = notmuch_message_get_tags(message);
    if (!tags) {
        // Defensive: nothing to iterate if libnotmuch handed back no iterator.
        return;
    }

    for (; notmuch_tags_valid(tags); notmuch_tags_move_to_next(tags)) {
        const char *tag = notmuch_tags_get(tags);
        if (tag) {
            add_unique_tag(result, tag);
        }
    }
    notmuch_tags_destroy(tags);
}

// Collect the unique tags of all messages in all threads returned by a
// thread search for `query_string` in the database at `db_path`.
//
// Returns a heap-allocated tag_array_t that the caller must release with
// free_tag_array() followed by free(), or NULL when the database cannot be
// opened or the query fails. Memory exhaustion terminates the process.
tag_array_t *get_unique_tags(const char *db_path, const char *query_string)
{
    notmuch_database_t *db = NULL;
    notmuch_query_t *query = NULL;
    notmuch_threads_t *threads = NULL;
    int succeeded = 0;

    tag_array_t *result = malloc(sizeof *result);
    if (!result) {
        die_out_of_memory();
    }
    init_tag_array(result, INITIAL_CAPACITY);

    // Open the database
    notmuch_status_t status =
        notmuch_database_open(db_path, NOTMUCH_DATABASE_MODE_READ_ONLY, &db);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        fprintf(stderr, "Failed to open database\n");
        db = NULL;  // do not trust the out-parameter after a failed open
        goto cleanup;
    }

    // Create and execute query
    query = notmuch_query_create(db, query_string);
    if (!query) {
        goto cleanup;
    }

    // Get threads
    status = notmuch_query_search_threads(query, &threads);
    if (status != NOTMUCH_STATUS_SUCCESS) {
        goto cleanup;
    }

    // Walk every message of every matching thread
    for (; notmuch_threads_valid(threads); notmuch_threads_move_to_next(threads)) {
        notmuch_thread_t *thread = notmuch_threads_get(threads);
        notmuch_messages_t *messages = notmuch_thread_get_messages(thread);

        for (; notmuch_messages_valid(messages); notmuch_messages_move_to_next(messages)) {
            notmuch_message_t *message = notmuch_messages_get(messages);
            collect_message_tags(message, result);
            notmuch_message_destroy(message);
        }
        notmuch_messages_destroy(messages);
        notmuch_thread_destroy(thread);
    }
    notmuch_threads_destroy(threads);
    succeeded = 1;

cleanup:
    // Single exit path so that no partially built state is leaked.
    if (query) {
        notmuch_query_destroy(query);
    }
    if (db) {
        notmuch_database_destroy(db);
    }
    if (!succeeded) {
        free_tag_array(result);
        free(result);
        result = NULL;
    }
    return result;
}

// Example usage:
int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <notmuch-db-path> <query>\n", argv[0]);
        return 1;
    }

    tag_array_t *tags = get_unique_tags(argv[1], argv[2]);
    if (!tags) {
        fprintf(stderr, "Failed to get tags\n");
        return 1;
    }

    // Only the count is printed; listing every tag is left out on purpose so
    // that output does not dominate the timing.
    printf("Found %zu unique tags:\n", tags->count);

    free_tag_array(tags);
    free(tags);
    return 0;
}
_______________________________________________ notmuch mailing list -- notmuch@notmuchmail.org To unsubscribe send an email to notmuch-le...@notmuchmail.org