branch: externals/hotfuzz
commit 95a1be449624aa2b25128b900b6211034d0e17bb
Author: Axel Forsman <[email protected]>
Commit: Axel Forsman <[email protected]>
Tidy up dynamic module
Use snake_case for function and variable names to better match the Emacs
C source code. Reduce the size of the candidate struct:s that get moved
around during sorting. Simplify batches and fix off-by-one illegal
pointer reads.
---
hotfuzz-module.c | 334 +++++++++++++++++++++++++++----------------------------
1 file changed, 163 insertions(+), 171 deletions(-)
diff --git a/hotfuzz-module.c b/hotfuzz-module.c
index 3748622ba7..a11625f91b 100644
--- a/hotfuzz-module.c
+++ b/hotfuzz-module.c
@@ -5,11 +5,11 @@
*/
#include <stdlib.h>
#include <stdbool.h>
+#include <stdalign.h>
#include <string.h>
#include <ctype.h>
-#include <pthread.h>
-#include <stdio.h>
#include <emacs-module.h>
+#include <pthread.h>
#include <sys/sysinfo.h>
#define MIN(a, b) ({ __typeof__(a) _a = (a), _b = (b); _a < _b ? _a : _b; })
@@ -17,25 +17,30 @@
#define MAX_NEEDLE_LEN 128
#define MAX_HAYSTACK_LEN 512
-#define BATCH_SIZE 1024
+#define BATCH_SIZE 2048
int plugin_is_GPL_compatible;
-struct CharSlice { size_t len; char *buf; };
+/** An Emacs string made accessible by copying. */
+struct EmacsStr {
+ emacs_value value; ///< The original string value.
+ size_t len; ///< The length of the string minus the null byte.
+ char b[]; ///< The null-terminated copied string.
+};
typedef int cost;
-static cost charBonus(char prev, char ch) {
- cost wordBonus = 80;
+static cost char_bonus(char prev, char ch) {
+ cost word_bonus = 80;
switch (ch) {
case 'A' ... 'Z':
- if ('a' <= prev && prev <= 'z') return wordBonus;
+ if ('a' <= prev && prev <= 'z') return word_bonus;
// Intentional fallthrough
case 'a' ... 'z':
switch (prev) {
case '/': return 90;
case '.': return 60;
- case '-': case '_': case ' ': return wordBonus;
+ case '-': case '_': case ' ': return word_bonus;
}
// Intentional fallthrough
default:
@@ -43,41 +48,41 @@ static cost charBonus(char prev, char ch) {
}
}
-static void calcBonus(struct CharSlice haystack, cost *out) {
+static void calc_bonus(struct EmacsStr *haystack, cost *out) {
char ch, lastch = '/';
- for (size_t i = 0; i < haystack.len; ++i, lastch = ch)
- out[i] = charBonus(lastch, ch = haystack.buf[i]);
+ for (size_t i = 0; i < haystack->len; ++i, lastch = ch)
+ out[i] = char_bonus(lastch, ch = haystack->b[i]);
}
/**
* Returns equality of the two characters up to a difference of case.
*/
-static bool charEqual(char a, char b) {
+static bool char_equal(char a, char b) {
return tolower(a) == tolower(b);
}
-static void matchRow(struct CharSlice a, struct CharSlice b, cost *bonuses,
int i,
- cost *nc, cost *nd, cost *pc, cost
*pd) {
+static void match_row(struct EmacsStr *a, struct EmacsStr *b, cost *bonuses,
unsigned i,
+ cost *nc, cost *nd, cost *pc, cost
*pd) {
cost g = 100, h = 5;
- size_t m = b.len;
+ size_t m = b->len;
cost oldc, s = i ? g + h * i : 0;
for (size_t j = 0; j < m; ++j, s = oldc) {
oldc = pc[j];
nc[j] = MIN(nd[j] = MIN(pd[j], oldc + g) + (j == m - 1 ? h : 2
* h),
- charEqual(a.buf[i], b.buf[j]) ? s -
bonuses[i] : 100000);
+ char_equal(a->b[i], b->b[j]) ? s -
bonuses[i] : 100000);
}
}
-static cost getCost(struct CharSlice needle, struct CharSlice haystack) {
- int n = haystack.len, m = needle.len;
+static cost get_cost(struct EmacsStr *needle, struct EmacsStr *haystack) {
+ unsigned n = haystack->len, m = needle->len;
if (n > MAX_HAYSTACK_LEN || m > MAX_NEEDLE_LEN) return 10000;
cost c[MAX_NEEDLE_LEN], d[MAX_NEEDLE_LEN];
- for (int i = 0; i < MAX_NEEDLE_LEN; ++i) c[i] = d[i] = 10000;
+ for (unsigned i = 0; i < MAX_NEEDLE_LEN; ++i) c[i] = d[i] = 10000;
cost bonuses[MAX_HAYSTACK_LEN];
- calcBonus(haystack, bonuses);
+ calc_bonus(haystack, bonuses);
- for (int i = 0; i < n; ++i) matchRow(haystack, needle, bonuses, i, c,
d, c, d);
+ for (unsigned i = 0; i < n; ++i) match_row(haystack, needle, bonuses,
i, c, d, c, d);
return c[m - 1];
}
@@ -91,7 +96,7 @@ static cost getCost(struct CharSlice needle, struct CharSlice
haystack) {
* @param needle Null-terminated search string.
* @param haystack Null-terminated completion candidate.
*/
-static bool isMatch(char *needle, char *haystack) {
+static bool is_match(char *needle, char *haystack) {
while (*needle) {
if ((haystack = strpbrk(haystack, (char[]) { tolower(*needle),
toupper(*needle), '\0' })))
++needle, ++haystack; // Skip past matched character
@@ -101,261 +106,248 @@ static bool isMatch(char *needle, char *haystack) {
return true;
}
-/// Intrusive linked list of string buffer blocks.
-struct StrBuf {
- struct StrBuf *next;
+/** Intrusive linked list of bump allocation blocks. */
+struct Bump {
+ struct Bump *next;
size_t index, capacity;
- char buf[];
+ char b[];
};
/**
- * Copies the Emacs string to make its lifetime that of the StrBuf.
+ * Allocates the specified number of bytes.
*
- * The resulting string is null-terminated.
+ * Returns NULL on failure.
*/
-static struct CharSlice strBufAdd(emacs_env *env, struct StrBuf **head,
emacs_value value) {
- ptrdiff_t len;
- // Determine the size of the string (including null-terminator)
- env->copy_string_contents(env, value, NULL, &len);
-
- if (!*head || (ptrdiff_t) ((*head)->capacity - (*head)->index) < len) {
- size_t capacity = MAX(2048, len);
- struct StrBuf *newHead;
- if (!(newHead = malloc(sizeof *newHead + capacity)))
- return (struct CharSlice) {0, NULL};
- newHead->next = *head;
- newHead->index = 0;
- newHead->capacity = capacity;
- *head = newHead;
+static void *bump_alloc(struct Bump **head, size_t len) {
+ if (!*head || (*head)->capacity - (*head)->index < len) {
+ size_t capacity = MAX(*head ? 2 * (*head)->capacity : 1024,
len);
+ struct Bump *new_head;
+ if (!(new_head = malloc(sizeof *new_head + capacity)))
+ return NULL;
+ *new_head = (struct Bump) { .next = *head, .index = 0,
.capacity = capacity };
+ *head = new_head;
}
- char *s = (*head)->buf + (*head)->index;
- env->copy_string_contents(env, value, s, &len);
+ void *p = (*head)->b + (*head)->index;
(*head)->index += len;
-
- return (struct CharSlice) {len - 1, s};
+ return p;
}
-static void strBufFree(struct StrBuf *head) {
+static void bump_free(struct Bump *head) {
while (head) {
- struct StrBuf *next = head->next;
+ struct Bump *next = head->next;
free(head);
head = next;
}
}
+/**
+ * Copies the Emacs string to make its lifetime that of the allocator.
+ */
+static struct EmacsStr *copy_emacs_string(emacs_env *env, struct Bump **bump,
emacs_value value) {
+ ptrdiff_t len;
+ // Determine the size of the string (including null-terminator)
+ env->copy_string_contents(env, value, NULL, &len);
+
+ struct EmacsStr *result;
+ // Note: Since only EmacsStr:s are allocated with bump_alloc we
+ // may use its smaller alignment rather than the scalar maximum.
+ if (!(result = bump_alloc(bump, sizeof *result + len
+ + alignof(struct
EmacsStr) - 1 & ~(alignof(struct EmacsStr) - 1))))
+ return NULL;
+
+ result->value = value;
+ result->len = len - 1;
+ env->copy_string_contents(env, value, result->b, &len);
+ return result;
+}
+
struct Candidate {
- /// The original string value.
- emacs_value value;
- /// The copied string.
- struct CharSlice s;
+ struct EmacsStr *s;
cost key;
};
-static int cmpCandidate(const void *a, const void *b) {
+static int cmp_candidate(const void *a, const void *b) {
return ((struct Candidate *) a)->key - ((struct Candidate *) b)->key;
}
-/// A batch is a collection of candidates that needs to be processed.
struct Batch {
- size_t count, capacity;
- struct Candidate candidates[];
+ unsigned len;
+ struct Candidate xs[BATCH_SIZE];
};
struct Shared {
pthread_mutex_t mutex;
- struct CharSlice needle;
- size_t remaining; ///< The remaining number of batches.
- struct Batch *batches; ///< Array of remaining batches.
- size_t matchCount;
+ struct EmacsStr *needle;
+ struct Batch *batches, *batches_end;
};
-struct Job {
+struct Worker {
pthread_t thread;
struct Shared *shared;
- struct Batch *batch;
+ struct Batch *batch; ///< The initial batch to work on.
};
static enum JobRetVal {
JOB_FINISHED,
JOB_FAILED
-} jobFinished = JOB_FINISHED, jobFailed = JOB_FAILED;
+} job_finished = JOB_FINISHED, job_failed = JOB_FAILED;
-static void *filterCandidates(void *ptr) {
- struct Job *job = ptr;
- struct Shared *shared = job->shared;
- struct CharSlice needle = shared->needle;
- struct Batch *batch = job->batch;
+static void *worker_routine(void *ptr) {
+ struct Worker *worker = ptr;
+ struct Shared *shared = worker->shared;
+ struct EmacsStr *needle = shared->needle;
+ struct Batch *batch = worker->batch;
do {
- size_t matchIdx = 0;
- for (size_t i = 0; i < batch->count; ++i) {
- struct Candidate *candidate = batch->candidates + i;
- if (!isMatch(needle.buf, candidate->s.buf)) continue;
-
- batch->candidates[matchIdx++] = (struct Candidate) {
- .value = candidate->value,
+ unsigned num_matches = 0;
+ for (unsigned i = 0; i < batch->len; ++i) {
+ struct Candidate *candidate = batch->xs + i;
+ if (!is_match(needle->b, candidate->s->b)) continue;
+ batch->xs[num_matches++] = (struct Candidate) {
.s = candidate->s,
- .key = getCost(needle, candidate->s),
+ .key = get_cost(needle, candidate->s),
};
}
- batch->count = matchIdx; // Update number of matching
completions
-
- // Try to get new batch
- if (pthread_mutex_lock(&shared->mutex)) return &jobFailed;
- shared->matchCount += matchIdx;
- if (shared->remaining > 0) {
- --shared->remaining;
- batch = shared->batches;
- shared->batches = (void *) (shared->batches + 1) +
sizeof(struct Candidate) * batch->capacity;
- } else {
- batch = NULL;
- }
+ batch->len = num_matches;
+
+ // Try to fetch a new batch
+ if (pthread_mutex_lock(&shared->mutex)) return &job_failed;
+ batch = shared->batches < shared->batches_end ?
shared->batches++ : NULL;
pthread_mutex_unlock(&shared->mutex);
} while (batch);
- return &jobFinished;
+ return &job_finished;
}
-/// Module userdata that gets allocated once at initialization.
+/** Module userdata that gets allocated once at initialization. */
struct Data {
- size_t maxJobs;
- struct Job *jobs;
+ unsigned max_workers;
+ struct Worker *workers;
};
-emacs_value filterEmacsList(emacs_env *env, ptrdiff_t nargs __attribute__
((__unused__)), emacs_value args[], void *dataPtr) {
- struct Data *data = dataPtr;
+emacs_value hotfuzz_filter(emacs_env *env, ptrdiff_t nargs __attribute__
((__unused__)), emacs_value args[], void *data_ptr) {
+ // Short-circuit if needle is empty
+ ptrdiff_t needle_len;
+ env->copy_string_contents(env, args[0], NULL, &needle_len);
+ if (needle_len == /* solely null byte */ 1)
+ return args[1];
+
+ struct Data *data = data_ptr;
emacs_value fcar = env->intern(env, "car"),
fcdr = env->intern(env, "cdr"),
fcons = env->intern(env, "cons"),
nil = env->intern(env, "nil");
- struct StrBuf *strBufs = NULL;
+ struct Bump *bump = NULL;
int success = false;
emacs_value result = nil;
// Collect all candidates
emacs_value list = args[1];
struct Batch *batches = NULL;
- size_t batchIdx = 0, capacity = 0;
+ size_t batch_idx = 0, capacity = 0;
while (env->is_not_nil(env, list)) {
- if (batchIdx >= capacity) {
- capacity = MAX(2 * capacity, 1);
- struct Batch *newBatches;
- if (!(newBatches = realloc(batches, capacity * (sizeof
*batches + sizeof(struct Candidate) * BATCH_SIZE))))
+ if ((batches && batches[batch_idx].len >= BATCH_SIZE ?
++batch_idx : batch_idx)
+ >= capacity) {
+ capacity = capacity ? 2 * capacity : 1;
+ struct Batch *new_batches;
+ if (!(new_batches = realloc(batches, capacity * sizeof
*batches)))
goto error;
- batches = newBatches;
-
- // Initialize the newly allocated batches
- for (size_t i = batchIdx; i < capacity; ++i) {
- struct Batch *batch = (void *) batches +
(sizeof *batches + sizeof(struct Candidate) * BATCH_SIZE) * i;
- batch->count = 0;
- batch->capacity = BATCH_SIZE;
- }
+ batches = new_batches;
+ for (size_t i = batch_idx; i < capacity; ++i)
+ batches[i].len = 0;
}
- struct Batch *batch = (void *) batches + (sizeof *batches +
sizeof(struct Candidate) * BATCH_SIZE) * batchIdx;
emacs_value value = env->funcall(env, fcar, 1, (emacs_value[])
{list});
- struct CharSlice s = strBufAdd(env, &strBufs, value);
- if (!s.buf) goto error;
- batch->candidates[batch->count++] = (struct Candidate) { .value
= value, .s = s, };
- if (batch->count >= BATCH_SIZE) ++batchIdx;
-
+ struct Batch *b = batches + batch_idx;
+ if (!(b->xs[b->len++].s = copy_emacs_string(env, &bump, value)))
+ goto error;
list = env->funcall(env, fcdr, 1, (emacs_value[]) {list});
}
+ if (!batches) return nil;
- struct CharSlice needle = strBufAdd(env, &strBufs, args[0]);
- if (!needle.buf) goto error;
+ struct EmacsStr *needle = copy_emacs_string(env, &bump, args[0]);
+ if (!needle) goto error;
struct Shared shared = {
.needle = needle,
- .remaining = batchIdx + 1,
.batches = batches,
- .matchCount = 0,
+ .batches_end = batches + batch_idx + 1,
};
if (pthread_mutex_init(&shared.mutex, NULL)) goto error;
if (pthread_mutex_lock(&shared.mutex)) goto mutex_error;
-
- enum JobRetVal res = jobFinished;
- size_t jobCount;
- struct Job *jobs = data->jobs;
- for (jobCount = 0; jobCount < data->maxJobs && shared.remaining;
++jobCount, --shared.remaining) {
- struct Job *job = jobs + jobCount;
- *job = (struct Job) {
+ enum JobRetVal res = job_finished;
+ unsigned worker_count;
+ struct Worker *workers = data->workers;
+ for (worker_count = 0; worker_count < data->max_workers
+ && shared.batches < shared.batches_end;
++worker_count) {
+ struct Worker *worker = workers + worker_count;
+ *worker = (struct Worker) {
.shared = &shared,
- .batch = shared.batches,
+ .batch = shared.batches++,
};
- shared.batches = (void *) (shared.batches + 1) + sizeof(struct
Candidate) * job->batch->capacity;
- if (pthread_create(&job->thread, NULL, filterCandidates, job)) {
- // Join all worker threads in order to at least safely
destroy mutex
- res = jobFailed;
+ if (pthread_create(&worker->thread, NULL, worker_routine,
worker)) {
+ // Join all workers in order to at least safely destroy
mutex
+ res = job_failed;
break;
}
}
pthread_mutex_unlock(&shared.mutex);
// Wait for all worker threads
- for (size_t i = 0; i < jobCount; ++i) {
+ for (unsigned i = 0; i < worker_count; ++i) {
enum JobRetVal *retval;
- pthread_join(jobs[i].thread, (void **) &retval);
+ pthread_join(workers[i].thread, (void **) &retval);
res |= *retval;
}
- if (res != jobFinished) goto mutex_error;
-
- // Compact all batches to be able to sort
- struct Batch *batch = batches,
- *b = (void *) (batch + 1) + sizeof(struct Candidate) *
batch->capacity;
- size_t k = batch->count;
- for (size_t i = 1; i <= batchIdx; ++i) {
- size_t count = b->count, capacity = b->capacity;
- for (size_t j = 0; j < count; ++j)
- batch->candidates[k++] = b->candidates[j];
- batch->count += count;
- batch->capacity += capacity;
-
- b = (void *) (b + 1) + sizeof(struct Candidate) * capacity;
+ if (res != job_finished) goto mutex_error;
+
+ // Compact all batches
+ size_t len = batches[0].len;
+ struct Candidate *xs = batches[0].xs;
+ for (struct Batch *b = batches + 1; b < shared.batches_end; ++b) {
+ unsigned n = b->len;
+ memmove(xs + len, b->xs, n * sizeof *b->xs);
+ len += n;
}
+ qsort(xs, len, sizeof *xs, cmp_candidate); // Sort the completions
- // Sort the completions
- if (needle.len > 0)
- qsort(batch->candidates,
- batch->count,
- sizeof *batch->candidates,
- cmpCandidate);
-
- for (size_t i = batch->count; i-- > 0;)
- result = env->funcall(env, fcons, 2,
- (emacs_value[])
{batch->candidates[i].value, result});
+ for (size_t i = len; i-- > 0;)
+ result = env->funcall(env, fcons, 2, (emacs_value[])
{xs[i].s->value, result});
success = true;
mutex_error:
pthread_mutex_destroy(&shared.mutex);
error:
free(batches);
- strBufFree(strBufs);
+ bump_free(bump);
if (!success)
env->non_local_exit_signal(env, env->intern(env, "error"), nil);
return result;
}
-int emacs_module_init(struct emacs_runtime *ert) {
- emacs_env *env = ert->get_environment(ert);
+int emacs_module_init(struct emacs_runtime *rt) {
+ // Verify compatability with Emacs executable loading this module
+ if ((size_t) rt->size < sizeof *rt)
+ return 1;
+ emacs_env *env = rt->get_environment(rt);
+ if ((size_t) env->size < sizeof *env)
+ return 2;
- long numProcs = get_nprocs();
static struct Data data;
- data.maxJobs = numProcs;
- if (!(data.jobs = malloc(data.maxJobs * sizeof *data.jobs)))
+ data.max_workers = get_nprocs();
+ if (!(data.workers = malloc(data.max_workers * sizeof *data.workers)))
return 1;
- env->funcall(env, env->intern(env, "defalias"),
- 2, (emacs_value[]) {
- env->intern(env, "hotfuzz--filter-c"),
- env->make_function(env, 2, 2,
filterEmacsList,
-
"Filter and sort CANDIDATES that match STRING.\n"
-
"\n"
-
"\(fn STRING CANDIDATES)",
-
&data),
- });
+ env->funcall(env, env->intern(env, "defalias"), 2, (emacs_value[]) {
+ env->intern(env, "hotfuzz--filter-c"),
+ env->make_function(env, 2, 2, hotfuzz_filter,
+ "Filter and sort
CANDIDATES that match STRING.\n"
+ "\n"
+ "\(fn STRING
CANDIDATES)",
+ &data),
+ });
env->funcall(env, env->intern(env, "provide"), 1,
(emacs_value[]) { env->intern(env,
"hotfuzz-module") });