commit a5061f770a80b2e450a8c950f658880b703607ba from: Stefan Sperling via: Thomas Adam date: Mon Jun 13 17:55:22 2022 UTC convert delta cache to a hash table This approach uses more memory but is much faster. To offset the additional memory usage somewhat the cache now stores very small deltas only. However, overall memory usage goes up. Hopefully we will find a way to reduce this later. ok op@ commit - 2185070209cdb8294a76bf6bc8695326dd4701fc commit + a5061f770a80b2e450a8c950f658880b703607ba blob - e4977e93a1361623ed181cbec59a9512bf69c155 blob + a3fc5309822ffb0f6abf0d07bb6684cf9dbaf057 --- lib/delta_cache.c +++ lib/delta_cache.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include "got_compat.h" @@ -36,80 +38,152 @@ #define nitems(_a) (sizeof(_a) / sizeof((_a)[0])) #endif -struct got_delta_cache_element { - TAILQ_ENTRY(got_delta_cache_element) entry; - off_t delta_data_offset; - uint8_t *delta_data; - size_t delta_len; +#define GOT_DELTA_CACHE_MIN_BUCKETS 64 +#define GOT_DELTA_CACHE_MAX_BUCKETS 16384 +#define GOT_DELTA_CACHE_MAX_CHAIN 2 +#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 2048 + +struct got_cached_delta { + off_t offset; + uint8_t *data; + size_t len; }; -TAILQ_HEAD(got_delta_cache_head, got_delta_cache_element); +struct got_delta_cache_head { + struct got_cached_delta entries[GOT_DELTA_CACHE_MAX_CHAIN]; + unsigned int nchain; +}; struct got_delta_cache { - struct got_delta_cache_head entries; - int nelem; - int maxelem; - size_t maxelemsize; + struct got_delta_cache_head *buckets; + unsigned int nbuckets; + unsigned int totelem; int cache_search; int cache_hit; int cache_miss; int cache_evict; int cache_toolarge; + unsigned int flags; +#define GOT_DELTA_CACHE_F_NOMEM 0x01 + SIPHASH_KEY key; }; -struct got_delta_cache * -got_delta_cache_alloc(int maxelem, size_t maxelemsize) +const struct got_error * +got_delta_cache_alloc(struct got_delta_cache **new) { + const struct got_error *err; struct got_delta_cache *cache; + *new = NULL; + cache = calloc(1, sizeof(*cache)); if (cache == NULL) - return NULL; + return got_error_from_errno("calloc"); + + cache->buckets = calloc(GOT_DELTA_CACHE_MIN_BUCKETS, + sizeof(cache->buckets[0])); + if (cache->buckets == NULL) { + err = got_error_from_errno("calloc"); + free(cache); + return err; + } + cache->nbuckets = GOT_DELTA_CACHE_MIN_BUCKETS; - TAILQ_INIT(&cache->entries); - cache->maxelem = maxelem; - cache->maxelemsize = maxelemsize; - return cache; + arc4random_buf(&cache->key, sizeof(cache->key)); + *new = cache; + return NULL; } void got_delta_cache_free(struct got_delta_cache *cache) { - struct got_delta_cache_element *entry; + struct got_cached_delta *delta; + unsigned int i; #ifdef GOT_OBJ_CACHE_DEBUG - fprintf(stderr, "%s: delta cache: %d elements, %d searches, %d hits, " - "%d missed, %d evicted, %d too large\n", getprogname(), cache->nelem, - cache->cache_search, cache->cache_hit, cache->cache_miss, - cache->cache_evict, cache->cache_toolarge); + fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, " + "%d missed, %d evicted, %d too large\n", getprogname(), + cache->totelem, cache->cache_search, cache->cache_hit, + cache->cache_miss, cache->cache_evict, cache->cache_toolarge); #endif - while (!TAILQ_EMPTY(&cache->entries)) { - entry = TAILQ_FIRST(&cache->entries); - TAILQ_REMOVE(&cache->entries, entry, entry); - free(entry->delta_data); - free(entry); + for (i = 0; i < cache->nbuckets; i++) { + struct got_delta_cache_head *head; + int j; + head = &cache->buckets[i]; + for (j = 0; j < head->nchain; j++) { + delta = &head->entries[j]; + free(delta->data); + } } + free(cache->buckets); free(cache); } -#ifndef GOT_NO_OBJ_CACHE -static void -remove_least_used_element(struct got_delta_cache *cache) +static uint64_t +delta_cache_hash(struct got_delta_cache *cache, off_t delta_offset) { - struct got_delta_cache_element *entry; + return SipHash24(&cache->key, &delta_offset, sizeof(delta_offset)); +} - if (cache->nelem == 0) - return; +static const struct got_error * +delta_cache_resize(struct got_delta_cache *cache, unsigned int nbuckets) +{ + struct got_delta_cache_head *buckets; + size_t i; - entry = TAILQ_LAST(&cache->entries, got_delta_cache_head); - TAILQ_REMOVE(&cache->entries, entry, entry); - free(entry->delta_data); - free(entry); - cache->nelem--; - cache->cache_evict++; + buckets = calloc(nbuckets, sizeof(buckets[0])); + if (buckets == NULL) { + if (errno != ENOMEM) + return got_error_from_errno("calloc"); + /* Proceed with our current amount of hash buckets. */ + cache->flags |= GOT_DELTA_CACHE_F_NOMEM; + return NULL; + } + + arc4random_buf(&cache->key, sizeof(cache->key)); + + for (i = 0; i < cache->nbuckets; i++) { + unsigned int j; + for (j = 0; j < cache->buckets[i].nchain; j++) { + struct got_delta_cache_head *head; + struct got_cached_delta *delta; + uint64_t idx; + delta = &cache->buckets[i].entries[j]; + idx = delta_cache_hash(cache, delta->offset) % nbuckets; + head = &buckets[idx]; + if (head->nchain < nitems(head->entries)) { + struct got_cached_delta *new_delta; + new_delta = &head->entries[head->nchain]; + memcpy(new_delta, delta, sizeof(*new_delta)); + head->nchain++; + } else + free(delta->data); + } + } + + free(cache->buckets); + cache->buckets = buckets; + cache->nbuckets = nbuckets; + return NULL; } -#endif +static const struct got_error * +delta_cache_grow(struct got_delta_cache *cache) +{ + unsigned int nbuckets; + + if ((cache->flags & GOT_DELTA_CACHE_F_NOMEM) || + cache->nbuckets == GOT_DELTA_CACHE_MAX_BUCKETS) + return NULL; + + if (cache->nbuckets >= GOT_DELTA_CACHE_MAX_BUCKETS / 2) + nbuckets = GOT_DELTA_CACHE_MAX_BUCKETS; + else + nbuckets = cache->nbuckets * 2; + + return delta_cache_resize(cache, nbuckets); +} + const struct got_error * got_delta_cache_add(struct got_delta_cache *cache, off_t delta_data_offset, uint8_t *delta_data, size_t delta_len) @@ -117,26 +191,38 @@ got_delta_cache_add(struct got_delta_cache *cache, #ifdef GOT_NO_OBJ_CACHE return got_error(GOT_ERR_NO_SPACE); #else - struct got_delta_cache_element *entry; + const struct got_error *err = NULL; + struct got_cached_delta *delta; + struct got_delta_cache_head *head; + uint64_t idx; - if (delta_len > cache->maxelemsize) { + if (delta_len > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { cache->cache_toolarge++; return got_error(GOT_ERR_NO_SPACE); } - if (cache->nelem >= cache->maxelem) - remove_least_used_element(cache); + if (cache->nbuckets * 3 < cache->totelem * 4) { + err = delta_cache_grow(cache); + if (err) + return err; + } - entry = calloc(1, sizeof(*entry)); - if (entry == NULL) - return got_error_from_errno("calloc"); + idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets; + head = &cache->buckets[idx]; + if (head->nchain >= nitems(head->entries)) { + delta = &head->entries[head->nchain - 1]; + free(delta->data); + memset(delta, 0, sizeof(*delta)); + head->nchain--; + } - entry->delta_data_offset = delta_data_offset; - entry->delta_data = delta_data; - entry->delta_len = delta_len; + delta = &head->entries[head->nchain]; + delta->offset = delta_data_offset; + delta->data = delta_data; + delta->len = delta_len; + head->nchain++; + cache->totelem++; - TAILQ_INSERT_HEAD(&cache->entries, entry, entry); - cache->nelem++; return NULL; #endif } @@ -145,21 +231,33 @@ void got_delta_cache_get(uint8_t **delta_data, size_t *delta_len, struct got_delta_cache *cache, off_t delta_data_offset) { - struct got_delta_cache_element *entry; + uint64_t idx; + struct got_delta_cache_head *head; + struct got_cached_delta *delta; + int i; + idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets; + head = &cache->buckets[idx]; + cache->cache_search++; *delta_data = NULL; *delta_len = 0; - TAILQ_FOREACH(entry, &cache->entries, entry) { - if (entry->delta_data_offset != delta_data_offset) + for (i = 0; i < head->nchain; i++) { + delta = &head->entries[i]; + if (delta->offset != delta_data_offset) continue; cache->cache_hit++; - if (entry != TAILQ_FIRST(&cache->entries)) { - TAILQ_REMOVE(&cache->entries, entry, entry); - TAILQ_INSERT_HEAD(&cache->entries, entry, entry); + if (i > 0) { + struct got_cached_delta tmp; + memcpy(&tmp, &head->entries[0], sizeof(tmp)); + memcpy(&head->entries[0], &head->entries[i], + sizeof(head->entries[0])); + memcpy(&head->entries[i], &tmp, + sizeof(head->entries[i])); + delta = &head->entries[0]; } - *delta_data = entry->delta_data; - *delta_len = entry->delta_len; + *delta_data = delta->data; + *delta_len = delta->len; return; } blob - 39cb23dc018390d1de373fde1dc990faa76c21e2 blob + 7da86957201fd53ae69d5deb71b64d3bf6216599 --- lib/got_lib_delta_cache.h +++ lib/got_lib_delta_cache.h @@ -16,7 +16,7 @@ struct got_delta_cache; -struct got_delta_cache *got_delta_cache_alloc(int, size_t); +const struct got_error *got_delta_cache_alloc(struct got_delta_cache **); void got_delta_cache_free(struct got_delta_cache *); const struct got_error *got_delta_cache_add(struct got_delta_cache *, off_t, blob - 20b37bdd4753fdc49c3d8b42f85fd2e0be78eb68 blob + bdacb5506ed2ddd855aa4c7392923248c0f2b660 --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -1007,12 +1007,9 @@ main(int argc, char **argv) memset(&pack, 0, sizeof(pack)); pack.fd = -1; - pack.delta_cache = got_delta_cache_alloc(500, - GOT_DELTA_RESULT_SIZE_CACHED_MAX); - if (pack.delta_cache == NULL) { - err = got_error_from_errno("got_delta_cache_alloc"); + err = got_delta_cache_alloc(&pack.delta_cache); + if (err) goto done; - } imsg_init(&ibuf, GOT_IMSG_FD_CHILD); #ifndef PROFILE blob - ff7e4c262735bf12f8b7661425094d9ff8fdab74 blob + 6afe97bed5c5990ece7bbc7bbc1518be65ff5cca --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -1184,12 +1184,9 @@ receive_pack(struct got_pack **packp, struct imsgbuf * goto done; } - pack->delta_cache = got_delta_cache_alloc(100, - GOT_DELTA_RESULT_SIZE_CACHED_MAX); - if (pack->delta_cache == NULL) { - err = got_error_from_errno("got_delta_cache_alloc"); + err = got_delta_cache_alloc(&pack->delta_cache); + if (err) goto done; - } #ifndef GOT_PACK_NO_MMAP pack->map = mmap(NULL, pack->filesize, PROT_READ, MAP_PRIVATE,