/* bucket.c - The routines for playing with hash buckets. */ /* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson. Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. GDBM is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GDBM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GDBM; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. You may contact the author by: e-mail: phil@cs.wwu.edu us-mail: Philip A. Nelson Computer Science Department Western Washington University Bellingham, WA 98226 *************************************************************************/ /* include system configuration before all else. */ #include "autoconf.h" #include "gdbmdefs.h" /* Initializing a new hash buckets sets all bucket entries to -1 hash value. */ void _gdbm_new_bucket (dbf, bucket, bits) gdbm_file_info *dbf; hash_bucket *bucket; int bits; { int index; /* Initialize the avail block. */ bucket->av_count = 0; /* Set the information fields first. */ bucket->bucket_bits = bits; bucket->count = 0; /* Initialize all bucket elements. */ for (index = 0; index < dbf->header->bucket_elems; index++) bucket->h_table[index].hash_value = -1; } /* Find a bucket for DBF that is pointed to by the bucket directory from location DIR_INDEX. The bucket cache is first checked to see if it is already in memory. If not, a bucket may be tossed to read the new bucket. In any case, the requested bucket is make the "current" bucket and dbf->bucket points to the correct bucket. */ void _gdbm_get_bucket (dbf, dir_index) gdbm_file_info *dbf; word_t dir_index; { off_t bucket_adr; /* The address of the correct hash bucket. */ int num_bytes; /* The number of bytes read. */ off_t file_pos; /* The return address for lseek. */ register int index; /* Loop index. */ /* Initial set up. */ dbf->bucket_dir = dir_index; bucket_adr = dbf->dir [dir_index]; if (dbf->bucket_cache == NULL) { if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1) _gdbm_fatal(dbf, "couldn't init cache"); } /* Is that one is not already current, we must find it. */ if (dbf->cache_entry->ca_adr != bucket_adr) { /* Look in the cache. */ for (index = 0; index < dbf->cache_size; index++) { if (dbf->bucket_cache[index].ca_adr == bucket_adr) { dbf->bucket = dbf->bucket_cache[index].ca_bucket; dbf->cache_entry = &dbf->bucket_cache[index]; return; } } /* It is not in the cache, read it from the disk. */ dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; if (dbf->bucket_cache[dbf->last_read].ca_changed) _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]); dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr; dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket; dbf->cache_entry = &dbf->bucket_cache[dbf->last_read]; dbf->cache_entry->ca_data.elem_loc = -1; dbf->cache_entry->ca_changed = FALSE; /* Read the bucket. */ file_pos = lseek (dbf->desc, bucket_adr, L_SET); if (file_pos != bucket_adr) _gdbm_fatal (dbf, "lseek error"); num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size); if (num_bytes != dbf->header->bucket_size) _gdbm_fatal (dbf, "read error"); } return; } /* Split the current bucket. This includes moving all items in the bucket to a new bucket. This doesn't require any disk reads because all hash values are stored in the buckets. Splitting the current bucket may require doubling the size of the hash directory. */ void _gdbm_split_bucket (dbf, next_insert) gdbm_file_info *dbf; word_t next_insert; { hash_bucket *bucket[2]; /* Pointers to the new buckets. */ int new_bits; /* The number of bits for the new buckets. */ int cache_0; /* Location in the cache for the buckets. */ int cache_1; off_t adr_0; /* File address of the new bucket 0. */ off_t adr_1; /* File address of the new bucket 1. */ avail_elem old_bucket; /* Avail Struct for the old bucket. */ off_t dir_start0; /* Used in updating the directory. */ off_t dir_start1; off_t dir_end; off_t *new_dir; /* Pointer to the new directory. */ off_t dir_adr; /* Address of the new directory. */ int dir_size; /* Size of the new directory. */ off_t old_adr[31]; /* Address of the old directories. */ int old_size[31]; /* Size of the old directories. */ int old_count; /* Number of old directories. */ int index; /* Used in array indexing. */ int index1; /* Used in array indexing. */ int elem_loc; /* Location in new bucket to put element. */ bucket_element *old_el; /* Pointer into the old bucket. */ int select; /* Used to index bucket during movement. */ /* No directories are yet old. */ old_count = 0; if (dbf->bucket_cache == NULL) { if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1) _gdbm_fatal(dbf, "couldn't init cache"); } while (dbf->bucket->count == dbf->header->bucket_elems) { /* Initialize the "new" buckets in the cache. */ do { dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; cache_0 = dbf->last_read; } while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket); bucket[0] = dbf->bucket_cache[cache_0].ca_bucket; if (dbf->bucket_cache[cache_0].ca_changed) _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]); do { dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; cache_1 = dbf->last_read; } while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket); bucket[1] = dbf->bucket_cache[cache_1].ca_bucket; if (dbf->bucket_cache[cache_1].ca_changed) _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]); new_bits = dbf->bucket->bucket_bits+1; _gdbm_new_bucket (dbf, bucket[0], new_bits); _gdbm_new_bucket (dbf, bucket[1], new_bits); adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size); dbf->bucket_cache[cache_0].ca_adr = adr_0; adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size); dbf->bucket_cache[cache_1].ca_adr = adr_1; /* Double the directory size if necessary. */ if (dbf->header->dir_bits == dbf->bucket->bucket_bits) { dir_size = dbf->header->dir_size * 2; dir_adr = _gdbm_alloc (dbf, dir_size); new_dir = (off_t *) malloc (dir_size); if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error"); for (index = 0; index < dbf->header->dir_size/sizeof (off_t); index++) { new_dir[2*index] = dbf->dir[index]; new_dir[2*index+1] = dbf->dir[index]; } /* Update header. */ old_adr[old_count] = dbf->header->dir; dbf->header->dir = dir_adr; old_size[old_count] = dbf->header->dir_size; dbf->header->dir_size = dir_size; dbf->header->dir_bits = new_bits; old_count++; /* Now update dbf. */ dbf->header_changed = TRUE; dbf->bucket_dir *= 2; free (dbf->dir); dbf->dir = new_dir; } /* Copy all elements in dbf->bucket into the new buckets. */ for (index = 0; index < dbf->header->bucket_elems; index++) { old_el = & (dbf->bucket->h_table[index]); select = (old_el->hash_value >> (31-new_bits)) & 1; elem_loc = old_el->hash_value % dbf->header->bucket_elems; while (bucket[select]->h_table[elem_loc].hash_value != -1) elem_loc = (elem_loc + 1) % dbf->header->bucket_elems; bucket[select]->h_table[elem_loc] = *old_el; bucket[select]->count += 1; } /* Allocate avail space for the bucket[1]. */ bucket[1]->bucket_avail[0].av_adr = _gdbm_alloc (dbf, dbf->header->block_size); bucket[1]->bucket_avail[0].av_size = dbf->header->block_size; bucket[1]->av_count = 1; /* Copy the avail elements in dbf->bucket to bucket[0]. */ bucket[0]->av_count = dbf->bucket->av_count; index = 0; index1 = 0; if (bucket[0]->av_count == BUCKET_AVAIL) { /* The avail is full, move the first one to bucket[1]. */ _gdbm_put_av_elem (dbf->bucket->bucket_avail[0], bucket[1]->bucket_avail, &bucket[1]->av_count); index = 1; bucket[0]->av_count --; } for (; index < dbf->bucket->av_count; index++) { bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index]; } /* Update the directory. We have new file addresses for both buckets. */ dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1; dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits); dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits); dir_start0 = dir_start1 - (dir_end - dir_start1); for (index = dir_start0; index < dir_start1; index++) dbf->dir[index] = adr_0; for (index = dir_start1; index < dir_end; index++) dbf->dir[index] = adr_1; /* Set changed flags. */ dbf->bucket_cache[cache_0].ca_changed = TRUE; dbf->bucket_cache[cache_1].ca_changed = TRUE; dbf->bucket_changed = TRUE; dbf->directory_changed = TRUE; dbf->second_changed = TRUE; /* Update the cache! */ dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits); /* Invalidate old cache entry. */ old_bucket.av_adr = dbf->cache_entry->ca_adr; old_bucket.av_size = dbf->header->bucket_size; dbf->cache_entry->ca_adr = 0; dbf->cache_entry->ca_changed = FALSE; /* Set dbf->bucket to the proper bucket. */ if (dbf->dir[dbf->bucket_dir] == adr_0) { dbf->bucket = bucket[0]; dbf->cache_entry = &dbf->bucket_cache[cache_0]; _gdbm_put_av_elem (old_bucket, bucket[1]->bucket_avail, &bucket[1]->av_count); } else { dbf->bucket = bucket[1]; dbf->cache_entry = &dbf->bucket_cache[cache_1]; _gdbm_put_av_elem (old_bucket, bucket[0]->bucket_avail, &bucket[0]->av_count); } } /* Get rid of old directories. */ for (index = 0; index < old_count; index++) _gdbm_free (dbf, old_adr[index], old_size[index]); } /* The only place where a bucket is written. CA_ENTRY is the cache entry containing the bucket to be written. */ void _gdbm_write_bucket (dbf, ca_entry) gdbm_file_info *dbf; cache_elem *ca_entry; { int num_bytes; /* The return value for write. */ off_t file_pos; /* The return value for lseek. */ file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET); if (file_pos != ca_entry->ca_adr) _gdbm_fatal (dbf, "lseek error"); num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size); if (num_bytes != dbf->header->bucket_size) _gdbm_fatal (dbf, "write error"); ca_entry->ca_changed = FALSE; ca_entry->ca_data.hash_val = -1; ca_entry->ca_data.elem_loc = -1; }