star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

commit 8a076a4b558c3b568f554fece0849b9085851f51
parent 3603459c3b9bf97edfb9805b189f568b696f4326
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed, 11 Feb 2026 10:41:09 +0100

Rewriting the [de]serialization of the line list

Updating the layout of the serialized data to allow the deserialization
of a subset of the lines in a list. The signature of the deserialization
function is therefore updated so that the caller can specify the range
of lines to read.

Taking advantage of this change, the deserialization function has been
renamed from shtr_line_list_create_from_stream to shtr_line_list_read,
to be symmetrical with the serialization function shtr_line_list_write.
Apparently, the old name was chosen to avoid confusion with the load
function. Loading now implicitly means that the data is parsed and
ultimately stored in memory, while reading is limited to copying the
serialized data into memory: no parsing is performed.

Finally, the function now accepts a file name as an input argument, as
an alternative to an already opened stream. This simplifies its use: the
caller no longer has to open the file beforehand.

Diffstat:
Msrc/shtr.h | 36++++++++++++++++++++++++++++++++----
Msrc/shtr_line_list.c | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
Msrc/shtr_line_list_c.h | 7+++++++
Msrc/shtr_main.c | 16++++++----------
Msrc/test_shtr_lines.c | 25++++++++++++++-----------
5 files changed, 172 insertions(+), 49 deletions(-)

diff --git a/src/shtr.h b/src/shtr.h @@ -199,14 +199,42 @@ struct shtr_create_args { static const struct shtr_create_args SHTR_CREATE_ARGS_DEFAULT = SHTR_CREATE_ARGS_DEFAULT__; +/* Load lines saved in HITRAN format */ struct shtr_line_list_load_args { - const char* filename; /* Name of the file to load or of the provided stream */ - FILE* file; /* Stream from where data are loaded. NULL <=> load from file */ + /* Name of the file to load or of the provided stream. + * NULL <=> uses a default name for the stream to be loaded, which must + * therefore be defined. */ + const char* filename; + + /* Stream from where data are loaded. + * NULL <=> loading from the file name, which must therefore be defined */ + FILE* file; }; #define SHTR_LINE_LIST_LOAD_ARGS_NULL__ {NULL, NULL} static const struct shtr_line_list_load_args SHTR_LINE_LIST_LOAD_ARGS_NULL = SHTR_LINE_LIST_LOAD_ARGS_NULL__; +/* Read the lines stored in the internal format of the shtr library, i.e., as + * they were written by the shtr_line_list_write function */ +struct shtr_line_list_read_args { + /* Name of the file to read or of the provided stream. + * NULL <=> uses a default name for the stream to be loaded, which must + * therefore be defined. */ + const char* filename; /* Name of the file to read */ + FILE* file; /* Stream from where data are read. NULL <=> read from file */ + + /* Range of line indices to read. The limits are included. + * If the lower limit is greater than or equal to the number of lines to read, + * no lines are read. + * If the upper limit is greater than or equal to the number of lines to read, + * all lines from the lower limit are read. + * If the range is degenerate, an error is returned. 
*/ + size_t range[2]; +}; +#define SHTR_LINE_LIST_READ_ARGS_NULL__ {NULL, NULL, {0,SIZE_MAX}} +static const struct shtr_line_list_read_args SHTR_LINE_LIST_READ_ARGS_NULL = + SHTR_LINE_LIST_READ_ARGS_NULL__; + BEGIN_DECLS /******************************************************************************* @@ -296,9 +324,9 @@ shtr_line_list_load /* Load the line list serialized with the "shtr_line_list_write" function */ SHTR_API res_T -shtr_line_list_create_from_stream +shtr_line_list_read (struct shtr* shtr, - FILE* stream, + const struct shtr_line_list_read_args* args, struct shtr_line_list** list); SHTR_API res_T diff --git a/src/shtr_line_list.c b/src/shtr_line_list.c @@ -16,6 +16,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define _POSIX_C_SOURCE 200809L /* fseeko */ + #include "shtr_c.h" #include "shtr_line_list_c.h" #include "shtr_param.h" @@ -23,6 +25,8 @@ #include <rsys/cstr.h> #include <rsys/text_reader.h> +#include <stdio.h> /* fseeko */ + /* Maximum number of lines that can be stored in a memory block */ #define NLINES_PER_BLOCK (BLOCK_SIZE/sizeof(struct line)) @@ -41,6 +45,29 @@ check_shtr_line_list_load_args(const struct shtr_line_list_load_args* args) } static res_T +check_shtr_line_list_read_args(const struct shtr_line_list_read_args* args) +{ + if(!args) return RES_BAD_ARG; + + /* Source is missing */ + if(!args->file && !args->filename) return RES_BAD_ARG; + + /* Line range is degenerated */ + if(args->range[0] > args->range[1]) return RES_BAD_ARG; + + return RES_OK; +} + +static res_T +size_to_off(const size_t sz, off_t* off) +{ + ASSERT(off); + if(sz != (size_t)((off_t)sz)) return RES_BAD_OP; + *off = (off_t)sz; + return RES_OK; +} + +static res_T create_line_list (struct shtr* shtr, struct shtr_line_list** out_list) @@ -453,26 +480,52 @@ error: } res_T -shtr_line_list_create_from_stream +shtr_line_list_read (struct shtr* shtr, - FILE* stream, + 
const struct shtr_line_list_read_args* args, struct shtr_line_list** out_list) { - struct shtr_line_list* list = NULL; - size_t nblocks = 0; - char** blocks = NULL; + size_t line_range[2]={0,0}; /* Range of lines adapted to effective line ids */ + size_t nlines = 0; /* Ttotal number of lines in the original list */ + size_t nblocks = 0; /* Number of memory blocks needed to store read lines */ + + struct shtr_line_list* list = NULL; /* The output list */ + char** blocks = NULL; /* Allocated list of memory blocks */ + + const char* name = NULL; /* file name */ + FILE* stream = NULL; /* The stream to load */ + + /* Miscellaneous */ size_t i = 0; + size_t sz = 0; + size_t sz_to_load = 0; + off_t off; int version = 0; + int err = 0; res_T res = RES_OK; - if(!shtr || !out_list || !stream) { - res = RES_BAD_ARG; - goto error; - } + if(!shtr || !out_list) { res = RES_BAD_ARG; goto error; } + res = check_shtr_line_list_read_args(args); + if(res != RES_OK) goto error; res = create_line_list(shtr, &list); if(res != RES_OK) goto error; + /* Setup the intput stream */ + if(args->file) { + name = args->filename ? args->filename : "<stream>"; + stream = args->file; + } else { + name = args->filename; + stream = fopen(args->filename, "r"); + if(!stream) { + ERROR(shtr, "%s: error opening file %s -- %s\n", + FUNC_NAME, args->filename, strerror(errno)); + res = RES_IO_ERR; + goto error; + } + } + #define READ(Var, Nb) { \ if(fread((Var), sizeof(*(Var)), (Nb), stream) != (Nb)) { \ if(feof(stream)) { \ @@ -483,7 +536,8 @@ shtr_line_list_create_from_stream res = RES_UNKNOWN_ERR; \ } \ ERROR(shtr, \ - "%s: error reading line list -- %s.\n", FUNC_NAME, res_to_cstr(res)); \ + "%s:%s: error reading line list -- %s.\n", \ + FUNC_NAME, name, res_to_cstr(res)); \ goto error; \ } \ } (void)0 @@ -491,31 +545,66 @@ shtr_line_list_create_from_stream READ(&version, 1); if(version != SHTR_LINE_LIST_VERSION) { ERROR(shtr, - "%s: unexpected line list version %d. 
" + "%s:%s: unexpected line list version %d. " "Expecting a line list in version %d.\n", - FUNC_NAME, version, SHTR_LINE_LIST_VERSION); + FUNC_NAME, name, version, SHTR_LINE_LIST_VERSION); res = RES_BAD_ARG; goto error; } - READ(&list->nlines, 1); - nblocks = (list->nlines + (NLINES_PER_BLOCK-1)/*ceil*/) / NLINES_PER_BLOCK; + /* Informations on line parameters */ + READ(&list->info, 1); + + /* Total number of lines in the list from which the lines will be read */ + READ(&nlines, 1); - /* Line stored in memory blocks */ + /* Actually, there are no lines to read */ + if(nlines <= args->range[0]) goto exit; + + /* Fit the upper limit to the effective number of lines */ + line_range[0] = args->range[0]; + line_range[1] = MMIN(args->range[1], nlines-1/*inclusive bounds*/); + list->nlines = line_range[1] - line_range[0] + 1/*inclusive bounds*/; + + /* Calculate the number of blocks needed to store the line to be read */ + nblocks = (list->nlines + (NLINES_PER_BLOCK-1)/*ceil*/) / NLINES_PER_BLOCK; if((res = darray_charp_resize(&list->blocks, nblocks)) != RES_OK) goto error; + + /* Compute the offset toward the first line to load */ + sz = line_range[0] * sizeof(struct line); + res = size_to_off(sz, &off); + if(res != RES_OK) { + ERROR(shtr, "%s:%s: file is too large regarding the seek offset %zu\n", + FUNC_NAME, name, sz); + goto error; + } + + /* Move to the first line to load */ + err = fseeko(stream, off, SEEK_CUR); + if(err) { + ERROR(shtr, "%s:%s: %s\n", FUNC_NAME, name, strerror(errno)); + res = RES_IO_ERR; + goto error; + } + + /* Compute the overall number of bytes to load */ + sz_to_load = list->nlines * sizeof(struct line); + + /* Load line data and store them in memory blocks */ blocks = darray_charp_data_get(&list->blocks); FOR_EACH(i, 0, nblocks) { - blocks[i] = MEM_ALLOC(list->shtr->allocator, BLOCK_SIZE); + blocks[i] = MEM_CALLOC(list->shtr->allocator, 1, BLOCK_SIZE); if(!blocks[i]) { - ERROR(shtr, "%s: error allocating memory block\n", FUNC_NAME); + 
ERROR(shtr, "%s:%s: error allocating memory block\n", name, FUNC_NAME); res = RES_MEM_ERR; goto error; } - READ(blocks[i], BLOCK_SIZE); - } - /* Informations on line parameters */ - READ(&list->info, 1); + sz = MMIN(BLOCK_SIZE, sz_to_load); + READ(blocks[i], sz); + + sz_to_load -= sz; + } #undef READ @@ -590,6 +679,9 @@ shtr_line_list_write /* Version management */ WRITE(&SHTR_LINE_LIST_VERSION, 1); + /* Informations on line parameters */ + WRITE(&list->info, 1); + /* Number of lines in the list */ WRITE(&list->nlines, 1); @@ -598,9 +690,6 @@ shtr_line_list_write n = darray_charp_size_get(&list->blocks); FOR_EACH(i, 0, n) { WRITE(blocks[i], BLOCK_SIZE); } - /* Informations on line parameters */ - WRITE(&list->info, 1); - #undef WRITE exit: diff --git a/src/shtr_line_list_c.h b/src/shtr_line_list_c.h @@ -73,6 +73,13 @@ static const struct line LINE_NULL = LINE_NULL__; STATIC_ASSERT(sizeof(struct line)==32, Unexpected_sizeof_struct_line); +/* Ensure that a block is filled with lines data, i.e., it does not contain any + * padding bytes, so that once a list of blocks is serialized, the resulting + * data forms a continuous list of lines. 
This ensures that lines can be loaded + * into memory blocks from anywhere in the list, without worrying about the + * layout of the original blocks */ +STATIC_ASSERT((BLOCK_SIZE % sizeof(struct line)) == 0, Unexpected_sizeof_block); + /* Generate the dynamic array of char*, the dynamic array of memory blocks */ #define DARRAY_NAME charp #define DARRAY_DATA char* diff --git a/src/shtr_main.c b/src/shtr_main.c @@ -153,24 +153,20 @@ load_molparam(const struct cmd* cmd, struct shtr_isotope_metadata** molparam) static res_T load_lines_binary(const struct cmd* cmd, struct shtr_line_list** lines) { + struct shtr_line_list_read_args args = SHTR_LINE_LIST_READ_ARGS_NULL; FILE* fp = NULL; res_T res = RES_OK; ASSERT(cmd && lines && cmd->args.lines); - if(!strcmp(cmd->args.lines, STDIN_NAME)) { - fp = stdin; + if(strcmp(cmd->args.lines, STDIN_NAME)) { + args.filename = cmd->args.lines; } else { - fp = fopen(cmd->args.lines, "r"); - if(!fp) { - fprintf(stderr, "%s: error opening file -- %s\n", - cmd->args.lines, strerror(errno)); - res = RES_IO_ERR; - goto error; - } + args.file = stdin; + args.filename = "stdin"; } - res = shtr_line_list_create_from_stream(cmd->shtr, fp, lines); + res = shtr_line_list_read(cmd->shtr, &args, lines); if(res != RES_OK) goto error; exit: diff --git a/src/test_shtr_lines.c b/src/test_shtr_lines.c @@ -338,18 +338,20 @@ test_serialization(struct shtr* shtr) }; const size_t nlines = sizeof(l) / sizeof(struct shtr_line); - struct shtr_line_list_load_args args = SHTR_LINE_LIST_LOAD_ARGS_NULL__; + struct shtr_line_list_load_args load_args = SHTR_LINE_LIST_LOAD_ARGS_NULL__; + struct shtr_line_list_read_args read_args = SHTR_LINE_LIST_READ_ARGS_NULL__; struct shtr_line_list* list1 = NULL; struct shtr_line_list* list2 = NULL; FILE* fp = NULL; - CHK(args.file = tmpfile()); - print_lines(args.file, l, nlines); - rewind(args.file); + CHK(fp = tmpfile()); + print_lines(fp, l, nlines); + rewind(fp); - CHK(shtr_line_list_load(shtr, &args, &list1) == RES_OK); - 
fclose(args.file); + load_args.file = fp; + CHK(shtr_line_list_load(shtr, &load_args, &list1) == RES_OK); + CHK(fclose(fp) == 0); CHK(fp = tmpfile()); CHK(shtr_line_list_write(NULL, fp) == RES_BAD_ARG); @@ -357,11 +359,12 @@ test_serialization(struct shtr* shtr) CHK(shtr_line_list_write(list1, fp) == RES_OK); rewind(fp); - CHK(shtr_line_list_create_from_stream(NULL, fp, &list2) == RES_BAD_ARG); - CHK(shtr_line_list_create_from_stream(shtr, NULL, &list2) == RES_BAD_ARG); - CHK(shtr_line_list_create_from_stream(shtr, fp, NULL) == RES_BAD_ARG); - CHK(shtr_line_list_create_from_stream(shtr, fp, &list2) == RES_OK); - fclose(fp); + read_args.file = fp; + CHK(shtr_line_list_read(NULL, &read_args, &list2) == RES_BAD_ARG); + CHK(shtr_line_list_read(shtr, NULL, &list2) == RES_BAD_ARG); + CHK(shtr_line_list_read(shtr, &read_args, NULL) == RES_BAD_ARG); + CHK(shtr_line_list_read(shtr, &read_args, &list2) == RES_OK); + CHK(fclose(fp) == 0); check_line_list_equality(list1, list2);