
/* sort binary file */

#include <stdio.h>
#include <fcntl.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#include "io.h"
#include "io_interface.h"
#include "io_internal.h"
#include "unix_defs.h"

#define INIT_MAX_RECS   10000

/*
 * One data record held in memory while the input file is being sorted.
 */
typedef struct _Record {
    /* format index of the record, as returned by index_of_IOformat() */
    int index;
    /* malloc'd buffer that receives the extracted sort key (see get_sort_item) */
    void *sort_item;
    /* malloc'd buffer holding the record contents read from the input file */
    char *data;
} DataRecord, *DataRecordPtr;

/*
 * qsort() comparison routine for records whose sort key is stored as a
 * long in sort_item (used for integer and char sort fields).
 *
 * Returns a positive value, a negative value or zero as the key of d1 is
 * greater than, less than or equal to the key of d2.
 *
 * The keys are compared explicitly rather than subtracted: the difference
 * of two longs can overflow, and casting it to int can truncate it, both
 * of which may produce a result with the wrong sign.
 */
static int 
long_compare(d1, d2)
DataRecordPtr d1, d2;
{
    long i1, i2;

    i1 = *((long *) d1->sort_item);
    i2 = *((long *) d2->sort_item);
    if (i1 > i2)
	return 1;
    else if (i1 < i2)
	return -1;
    else
	return 0;
}

/*
 * qsort() comparison routine for records whose sort key is stored as an
 * unsigned long in sort_item.
 *
 * Returns a positive value, a negative value or zero as the key of d1 is
 * greater than, less than or equal to the key of d2.
 *
 * The keys are compared explicitly rather than subtracted: an unsigned
 * difference wraps around instead of going negative, and casting it to
 * int can truncate it, so the subtraction idiom mis-orders keys.
 */
static int 
ulong_compare(d1, d2)
DataRecordPtr d1, d2;
{
    unsigned long i1, i2;

    i1 = *((unsigned long *) d1->sort_item);
    i2 = *((unsigned long *) d2->sort_item);
    if (i1 > i2)
	return 1;
    else if (i1 < i2)
	return -1;
    else
	return 0;
}

/*
 * qsort() comparison routine for records whose sort key is a char *
 * stored in sort_item.
 *
 * A NULL string sorts before any non-NULL string; two NULL strings (or
 * two identical pointers) compare equal.  Otherwise the result is that
 * of strcmp().
 */
static int 
string_compare(d1, d2)
DataRecordPtr d1, d2;
{
    char *s1, *s2;

    s1 = *((char **) d1->sort_item);
    s2 = *((char **) d2->sort_item);
    /*
     * Handle equal pointers first so that two NULL keys compare equal.
     * Returning -1 for (NULL, NULL) in both argument orders would give
     * qsort() an inconsistent ordering.
     */
    if (s1 == s2)
	return 0;
    if (s1 == NULL)
	return -1;
    if (s2 == NULL)
	return 1;
    return strcmp(s1, s2);
}

/*
 * qsort() comparison routine for signed integer keys that are wider than
 * a long.  sort_item holds two consecutive words: an unsigned low word
 * followed by a signed high word (as filled in by get_sort_item).
 *
 * The high words decide the ordering; the low words are consulted only
 * when the high words are equal.  Both levels use explicit comparison,
 * since subtracting the high words and casting to int can overflow or
 * truncate and so return a value with the wrong sign.
 */
static int 
long8_compare(d1, d2)
DataRecordPtr d1, d2;
{
    unsigned long low1, low2;
    long high1, high2;

    low1 = *((unsigned long *) d1->sort_item);
    low2 = *((unsigned long *) d2->sort_item);
    high1 = *(((long *) d1->sort_item) + 1);
    high2 = *(((long *) d2->sort_item) + 1);

    if (high1 > high2)
	return 1;
    if (high1 < high2)
	return -1;

    /* high words equal, order by the unsigned low words */
    if (low1 > low2)
	return 1;
    else if (low1 < low2)
	return -1;
    else
	return 0;
}

/*
 * qsort() comparison routine for unsigned integer keys that are wider
 * than a long.  sort_item holds two consecutive unsigned words: the low
 * word followed by the high word (as filled in by get_sort_item).
 *
 * The high words decide the ordering; the low words are consulted only
 * when the high words are equal.  The high words are kept unsigned and
 * compared explicitly: treating them as signed (or subtracting them and
 * casting to int) mis-orders keys whose high word has its top bit set.
 */
static int 
ulong8_compare(d1, d2)
DataRecordPtr d1, d2;
{
    unsigned long low1, low2;
    unsigned long high1, high2;

    low1 = *((unsigned long *) d1->sort_item);
    low2 = *((unsigned long *) d2->sort_item);
    high1 = *(((unsigned long *) d1->sort_item) + 1);
    high2 = *(((unsigned long *) d2->sort_item) + 1);

    if (high1 > high2)
	return 1;
    if (high1 < high2)
	return -1;

    /* high words equal, order by the low words */
    if (low1 > low2)
	return 1;
    else if (low1 < low2)
	return -1;
    else
	return 0;
}

/*
 * qsort() comparison routine for records whose sort key is stored as a
 * double in sort_item.
 *
 * Yields 1 when the key of d1 is greater, -1 when it is smaller, and 0
 * when neither relation holds.
 */
static int 
double_compare(d1, d2)
DataRecordPtr d1, d2;
{
    double lhs = *((double *) d1->sort_item);
    double rhs = *((double *) d2->sort_item);

    if (lhs < rhs)
	return -1;
    return (lhs > rhs) ? 1 : 0;
}

/*
 * Work out how many bytes the in-memory sort key needs for a field of
 * the given type and declared size.
 *
 *   sort_type - data type of the sort field
 *   sort_size - size of the field as declared in its format
 *
 * Integer, unsigned and float fields no larger than long / unsigned
 * long / double are widened to that native size; larger ones keep their
 * declared size.  Char fields use sizeof(long) and string fields use
 * sizeof(char *).  All other types keep the declared size.
 *
 * Does not return for unknown_type: prints a message and exits.
 */
static int 
get_sort_size(sort_type, sort_size)
IOdata_type sort_type;
int sort_size;
{
    if (sort_type == unknown_type) {
	fprintf(stderr, "Nothing to sort\n");
	exit(1);
    }
    if (sort_type == integer_type) {
	return (sort_size <= sizeof(long)) ? (int) sizeof(long) : sort_size;
    }
    if (sort_type == unsigned_type) {
	return (sort_size <= sizeof(unsigned long))
	    ? (int) sizeof(unsigned long) : sort_size;
    }
    if (sort_type == float_type) {
	return (sort_size <= sizeof(double)) ? (int) sizeof(double) : sort_size;
    }
    if (sort_type == char_type) {
	return (int) sizeof(long);
    }
    if (sort_type == string_type) {
	return (int) sizeof(char *);
    }
    /* enumeration_type, boolean_type: size is left as declared */
    return sort_size;
}

/*
 * Extract the sort key of one record into its sort_item buffer.
 *
 *   data_rec  - record whose data buffer is read and whose sort_item
 *               buffer is written
 *   iofield   - field descriptor used to fetch the value from the data
 *   sort_type - data type of the sort field
 *   sort_size - size of the sort key in bytes
 *
 * Integer and unsigned keys wider than a long are stored as two words
 * (low word first, high word second).  Boolean, enumeration and unknown
 * types leave sort_item untouched.
 */
static void 
get_sort_item(data_rec, iofield, sort_type, sort_size)
DataRecordPtr data_rec;
IOFieldPtr iofield;
IOdata_type sort_type;
int sort_size;
{
    void *key = data_rec->sort_item;
    char *record = data_rec->data;

    switch (sort_type) {
    case integer_type:
	if (sort_size > sizeof(long)) {
	    /* wide integer: fetch it as a low/high pair of words */
	    assert(sort_size == 8);
	    get_IOlong8(iofield, record, key, ((long *) key) + 1);
	} else {
	    *((long *) key) = get_IOlong(iofield, record);
	}
	break;
    case unsigned_type:
	if (sort_size > sizeof(unsigned long)) {
	    /* wide unsigned: fetch it as a low/high pair of words */
	    assert(sort_size == 8);
	    get_IOulong8(iofield, record, key, ((unsigned long *) key) + 1);
	} else {
	    *((unsigned long *) key) = get_IOulong(iofield, record);
	}
	break;
    case float_type:
	*((double *) key) = get_IOdouble(iofield, record);
	break;
    case char_type:
	/* chars are keyed as longs */
	*((long *) key) = get_IOlong(iofield, record);
	break;
    case string_type:
	*((char **) key) = get_IOstring(iofield, record);
	break;
    case boolean_type:
    case enumeration_type:
    case unknown_type:
	/* no key is extracted for these types */
	break;
    }
}

/*
 * Declaration of create_conversion(), which is not defined in this file.
 * sort_iofile() calls it to build a conversion from an already
 * registered output format to a merged field list, so that records
 * buffered under the old layout can be rewritten.
 * NOTE(review): ARGS is presumably the project's prototype-wrapping
 * macro from one of the included headers -- confirm.
 */
extern IOConversionPtr
create_conversion ARGS((IOFormat src_ioformat,
			IOFieldList target_field_list, int target_struct_size,
			int byte_reversal,
			IOconversion_type initial_conversion,
			int string_offset_size, int converted_strings));

/*
 * Sort the data records of an IO file on one field.
 *
 *   file_in_name  - name of the file to read
 *   file_out_name - name of the file to write
 *   sort_field    - name of the field to sort on
 *
 * The whole input is read into memory.  Comments are copied to the
 * output as they are encountered; formats are registered on the output
 * file; data records are buffered, sorted with qsort() on the value of
 * sort_field and written after the input has been consumed.  Records
 * whose format lacks the sort field (or whose sort field has a type
 * inconsistent with the first one seen) are dropped.
 *
 * Returns 0 on success and -1 if either file cannot be opened or if the
 * sort field has a type for which no comparison routine exists and more
 * than one record would have to be ordered.  Calls exit(1) if no format
 * with the sort field was seen at all.
 */
extern int
sort_iofile(file_in_name, file_out_name, sort_field)
char *file_in_name, *file_out_name;
char *sort_field;
{
    int max_recs, num_recs;
    DataRecordPtr data_recs;
    IOFile file_in, file_out;
    int i;
    int format_count;
    IOdata_type sort_type = unknown_type;
    int sort_size = 0;
    int (*sort_compare) () = NULL;
    IOFieldPtr *iofields = NULL;
    IOConversionPtr *ioconvs = NULL;
    IOFormat *out_formats = NULL;
    IOFormat next_format;
    int finished = FALSE;
    int status = 0;

    if (!(file_in = open_IOfile(file_in_name, "r"))) {
	fprintf(stderr, "cannot open file %s\n", file_in_name);
	return -1;
    }
    if (!(file_out = open_IOfile(file_out_name, "w"))) {
	fprintf(stderr, "cannot open file %s\n", file_out_name);
	/* don't leave the input file open on this error path */
	close_IOfile(file_in);
	return -1;
    }
    iofields = (IOFieldPtr *) malloc(sizeof(IOFieldPtr) * 10);
    ioconvs = (IOConversionPtr *) malloc(sizeof(IOConversionPtr) * 10);
    out_formats = (IOFormat *) malloc(sizeof(IOFormat) * 10);

    format_count = 0;

    max_recs = INIT_MAX_RECS;
    num_recs = 0;
    data_recs = (DataRecordPtr) malloc(sizeof(DataRecord) * max_recs);

/*** read all records in the input file ***/
    while (!finished) {
	IOFormat format;
	char *format_name;
	char *comment;
	int index, data_size;

	switch (next_IOrecord_type(file_in)) {
	case IOcomment:
	    /* no sorting of comments, just write out */
	    comment = read_comment_IOfile(file_in);
	    if (comment)
		write_comment_IOfile(file_out, comment);
	    break;
	case IOformat:
	    format = read_format_IOfile(file_in);
	    format_name = name_of_IOformat(format);
	    index = index_of_IOformat(format);
	    if (index_of_IOformat(format) == format_count) {
		/* first time this format index is seen: grow the tables */
		IOFieldList field_list = field_list_of_IOformat(format);
		format_count++;
		iofields = (IOFieldPtr *) realloc(iofields,
				      sizeof(IOFieldPtr) * format_count);
		ioconvs = (IOConversionPtr *) realloc(ioconvs,
				 sizeof(IOConversionPtr) * format_count);
		out_formats = (IOFormat *) realloc(out_formats,
					sizeof(IOFormat) * format_count);
		set_IOconversion(file_in, format_name,
				 field_list_of_IOformat(format),
			       struct_size_IOfield(file_in, field_list));
		set_notify_of_format_change(file_in, format_name, 1);

		out_formats[index] =
		    register_IOrecord_format(format_name,
					  field_list_of_IOformat(format),
					     file_out);

	    } else {
		/* a format we've seen before...  maybe rewrite stored
		 * recs */
		IOFieldList max_field_list = NULL;
		IOFieldList old_field_list =
		field_list_of_IOformat(out_formats[index]);
		IOFieldList new_field_list = field_list_of_IOformat(format);

		new_field_list = copy_field_list(new_field_list);
		old_field_list = copy_field_list(old_field_list);
		if (compare_field_lists(new_field_list, old_field_list) != 0) {
		    /* They're different... */
		    /* 
		     ** ordering in the max_field_list() call is important.  
		     ** max_field_lists() prefers the first list, we want it 
		     ** to prefer the established one...
		     */
		    max_field_list = max_field_lists(old_field_list,
						     new_field_list);
		    if (compare_field_lists(max_field_list,
					    old_field_list) != 0) {
			/* got to change */
			int new_struct_size;
			IOConversionPtr conv;
			int rec;

			force_align_field_list(max_field_list, sizeof(char*));
			new_struct_size = struct_size_IOfield(file_in,
							 max_field_list);
			conv = create_conversion(out_formats[index],
						 max_field_list,
						 new_struct_size,
						 0 /* byte reversal */ ,
						 buffer_and_convert,
						 out_formats[index]->body->record_length,
						 TRUE);
			/* rewrite every buffered record of this format */
			for (rec = 0; rec < num_recs; rec++) {
			    if (data_recs[rec].index == index) {
				char *tmp = (char *) malloc(new_struct_size);
				IOconvert_record(conv, data_recs[rec].data,
				  tmp, NULL, NULL);
				/*
				 * NOTE(review): the previous data buffer
				 * is not freed here.  It is left alone
				 * because the converted record might
				 * still reference string data inside it
				 * -- confirm against IOconvert_record().
				 */
				data_recs[rec].data = tmp;
			    }
			}
			out_formats[index] =
			    register_IOrecord_format(format_name,
					       max_field_list, file_out);
		    }
		    /* regardless, we have to set the conversion on input */
		    set_IOconversion(file_in, format_name, max_field_list,
				     struct_size_IOfield(file_in,
							 max_field_list));
		    set_notify_of_format_change(file_in, format_name, 1);
		    free_field_list(max_field_list);
		    iofields[index] = get_local_IOfieldPtr(file_in, format_name,
							   sort_field);
		    if (iofields[index] &&
			(sort_size != get_sort_size(iofields[index]->data_type,
					       iofields[index]->size))) {
			/* Oy, sort data size changed... */
			int rec;
			sort_size = get_sort_size(iofields[index]->data_type,
						  iofields[index]->size);
			/* re-extract every buffered key at the new size */
			for (rec = 0; rec < num_recs; rec++) {
			    int this_i = data_recs[rec].index;
			    data_recs[rec].sort_item =
				(char *) realloc(data_recs[rec].sort_item,
						 sort_size);
			    get_sort_item(&data_recs[rec], iofields[this_i],
					  sort_type, sort_size);

			}
		    }
		}
		free_field_list(old_field_list);
		free_field_list(new_field_list);
	    }

	    iofields[index] = get_local_IOfieldPtr(file_in, format_name,
						   sort_field);

	    if (iofields[index] == NULL) {
		fprintf(stderr, "Warning, record type %s has no field \"%s\"\n",
			format_name, sort_field);
	    } else {
		if (sort_type == unknown_type) {
		    /* first format with the sort field fixes the key type */
		    sort_type = iofields[index]->data_type;
		    sort_size = get_sort_size(sort_type, iofields[index]->size);
		} else {
		    if (sort_type != iofields[index]->data_type) {
			fprintf(stderr, "Warning, inconsistent data types for field %s\n",
				sort_field);
			iofields[index] = NULL;
		    }
		}
	    }
	    break;
	case IOdata:
	    next_format = next_IOrecord_format(file_in);
	    index = index_of_IOformat(next_format);
	    if (num_recs >= max_recs) {
		max_recs += INIT_MAX_RECS;
		data_recs = (DataRecordPtr)
		    realloc(data_recs, sizeof(DataRecord) * max_recs);
	    }
	    data_size = next_IOrecord_length(file_in);
	    data_recs[num_recs].data = (char *) malloc(data_size);

	    read_to_buffer_IOfile(file_in, data_recs[num_recs].data,
				  data_size);
	    data_recs[num_recs].index = index;
	    data_recs[num_recs].sort_item = (void *) malloc(sort_size);
	    if (iofields[index] != NULL) {
		get_sort_item(&data_recs[num_recs], iofields[index], sort_type,
			      sort_size);
		num_recs++;
	    } else {
		/*
		 * don't increment num_recs and ignore the record.  The
		 * slot is reused by the next record, so release the
		 * buffers now or they would be leaked.
		 */
		free(data_recs[num_recs].sort_item);
		free(data_recs[num_recs].data);
		data_recs[num_recs].sort_item = NULL;
		data_recs[num_recs].data = NULL;
	    }
	    break;
	case IOerror:
	case IOend:
	    finished++;
	    break;
	}

    }

    /* pick the comparison routine matching the key representation */
    switch (sort_type) {
    case unknown_type:
	fprintf(stderr, "Nothing to sort\n");
	exit(1);
	break;
    case integer_type:
	if (sort_size <= sizeof(long)) {
	    sort_size = sizeof(long);
	    sort_compare = long_compare;
	} else {
	    sort_compare = long8_compare;
	}
	break;
    case unsigned_type:
	if (sort_size <= sizeof(unsigned long)) {
	    sort_size = sizeof(unsigned long);
	    sort_compare = ulong_compare;
	} else {
	    sort_compare = ulong8_compare;
	}
	break;
    case float_type:
	if (sort_size <= sizeof(double)) {
	    sort_size = sizeof(double);
	}
	sort_compare = double_compare;
	break;
    case char_type:
	sort_compare = long_compare;
	break;
    case string_type:
	sort_compare = string_compare;
	break;
    case boolean_type:
    case enumeration_type:
	/* no comparison routine exists for these types */
	break;
    }

    if (sort_compare != NULL) {
	qsort(data_recs, num_recs, sizeof(DataRecord), sort_compare);
    } else if (num_recs > 1) {
	/* qsort() would call through a NULL function pointer */
	fprintf(stderr, "Cannot sort on field %s, unsupported data type\n",
		sort_field);
	status = -1;
    }

    if (status == 0) {
	for (i = 0; i < num_recs; i++) {
	    int index = data_recs[i].index;
	    write_IOfile(file_out, out_formats[index], data_recs[i].data);
	}
    }
    close_IOfile(file_in);
    close_IOfile(file_out);

    /* release the working storage owned by this function */
    for (i = 0; i < num_recs; i++) {
	free(data_recs[i].sort_item);
	free(data_recs[i].data);
    }
    free(data_recs);
    free(iofields);
    free(ioconvs);
    free(out_formats);
    return status;
}


/* message printed when the command line is malformed */
static char *usage =
"Usage:  IOsort <sort field> <input file> <out file>\n";

/*
 * Program entry point.  Expects exactly three arguments: the name of the
 * field to sort on, the input file and the output file.
 *
 * Exits with status 1 if the argument count is wrong or if
 * sort_iofile() reports failure; returns 0 otherwise.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
    char *sort_field;
    char *infile, *outfile;

    if (argc != 4) {
	/* usage is plain text, never pass it as a printf format string */
	fputs(usage, stderr);
	exit(1);
    }
    sort_field = argv[1];
    infile = argv[2];
    outfile = argv[3];

    /* propagate failure to the caller instead of always exiting 0 */
    if (sort_iofile(infile, outfile, sort_field) != 0)
	return 1;
    return 0;
}
