#include	<stdio.h>
#include	<signal.h>
#include	<string.h>
#include	<errno.h>
#include        <memory.h>
#ifndef SEGV_PROT
/* for System V systems like Solaris */
#define SEGV_PROT SEGV_ACCERR
#endif

#include        <sys/types.h>
#include        <sys/socket.h>
#include        <netinet/in.h>
#include        <netdb.h>
#include        <sys/fcntl.h>
#include        <sys/time.h>

#include        <sys/mman.h>
#include        <fcntl.h>
#include        <unistd.h>
#include        <sys/wait.h>
#include 	<sys/ipc.h>
#include 	<sys/shm.h>

#include        <sys/sysmp.h>
#include        <ulocks.h>
#include        <task.h>
#include        <malloc.h>
#include        <limits.h>
#include        <sys/prctl.h>
#include        <sys/schedctl.h>
#include	<assert.h>

#include	"results.h"
#include	"general.h"
#include	"init.h"
#include	"lock.h"
#include	"queue.h"
#include	"sync.h"
#include	"internal.h"
#include	"memalloc.h"
#include	"cth_sched.h"
#include	"hwlib.h"
#include	"sched_utils.h"
#include	"idebug.h"
#include	"threads.h"
#include	"sgi_init.h"
#include        "sched_io.h"
#ifdef WITH_MONITORING
#include "monitor.h"

extern void monitorinit ARGS((int procs));
static void sgi_monitorinit ARGS((int procs));
extern shared short *monitor_status;
extern shared mutex_t logicalTimeStamp_lock;
extern shared unsigned long *logicalTimeStamp_counter;
#endif

extern shared LOCK *print_sema;

/* these symbols needed from init.c */
extern struct local_info *processor;
extern short threads_started;
extern int *num_of_procs;
extern atom_int *total_threads;
extern atom_int *total_active;
typedef long *process_table_t;
extern process_table_t processes;	/* An array of the processes    */
extern shared long dynamic_memory_per_processor;
extern struct configuration config;	/* configuration        */
#define		NO_SIZE		-1
#define		NO_NODE		-1
extern shared mem_free_list_t *memory_pool;

#define SHARED_LOCK_SPACE 65536

LOCK *ss_lock;
extern cthread_t current_thread_array[MAX_PROC];
extern shared struct sched *local_scheduler[MAX_PROC];

atom_int *sync_init;

/* externals not defined by sys/shm.h */


/* other random unix externals */
extern void perror ARGS((const char *s));
extern void *memset ARGS((void *, int, size_t));
extern int select ARGS((int, fd_set *, fd_set *, fd_set *, struct timeval *));

/* defined in $OS_signal.c */
extern void setup_child_sighandlers ARGS((void (*)(), void (*)(), void (*)()));
extern void setup_scheduler_sighandlers ARGS((void));
extern void setup_chld_sighandler ARGS((void(*)()));
extern void init_sighandler_struct();
extern void setup_master_sighandlers();

extern int getpagesize ARGS((void));

/*
 * Argument block handed to child_init().  doinit() allocates one per
 * virtual processor and fills in both fields before starting it.
 */
typedef struct child_params {
    int (*start_func) ();	/* passed on to start_proc_initial_thread() */
    int node;			/* index of the virtual processor being started */
} *param_t;

/* Type used to hold a wait()/waitpid() status word. */
#define WAIT_STAT_TYPE int
/*
 * Number of the signal that terminated the child, or 0 when the child was
 * not terminated by a signal.  (This used to expand to WIFSTOPPED(), which
 * only says whether the child is stopped and never yields a signal number.)
 */
#define TERM_SIG(stat) (WIFSIGNALED(stat) ? WTERMSIG(stat) : 0)
/* Exit code field of the status word. */
#define RET_CODE(stat) WEXITSTATUS(stat)


static void abnormal ARGS((WAIT_STAT_TYPE stat, int wid));
extern RESULT init_convert ARGS((int procs));
extern void ill_hndlr();
extern void seg_hndlr();


shared int real_num_of_procs;

usptr_t *mem_handle;

extern void relinquish_processor ARGS((void));

static volatile int total_procs_exit = 0;
static int total_procs_forked = 0;
static int parent_waiting_the_end = 0;
extern void child_exit ARGS((int status));

/*
 * atexit() hook registered by doinit().  When it runs on virtual processor 0
 * while both total_procs_forked and total_procs_exit are still positive it
 * hands control to child_exit(0); in every other case it does nothing.
 */
static void
master_on_exit()
{
    DBG3("proc %d in master on exit, total_procs_forked = %d, total_procs_exit = %d\n", virtual_processor(), total_procs_forked, total_procs_exit);

    if (virtual_processor() != 0)
	return;
    if (total_procs_forked <= 0 || total_procs_exit <= 0)
	return;
    child_exit(0);
}

/*
 * Handler that child_init() installs through setup_chld_sighandler().
 * Marks every slot 0 .. total_procs_forked-1 of the processor table with
 * TERMINATE and then, under the same conditions as master_on_exit(),
 * calls child_exit(0).
 */
static void
some_child_exitted()
{
    int slot = 0;

    DBG3("proc %d in some_child_exitted, total_procs_forked = %d, total_procs_exit = %d\n", virtual_processor(), total_procs_forked, total_procs_exit);

    while (slot < total_procs_forked) {
	processor[slot].terminate = TERMINATE;
	slot++;
    }

    if (virtual_processor() != 0)
	return;
    if (total_procs_forked <= 0 || total_procs_exit <= 0)
	return;
    child_exit(0);
}

/*
 * Alias for virtual_processor(): returns the index of the calling process
 * in the `processes' table.
 */
int 
current_processor()
{
    return virtual_processor();
}

/*
 * Return the index of the calling process in the `processes' table.
 *
 * While slot 0 of the table is still 0 the answer is always 0.  Otherwise
 * the first MAX_PROC slots are searched for getpid(); if the pid is not
 * there, the caller yields through relinquish_processor() and searches
 * again, repeating until the pid shows up.
 */
int
virtual_processor(void)
{
    pid_t self;
    int slot;

    if (processes[0] == 0)
	return 0;

    for (;;) {
	self = getpid();
	for (slot = 0; slot < MAX_PROC; slot++) {
	    if (processes[slot] == self)
		return slot;
	}
	/* Maybe it's not stored there yet... */
	relinquish_processor();
    }
}

/*
 * Entry point of every virtual processor.  doinit() passes it to sprocsp()
 * for nodes 1 and up, and calls it directly for node 0.
 *
 * arg is a param_t (struct child_params *) carrying the node index and the
 * start function.  The routine installs the per-process signal handlers,
 * binds the node's scheduler, takes its turn in the sync_init handshake to
 * run start_proc_initial_thread(), waits until every processor has done the
 * same, and finally enters the scheduler.  Whether it ever returns depends
 * on the scheduler's schedule() routine, which is not visible here.
 */
void
child_init(arg)
void *arg;
{
    param_t child_arg = (param_t) arg;
    int (*start_func) ();
    int node;

    setup_child_sighandlers(ill_hndlr, seg_hndlr, seg_hndlr);
    setup_chld_sighandler(some_child_exitted);
    /* presumably requests gang scheduling for the process group -- confirm
     * against the IRIX schedctl() documentation */
    schedctl(SCHEDMODE, SGS_GANG);

    node = child_arg->node;
    start_func = child_arg->start_func;
    current_thread_array[node] = 0;
    local_scheduler[node] = processor[node].scheduler;

    /* prctl() failures are reported but are not fatal */
    if (prctl(PR_SETEXITSIG, SIGCHLD) == -1)
	perror("prctl setexitsig");
    if (prctl(PR_TERMCHILD, SIGCHLD) == -1)
	perror("prctl termchld");

    DBG3("In child %d, processor = %lx, processes = %lx\n",
	 node, (long) processor, (long) processes);
    DBG4("In child %d no_of_procs = %lx *no_of_procs = %d mem_pool = %lx\n",
	 node, (long) num_of_procs, *num_of_procs,
	 (long) memory_pool);

    setup_scheduler_sighandlers();

    /* optional per-processor hook supplied by the scheduler */
    if (local_scheduler[node]->sched_vproc_init != 0) {
	(local_scheduler[node]->sched_vproc_init) (node);
    }
    threads_started = 1;

/*******************************************************************/
/* sync_init has to be equal to current_processor + 1 to make sure */
/* that all the child processes wait till the child 0 is done      */
/* forking the init thread. This is necessary so that the monitor  */
/* thread doesn't get the chance to lock the list->lock before the */
/* init thread executes add_thread_to_lm_list                      */
/*                                                                 */
/* doinit() starts sync_init at total_procs and each processor     */
/* decrements it after start_proc_initial_thread(), so processors  */
/* pass this gate one at a time, highest node first.               */
/*******************************************************************/
    while (*sync_init != node + 1) {
	struct timeval delay_time;
	int delay_unit = (*num_of_procs - node + 1);
	delay_time.tv_sec = 0;
	delay_time.tv_usec = delay_unit * 10000;	/* delay_unit * 10 ms, in microseconds */
	DBG3("Sync init = %d  current proc = %d delaying for %d\n", *sync_init,
	     node, delay_unit);
	/* select() with no descriptors is used purely as a sleep */
	select(32, NULL, NULL, NULL, &delay_time);
    }

    start_proc_initial_thread(start_func);

    /* release the next processor, then wait until all of them are through */
    atomadd(sync_init, -1);
    while (*sync_init != 0) {
	struct timeval delay_time;
	delay_time.tv_sec = 0;
	delay_time.tv_usec = (*sync_init) * 10000;	/* 10 ms per processor still pending */
	DBG2("Sync post init = %d  current proc = %d\n", *sync_init, node);
	select(32, NULL, NULL, NULL, &delay_time);
    }
    /* the same two prctl() requests as above, issued a second time */
    if (prctl(PR_SETEXITSIG, SIGCHLD) == -1)
	perror("prctl setexitsig");
    if (prctl(PR_TERMCHILD, SIGCHLD) == -1)
	perror("prctl termchld");

    DBG("Children initialized. Starting our queue\n");
    if ((start_func == NULL) && (node == 0)) {
	/* node 0 without a start function: mark the initial thread as
	 * current and already started, then switch into the scheduler */
	current_thread_array[node] = *initial_thread;
	(*initial_thread)->already_started = 1;
	swap_context_with_func((void (*)()) local_scheduler[node]->schedule,
			       NULL);
    } else {
	local_scheduler[node]->schedule();
    }
}				/* child init */

/*
 * Handler passed to setup_master_sighandlers() by doinit().  Sends SIGKILL
 * to the first *num_of_procs entries of the process table and then calls
 * exit(1).  Nothing is done here to release shared memory.
 *
 * NOTE(review): slot 0 holds the master's own pid (doinit() stores getpid()
 * there), so when this runs in the master the first kill() targets the
 * caller itself -- confirm that this is intended.
 */
int
quit_hndlr()
{
    int slot;

    for (slot = 0; slot < *num_of_procs; slot++)
	kill((int) processes[slot], SIGKILL);

    exit(1);

    /* NOTREACHED */
    return 0;
}

/*
 * Handler that child_init() passes as the first argument of
 * setup_child_sighandlers().  Flushes stdio, reports an illegal instruction
 * on the current virtual processor together with the thread recorded as
 * current for it, and terminates the process with exit status 4.
 */
void
ill_hndlr()
{
    int proc;

    fflush(stdout);
    fflush(stderr);

    proc = virtual_processor();
    fprintf(stderr, "Illegal instruction signal.  Possible stack overflow on processor %d.\n", proc);
    fputs("Thread ", stderr);
    fprint_thread(stderr, current_thread_array[proc]);
    fputs(" was active.\nTerminating program.\n", stderr);

    exit(4);
}

void
seg_hndlr(signo, code, addr)
int signo, code;
void *addr;
{
    cthread_t current_thread = current_thread_array[virtual_processor()];

    fflush(stdout);
    fflush(stderr);
    if (signo == 11) {
	fprintf(stderr, "Segmentation fault signal (11) on processor %d.\n",
		virtual_processor());
    } else {
	fprintf(stderr, "Signal %d on processor %d.\n", signo,
		virtual_processor());
    }
    DBG4("signo %d, code %d, addr %lx, stacktop %lx\n",
	 signo, code, (long) addr, (long) current_thread->stack_top);
    fprintf(stderr, "Thread ");
    fprint_thread(stderr, current_thread);
    fprintf(stderr, " was active.\nTerminating program.\n");
    fflush(stderr);
    exit(1);
}

/*
 * Handler passed to setup_master_sighandlers() by doinit(): terminates the
 * process with exit status 1.  The return statement only satisfies the
 * int return type.
 */
int
intr_hndlr()
{
    exit(1);

    /* NOTREACHED */
    return 0;
}

/*
 * Configure and create the shared arena and store its handle in
 * mem_handle.
 *
 * The arena is configured for 2 * MAX_PROC users, type US_SHAREDONLY, an
 * initial size of one megabyte and autogrow enabled (plus CONF_AUTORESV
 * where the platform defines it).  usinit() is first tried on "/dev/null"
 * and, if that fails, on a per-pid file under /tmp.  Any failure prints a
 * diagnostic and terminates the process.
 *
 * procs is accepted for interface compatibility and is not used.
 */
void
create_shared_arena(procs)
int procs;
{
    extern int _utrace;
    char fname[128];
    long arena_bytes = 1024 * 1024;	/* potentially grow to a megabyte
					 * for shared locks */

    _utrace = 0;		/* set to 1 for detailed usinit traces */

    DBG1("Configuring arena with size for %d processors\n", 2 * MAX_PROC);
    if (usconfig(CONF_INITUSERS, 2 * MAX_PROC) < 0) {
	perror("conf_initusers configuration error for the shared arena\n");
	exit(-1);
    }
    if (usconfig(CONF_ARENATYPE, US_SHAREDONLY) < 0) {
	perror("conf_arenatype configuration error for the shared arena\n");
	exit(-1);
    }
    if (usconfig(CONF_INITSIZE, arena_bytes) == -1) {
	perror("configuration error for the shared arena\n");
	exit(1);
    }
    if (usconfig(CONF_AUTOGROW, 1) == -1) {
	perror("configuration error for the shared arena autogrow\n");
	exit(1);
    }
#ifdef CONF_AUTORESV
    if (usconfig(CONF_AUTORESV, arena_bytes) == -1) {
	perror("configuration error for the shared arena autoresv\n");
	exit(1);
    }
#endif

    mem_handle = usinit("/dev/null");
    if (mem_handle != NULL)
	return;

    /* try again in /tmp */
    sprintf(fname, "/tmp/.shm_mem_%lx", (long) getpid());
    mem_handle = usinit(fname);
    if (mem_handle == NULL) {
	perror("usinit failed");
	exit(-1);
    }
}

/*
 * Bring up the whole runtime: create the shared arena, lay out the small
 * shared counters and locks, initialise the per-processor structures, start
 * one process per additional virtual processor with sprocsp(), and finally
 * run child_init() for node 0 in the calling process.
 *
 * start_func  handed to every child_init() through its parameter block
 * procs       number of compute processors requested; with monitoring
 *             enabled the local-monitor (and steering) processes are added
 *             on top to obtain the number of processes actually started
 *
 * Returns T_SUCCEED if child_init() for node 0 returns.  If a process
 * cannot be created, the children started so far are killed and reaped and
 * the master terminates with status 1.
 */
RESULT
doinit(start_func, procs)
int (*start_func) ();
int procs;
{
    int node, i;
    long *shared_shorts;
    process_table_t pptr;
    WAIT_STAT_TYPE stat;
    int total_procs = procs;
    param_t param_block;

#ifdef WITH_MONITORING
    if (monitoring_enabled) {
	total_procs += number_of_lms;
	if (steering_enabled)
	    total_procs++;
    }
#endif
    /* 
     * Initialize the signal handler data structure.
     */
    init_sighandler_struct();

    create_shared_arena(total_procs);
#ifdef WITH_MONITORING
    if (monitoring_enabled)
	sgi_monitorinit(total_procs);
#endif

    /*
     * One allocation is carved into long-sized slots, one per shared
     * counter or lock below.
     */
    shared_shorts = (long *) allocate_and_share(SHARED_SHORT_SPACE, -1);

    num_of_procs = &real_num_of_procs;
    *num_of_procs = procs;
    total_threads = (short *) (shared_shorts++);
    *total_threads = 0;
    total_active = (short *) (shared_shorts++);
    *total_active = 0;
    /* child_init() counts this down from total_procs to 0 during start-up */
    sync_init = (short *) (shared_shorts++);
    *sync_init = total_procs;
#ifdef WITH_MONITORING
    monitor_status = (short *) (shared_shorts++);
    *monitor_status = (short) MONITOR_NOT_INITIALIZED;
#endif
    ss_lock = (LOCK *) (shared_shorts++);

    LOCK_INIT(*ss_lock);

    print_sema = (LOCK *) (shared_shorts++);

    LOCK_INIT(*print_sema);

    init_shared_data(total_procs);

    DBG1("Before procinit total_procs %d \n", total_procs);
    for (node = 0; node < total_procs; node++) {
	procinit(node);
	DBG1("procinit for node %d done \n", node);
    }

    DBG("Per processor structures initiated\n");

    if (atexit(master_on_exit) != 0)
	perror("atexit");

    total_procs_forked = procs;
    total_procs_exit = procs;	/* for the overwritten "exit" */

    setup_master_sighandlers(intr_hndlr, quit_hndlr);
    if (init_global_io() < 0)
	exit(1);

    pptr = &processes[1];

    if (total_procs > 1) {
	int config_res;
	/* setup parameters for implicit arena creation */
	if ((config_res = usconfig(CONF_STHREADIOOFF, 1)) == -1) {
	    perror("configuration error sthreadIOoff\n");
	    exit(1);
	}
	if ((config_res = usconfig(CONF_STHREADMISCOFF, 1)) == -1) {
	    perror("configuration error sthreadmiscoff\n");
	    exit(1);
	}
	if ((config_res = usconfig(CONF_INITUSERS, MAX_PROC + 2)) == -1) {
	    perror("configuration error maxproc stdio\n");
	    exit(1);
	}
    }
    /* slot 0 is the master; children fill slots 1 .. total_procs-1 */
    processes[0] = getpid();
    for (node = 1; node < total_procs; node++) {
	param_block = (void *) allocate_and_share(sizeof(struct
						      child_params), -1);
	param_block->node = node;
	param_block->start_func = start_func;
	/* if((*pptr++ = sprocsp(child_init, PR_SALL|PR_NOLIBC, */
	/*
	 * sprocsp() returns the new pid on success and -1 on failure.
	 * It never returns 0, so testing for 0 would let a failed
	 * creation go unnoticed.
	 */
	if ((*pptr++ = sprocsp(child_init, PR_SALL,
			       param_block, NULL, 10240)) < 0) {
	    perror("fork failed");
	    fprintf(stderr, "Can't support so many processes.  Reduce -num_procs.\n");
	    goto kill_children;
	}
    }
    param_block = (void *) allocate_and_share(sizeof(struct
						     child_params), -1);
    param_block->node = 0;
    param_block->start_func = start_func;
    child_init((void *) param_block);
    return T_SUCCEED;
  kill_children:
    /* DON'T ADD PRINTFs!  Triggers kernel bug for IRIX 6! */

    /*
     * Only slots 1 .. node-1 hold children that were really created; slot
     * `node' holds the failed sprocsp() result and later slots were never
     * filled in.  Passing such values to kill() would signal a whole
     * process group (0) or every process of the user (-1), so they are
     * never touched.  The default SIGCHLD disposition is restored first so
     * that the children are reaped here rather than by a handler.
     */
    signal(SIGCHLD, SIG_DFL);
    for (i = 1; i < node; i++) {
	if (processes[i] <= 0)
	    continue;
	printf("Killed child %lx in logical processor %d\n",
	       processes[i], i);
	kill((pid_t) processes[i], SIGKILL);
	/* wait for exactly this child, retrying when interrupted */
	while (waitpid((pid_t) processes[i], (int *) &stat, 0) == -1 &&
	       errno == EINTR)
	    continue;
	printf("finish kill\n");
    }
    /*
     * Leave without running the atexit hook: master_on_exit() would go on
     * to wait for children that no longer exist.
     */
    fflush(stdout);
    _exit(1);

    /* NOTREACHED */
    return T_SUCCEED;
}

static void child_exit_handler ARGS((int wid, WAIT_STAT_TYPE stat));

/*
 * SIGCHLD handler: collects, without blocking, every child status change
 * that is currently pending and passes each one to child_exit_handler().
 * Always returns 0.  The handler arguments are not used.
 *
 * NOTE(review): the loop stops only on a return of 0, so a -1 result (for
 * example when no children are left) is also handed to
 * child_exit_handler(), together with whatever `stat' holds at that
 * moment.  Other code may depend on this -- confirm before changing it.
 */
#ifdef HAVE_SIGINFO_H
int
chld_hndlr(signo, siginfo, context)
int signo;
siginfo_t siginfo;
void *context;
#else
int
chld_hndlr(signo, code, scp, addr)
int signo, code;
void *scp;
char *addr;
#endif
{
    WAIT_STAT_TYPE stat;
    int wid;

    for (;;) {
#if defined(__svr4__) || defined(__SVR4)
	wid = waitpid((pid_t) - 1, (int *) &stat, WNOHANG | WUNTRACED);
#else
	wid = wait3((int *) &stat, WNOHANG | WUNTRACED, NULL);
#endif
	if (wid == 0)
	    break;
	child_exit_handler(wid, stat);
    }
    return 0;
}


/*
 * Final cleanup performed just before the process exits: closes the global
 * I/O layer via close_global_io().
 */
void
clean_shut_down()
{
    close_global_io();
}

/*
 * Account for the termination of one process and drive the shutdown.
 *
 * wid   pid that terminated; child_exit() passes processes[0] to announce
 *       that the master itself is finishing
 * stat  wait status belonging to wid
 *
 * Every call decrements total_procs_exit.  On an abnormal status the
 * remaining children are told to terminate and are killed, and the event
 * is reported through abnormal().  Once the count reaches zero, or once
 * the master has been told to finish and all outstanding children have
 * been reaped, the routine calls clean_shut_down() and exit(0).
 * Otherwise it returns to the caller.
 */
static void
child_exit_handler(wid, stat)
int wid;
WAIT_STAT_TYPE stat;
{
    int i, force_parent_exit = 0;

    DBG3("Child %d terminated with termsig %d, retcode %d\n",
	 wid, TERM_SIG(stat), RET_CODE(stat));


    total_procs_exit--;
    if (TERM_SIG(stat) != 0 || RET_CODE(stat) != 0) {
	if (parent_waiting_the_end < 0) {
	    parent_waiting_the_end = 1;

	    /* 
	     * the parent is already ended and waiting for the children
	     * signal all the remaining children to exit 
	     */
	    for (i = 1; i < total_procs_forked; i++) {
		if (processes[i] != wid) {
		    DBG1("Killing child %lx\n", processes[i]);
		    kill(processes[i], SIGKILL);
		}
	    }
	} else if (parent_waiting_the_end == 0) {
	    /* first abnormal exit while the master is still running */
	    parent_waiting_the_end = 1;
	    force_parent_exit = 1;
	    /* also count the master, which is about to be forced out */
	    if (wid != processes[0])
		total_procs_exit--;

	    /* ask politely first, give the children a second, then kill */
	    for (i = 1; i < total_procs_forked; i++) {
		processor[i].terminate = TERMINATE;
	    }
	    sleep(1);
	    for (i = 1; i < total_procs_forked; i++) {
		if (processes[i] != wid) {
		    DBG1("Killing child %lx\n", processes[i]);
		    kill(processes[i], SIGKILL);
		}
	    }
	} else {
	    /* the parent is already signaled to finish */
	}
	abnormal(stat, wid);
    } else {
	DBG("Normal exit\n");
	if (wid == processes[0]) {
	    force_parent_exit = 1;
	}
    }

    if (total_procs_exit <= 0) {
	clean_shut_down();
	exit(0);
    }
    if (force_parent_exit != 0) {
	while (total_procs_exit > 0) {
	    wid = waitpid((pid_t) - 1, (int *) &stat, WUNTRACED);
	    if (wid == -1) {
		if (errno == EINTR)
		    continue;	/* interrupted: simply wait again */
		/*
		 * Any other failure will not go away by retrying; in
		 * particular ECHILD means nothing is left to reap.
		 * Stop waiting instead of spinning forever.
		 */
		if (errno != ECHILD)
		    perror("Waitpid error");
		break;
	    } else {
		total_procs_exit--;
		if (RET_CODE(stat) != 0) {
		    abnormal(stat, wid);
		}
	    }
	}
	clean_shut_down();
	exit(0);
    }
}

/*
 * Terminate the calling virtual processor with the given exit status.
 *
 * On virtual processor 0 the shutdown is started first: unless a shutdown
 * is already in progress, processors 1 .. total_procs_forked-1 are flagged
 * with TERMINATE and parent_waiting_the_end is set to -1; then
 * child_exit_handler() is invoked on behalf of the master.  That call may
 * itself end the process.  On every other processor the routine goes
 * straight to exit(status).
 */
void
child_exit(status)
int status;
{
    int slot;

    DBG1("Processor %d in child_exit\n", virtual_processor());

    if (virtual_processor() != 0)
	exit(status);

    if (parent_waiting_the_end == 0) {
	/* 
	 * the loop is mainly meant to shut down the steering and 
	 * local monitor processes 
	 */
	slot = 1;
	while (slot < total_procs_forked) {
	    processor[slot].terminate = TERMINATE;
	    slot++;
	}
	parent_waiting_the_end = -1;
    }
    child_exit_handler(processes[0], 0);

    exit(status);
}

any_t
allocate_and_share(size, node)
unsigned size;
int node;
{
    u_char *address;
    if ((address = malloc(size)) == NULL) {
	perror("can't allocate memory");
	exit(1);
    }
    return ((any_t) address);
}

any_t
allocate_and_copy(size)
unsigned size;
{
    char *address;

    if ((address = (char *) malloc(size)) == NULL) {
	perror("Internal allocate_and_copy");
	exit(1);
    }
    return ((any_t) address);
}

/*
 * Obtain `size' bytes for a thread stack.  Thin wrapper around
 * allocate_and_copy(), which terminates the process when the allocation
 * fails.
 */
char *
get_stack_memory(size)
int size;
{
    return (char *) allocate_and_copy(size);
}

/*
 * Report on stdout that process `wid' ended with wait status `stat'.
 *
 * The process table is scanned up to its PROC_SENTINEL entry.  If wid is
 * found, its slot number is printed as the logical processor; exit code 4
 * is reported as an illegal instruction (the status ill_hndlr() exits
 * with).  If it is not found, the event is reported as an unknown child.
 */
static void
abnormal(stat, wid)
WAIT_STAT_TYPE stat;
int wid;
{
    long slot = 0;

    fflush(stdout);
    fflush(stderr);

    while (processes[slot] != PROC_SENTINEL && processes[slot] != wid)
	slot++;

    if (processes[slot] == PROC_SENTINEL) {
	printf("Received a termination of an unknown child (%d) termsig %d retcode %d\n",
	       wid, TERM_SIG(stat), RET_CODE(stat));
	return;
    }
    if (RET_CODE(stat) == 4) {
	printf("Child %d died with illegal instruction signal 4 on logical proc %ld\n",
	       wid, slot);
	return;
    }
    printf("Child %d died with termination signal %d on logical proc %ld\n",
	   wid, TERM_SIG(stat), slot);
}

/*
 * Fill in the logical-to-physical conversion table with the identity
 * mapping for the first `procs' processors.  Always returns T_SUCCEED.
 */
extern RESULT
init_convert(procs)
int procs;
{
    int logical;

    for (logical = 0; logical < procs; logical++)
	processor[logical].logical_to_phys = logical;

    return (T_SUCCEED);
}				/* init_convert */

/*
 * Validate the requested processor count.
 *
 * With monitoring enabled the local-monitor processes (and the steering
 * process, if enabled) are added to `procs' first.  A total above MAX_PROC
 * is fatal (exit status 1); a total above the value reported by
 * cpus_online() only produces a warning on stdout.
 */
void
check_num_procs(procs)
int procs;
{
    int wanted = procs;
    int available = cpus_online();

#ifdef WITH_MONITORING
    if (monitoring_enabled) {
	/* procs reserved for monitoring, plus one for steering */
	wanted += number_of_lms;
	if (steering_enabled)
	    wanted += 1;
    }
#endif
    if (wanted > MAX_PROC) {
	fprintf(stderr, "Requested number of processors exceeds Cthreads internal limits\n");
	exit(1);
    }
    if (wanted <= available)
	return;

    printf("WARNING!! Value of no of processors is more than available. Expect Context Switch\n");
    fflush(stdout);
}

/*
 * Return the stack size to allocate for a requested `size': the request
 * plus one page for protection, rounded up to a whole number of pages.
 */
int
check_stack_size(size)
int size;
{
    int page = getpagesize();
    int padded = size + page;	/* add one page for protection */

    /* already an integral multiple of the page size? */
    if (padded % page == 0)
	return padded;

    /* otherwise round up to the next page boundary */
    return ((padded / page) + 1) * page;
}

void *
allocate_monmem(size, node)
int size;
int node;
{
    u_char *address;
    if ((address = malloc(size)) == NULL) {
	perror("can't allocate monitor memory");
	exit(1);
    }
    return ((any_t) address);
}

#ifdef WITH_MONITORING
/*
 * SGI entry point for monitor initialisation; simply forwards the process
 * count to the generic monitorinit().
 */
static void
sgi_monitorinit(procs)
int procs;
{
    monitorinit(procs);
}
#endif

/*
 * Give up the CPU for a short while: sleeps for up to 10000 microseconds
 * (10 ms) by calling select() with no descriptor sets and only a timeout.
 */
void
relinquish_processor()
{
    struct timeval nap;

    nap.tv_usec = 10000;	/* 10 ms, expressed in microseconds */
    nap.tv_sec = 0;
    select(32, NULL, NULL, NULL, &nap);
}

shared unsigned long start_time;

/*
 * Return a timestamp for monitoring events.
 *
 * Built without WITH_MONITORING this always returns 0.  Otherwise, with
 * logical timestamps selected, it returns the current value of the shared
 * counter and increments it under logicalTimeStamp_lock; if they are not
 * selected it returns the wall-clock time in microseconds minus start_time.
 */
unsigned long
cthread_timestamp()
{
#ifndef WITH_MONITORING
    return 0;
#else
    struct timeval now;
    unsigned long stamp;

    if (!logicalTimeStamp) {
	gettimeofday(&now, NULL);
	stamp = (unsigned long) now.tv_sec * 1000000 + now.tv_usec;
	stamp -= start_time;
	return stamp;
    }

    internal_mutex_lock(&logicalTimeStamp_lock->mlock);
    stamp = (*logicalTimeStamp_counter)++;
    internal_mutex_unlock(&logicalTimeStamp_lock->mlock);
    return stamp;
#endif
}

/*
 * Report the timestamp origin and resolution used by cthread_timestamp().
 *
 * start_time_p  receives 0 for logical timestamps, otherwise the current
 *               wall-clock time in microseconds
 * resolution_p  receives 1 for logical timestamps, otherwise 1000000
 *
 * Built without WITH_MONITORING neither output is written.
 */
void
get_start_timestamp_and_resolution(start_time_p, resolution_p)
unsigned long *start_time_p;
unsigned long *resolution_p;
{
#ifdef WITH_MONITORING
    struct timeval now;

    if (!logicalTimeStamp) {
	gettimeofday(&now, NULL);
	*resolution_p = 1000000;
	*start_time_p = (unsigned long) now.tv_sec * 1000000 + now.tv_usec;
	return;
    }
    *resolution_p = 1;
    *start_time_p = 0;
#endif
}


