/*
 *
 * Thread library initialization routines.
 *
 */

#include	<stdio.h>
#include	<signal.h>
#include	"assert.h"

#include	"results.h"
#include	"general.h"
#include	"init.h"
#include	"lock.h"
#include	"queue.h"
#include	"sync.h"
#include	"internal.h"
#include	"memalloc.h"
#include	"cth_sched.h"
#include	"hwlib.h"
#include	"sched_utils.h"
#include	"arch_init.h"
#include	"threads.h"

#ifdef WITH_MONITORING
#include "monitor.h"
#include "monitor_ur.h"
#endif

/*
 * NOTE(review): dirty_print is not referenced anywhere in the visible part
 * of this file; its purpose cannot be determined from here.
 */
int	 dirty_print=0;

exported shared int	allow_core_dumps;  /* core dumps allowed in children */

/*
 * Error-reporting and process-exit helpers defined elsewhere.  tell() calls
 * them when it cannot obtain the memory or the thread it needs.
 */
extern  void cthread_perror();
extern  void child_exit();

/*
 * Allocators defined elsewhere.  init_shared_data() obtains every table it
 * creates through allocate_and_share(size, node); allocate_and_copy() is not
 * called in the visible part of this file.
 */
extern  any_t   allocate_and_share(), allocate_and_copy();


/* Pointer to the number of processors in the current configuration. */
exported shared int	*num_of_procs;

/* Pointer to the number of scheduled threads. */
exported atom_int	*total_threads;

/* Pointer to the number of active threads; used to detect termination. */
exported atom_int	*total_active;

/*
 * Thread handles.  Each pointer starts out NULL; init_shared_data() points
 * them at storage obtained from allocate_and_share(), and
 * start_proc_initial_thread() stores the thread handles through them.
 */
exported cthread_t	*initial_thread = NULL;  /* the initial thread */
exported cthread_t	*monitor_threads = NULL; /* the local monitor threads,
						  * one slot per local monitor */
exported cthread_t	*steer_thread = NULL;	 /* the steering thread */
exported cthread_t	*adv_steer_thread = NULL;/* the advanced steering
						  * server thread */

/*
 * Per-processor bookkeeping table.  init_shared_data() allocates one
 * struct local_info per processor; procinit() fills in the load, terminate
 * and scheduler fields of one entry.
 */
shared struct local_info	*processor;
/* NOTE(review): not referenced in the visible part of this file. */
private struct sched    *local_scheduler[MAX_PROC];

/*
 * Per-processor scheduler configuration (defined elsewhere).  procinit()
 * copies the callbacks from sched[node] when scheduler_installed[node] is
 * TRUE.
 */
extern shared struct sched_configuration	sched[MAX_PROC];
/* Scheduler configuration that init_shared_data() fills with the monitor_*
 * callbacks and installs on the monitor/steering processors. */
struct sched_configuration	mon_sched_conf;

shared mem_free_list_t	*memory_pool;	/* The array of pointers to per	*/
                           		/* processor memory structures	*/

shared long *processes;	/* An array of the processes (or threads for NT)*/
                           		/* involved in the  library.	*/

/*
 * Used to avoid simultaneous cthread_init().  cthread_configure(),
 * cthread_init() and cthread_start() test it against 1; nothing in the
 * visible part of this file sets it.
 */
short	threads_started=0;

/*
 * Library configuration, initialised to the compile-time defaults.
 * cthread_configure() reads and updates it.
 */
struct configuration	config = {STACKSIZE,
				  MEMORY_EXPONENT,
				  THREADS_PER_PROCESSOR}; /* configuration */

/* Per-processor flag (defined elsewhere) that procinit() compares with TRUE
 * to decide between the default cth_* scheduler and the one in sched[]. */
extern shared short 		scheduler_installed[MAX_PROC];

shared long		dynamic_memory_per_processor;
					/* The dynamic memory per
					 * processor we have to 
					 * preallocate. It is
					 * 1<<config.memory_exponent
					 */

/* -------------------------------------------------------------------- */

/*
 * The currently executing thread of each processor.  fake() and dotell()
 * index it with virtual_processor() to find the calling thread.
 */
volatile cthread_t	current_thread_array[MAX_PROC];

/* The thread queue for each processor. */
volatile private locked_queue_t	local_thread_queue[MAX_PROC];

/* NOTE(review): none of the three macros below is used in the visible part
 * of this file. */
#define		MSGLEN		160
#define		NO_NODE		-1
#define		NO_SIZE		-1

/* C library routines declared by hand (K&R style, via the ARGS macro where
 * a parameter list is given). */
extern void*memset ARGS((void*, int, size_t));
extern int fflush();
extern int getpagesize();

/* cthreads functions used but not declared here */
extern cthread_t cthread_fork ARGS((any_t (*func)(), any_t arg, int node));
extern void internal_cthread_exit ARGS((any_t res));
extern RESULT internal_cthread_join ARGS((cthread_t t, any_t *resptr));
extern void verify_arg_consistency ARGS((int *procs_p));
extern cthread_t cthread_thread_alloc ARGS((any_t (*)(), any_t, int));
extern void cthread_thread_schedule ARGS((cthread_t));
extern RESULT meminit();
/* Entry points of the local-monitor and steering threads created by
 * start_proc_initial_thread(). */
extern int  monitor_process ARGS((int lm_node));
extern int  steer_process ARGS((int node));

/* functions declared in machine specific *_init.c files */
/* doinit() is what cthread_init() and cthread_start() hand control to. */
extern RESULT doinit ARGS((void (*start_func)(), int procs));
void *allocate_monmem ARGS((int size, int node));
/* Called from init_shared_data() with the total processor count. */
extern RESULT init_convert ARGS((int num_processors));


any_t
fake(start_func)
any_t (*start_func)(); 
{ /* Yes . this is the start of the function (fake ) */
   cthread_t current_thread = current_thread_array[virtual_processor()];

   DBG2("In fake with start_func %x (thread %lx)\n", (int)start_func, 
	(long)current_thread);

   internal_mutex_lock(&current_thread->tlock);
   DBG2("after lock fake with start_func %x (thread %lx)\n", (int)start_func, 
	(long)current_thread);

   current_thread->status |= S_MAIN;
   internal_mutex_unlock(&current_thread->tlock);
   DBG2("after unlock fake with start_func %x (thread %lx)\n", (int)start_func, 
	(long)current_thread);

   internal_cthread_exit((*start_func)());

   /* only to make the compiler happy, this is never reached */
   return 0;
}


void
procinit(node)
int             node;
{
    processor[node].load = 0;
    processor[node].terminate = DONT_TERMINATE;
    if (scheduler_installed[node] != TRUE) {
        processor[node].scheduler = (sched_info_t)cth_sched_global_init(node);
        (processor[node].scheduler)->sched_vproc_init = 
          (void *(*)())cth_sched_vproc_init;
        (processor[node].scheduler)->sched_thread_init = (void *(*)())0;
        (processor[node].scheduler)->sched_get_thread = 
          (void *(*)())cth_sched_get_thread;
        (processor[node].scheduler)->sched_put_thread = 
          (void *(*)())cth_sched_put_thread;
        (processor[node].scheduler)->sched_empty_readyq = 
          (void *(*)())cth_sched_empty_readyq;
        (processor[node].scheduler)->schedule = 
          (void *(*)())cth_schedule;
        (processor[node].scheduler)->proc_idle = (void *(*)())0;
    } else {
        if (sched[node].sched_global_init == 0) {
            processor[node].scheduler = 
              allocate_and_init_scb(node, sched[node].no_of_readyqs, 
                                    sched[node].no_of_threads);
        } else {
            processor[node].scheduler =
              (sched_info_t)(sched[node].sched_global_init)(node);
            (processor[node].scheduler)->sched_global_init =
              sched[node].sched_global_init;
        }
        (processor[node].scheduler)->sched_vproc_init =
          sched[node].sched_vproc_init;
        (processor[node].scheduler)->sched_thread_init =
          sched[node].sched_thread_init;
        (processor[node].scheduler)->sched_get_thread =
          sched[node].sched_get_thread;
        (processor[node].scheduler)->sched_put_thread =
          sched[node].sched_put_thread;
        (processor[node].scheduler)->sched_empty_readyq =
          sched[node].sched_empty_readyq;
        (processor[node].scheduler)->proc_idle = sched[node].proc_idle;
        if (sched[node].schedule == 0) {
           (processor[node].scheduler)->schedule = 
             (void *(*)())default_schedule;
       } else {
           (processor[node].scheduler)->schedule = sched[node].schedule;
       }
   }
}

/*
 * start_proc_initial_thread -- create the first thread of the calling
 * virtual processor.
 *
 * On virtual processor 0 this allocates, names and schedules the initial
 * thread, which runs fake(func).  When built WITH_MONITORING, a processor
 * whose index is >= *num_of_procs instead gets a steering thread, an
 * advanced-steering thread or a local monitor thread, after which
 * total_active and total_threads are each adjusted by -1.
 * Processors 1 .. *num_of_procs-1 get nothing from this routine.
 *
 * func: the user's start routine; passed to fake() as its argument.
 */
void
start_proc_initial_thread(func)
int	(*func)();
{
    int current_proc = virtual_processor();
    if (current_proc == 0) {
	*initial_thread = cthread_thread_alloc(fake, (any_t) func, 
					      N_CURRENT);
	cthread_set_name(*initial_thread, "Initial Thread");
	cthread_thread_schedule(*initial_thread);
    }
#ifdef WITH_MONITORING	
    if (current_proc >= (*num_of_procs)) {
	/* Scratch buffer for the thread name.  The longest format below is
	 * "Local Monitor Thread #%d" (22 characters plus the digits).
	 * NOTE(review): whether cthread_set_name() copies the string or
	 * keeps the pointer is not visible here -- confirm, since name_buf
	 * lives on this stack frame. */
	char name_buf[32];
	extern int adv_steer_enabled;
	/* The processor right after the local monitors hosts steering. */
	if (steering_enabled &&
	    current_proc == *num_of_procs+number_of_lms) {
	    *steer_thread =
		cthread_thread_alloc((any_t(*)())steer_process,
				     (any_t)current_proc, N_CURRENT);
	    sprintf(name_buf, "Steering Thread");
	    cthread_set_name(*steer_thread, name_buf);
	    cthread_thread_schedule(*steer_thread);
	} else if 
	  (adv_steer_enabled && (current_proc == *num_of_procs+number_of_lms))
	{
	    /* NOTE(review): same processor index as the branch above, so
	     * this branch is taken only when steering_enabled is false. */
	    /* spawn the advanced steering server */
	    extern int as_Spawn();
	    *adv_steer_thread =
		cthread_thread_alloc((any_t(*)())as_Spawn,
				     (any_t)current_proc, N_CURRENT);
	    sprintf(name_buf, "Adv Steering Thread");
	    cthread_set_name(*adv_steer_thread, name_buf);
	    cthread_thread_schedule(*adv_steer_thread);
	} else {
	    /* Every other extra processor runs a local monitor; its index
	     * among the monitors is its offset past the user processors. */
	    int current_lm = current_proc - (*num_of_procs);
	    monitor_threads[current_lm] =
		cthread_thread_alloc((any_t(*)())monitor_process,
				     (any_t)current_lm, N_CURRENT);
	    memset(name_buf, 0, sizeof(name_buf));
	    sprintf(name_buf, "Local Monitor Thread #%d", current_lm);
	    cthread_set_name(monitor_threads[current_lm], name_buf);
	    cthread_thread_schedule(monitor_threads[current_lm]);
	    if (monitor_output == SOCKET_OUTPUT) {
		LocalMonitorInfoPtr  lm = lm_info[current_lm];
		start_socket_connection(&lm->de, &lm->dep);
	    }
	}
	/* Take the thread just created back out of the global counters --
	 * presumably so that monitor/steering threads do not hold up
	 * termination detection (TODO confirm). */
	atomadd(total_active, -1);
	atomadd(total_threads, -1);
    }
#endif
}

/*
 * init_shared_data -- allocate and initialise the library's shared tables.
 *
 * In order: the processor[] table (one struct local_info per processor),
 * the initial-thread slot, the monitor/steering thread slots (monitoring
 * builds only), the processes table and the cthread_in_schedule_function
 * array; then init_convert() and meminit() are run, and finally (monitoring
 * builds) the monitor scheduler is installed on the extra processors.
 *
 * total_procs: number of processors to size the processor[] table for.
 *
 * Returns the first non-T_SUCCEED result of init_convert() or meminit(),
 * otherwise T_SUCCEED.  The results of allocate_and_share() are not checked
 * here.
 */
RESULT
init_shared_data(total_procs)
int total_procs;
{
    int tmp, i;
    int size = total_procs * sizeof(struct local_info);

    /* NOTE(review): the meaning of the second argument (-1 here, 0 for
     * cthread_in_schedule_function below) is not visible in this file. */
    processor = (struct local_info *)allocate_and_share(size,-1);

    initial_thread = (cthread_t *) allocate_and_share(sizeof(cthread_t),-1);
#ifdef WITH_MONITORING
    if (monitoring_enabled) {
	/* One handle per local monitor, plus one each for the optional
	 * steering and advanced-steering threads. */
	monitor_threads = (cthread_t *)
	  allocate_and_share(sizeof(cthread_t)*number_of_lms, -1);
	if (steering_enabled) {
	    steer_thread = (cthread_t *)
		allocate_and_share(sizeof(cthread_t), -1);
	}
	if (adv_steer_enabled) {
	    adv_steer_thread = (cthread_t *)
		allocate_and_share(sizeof(cthread_t), -1);
	}
    }
#endif
    /*
     * Allocate and inherit the processes table. Used in the termination
     * process.
     * NOTE(review): the original comment announced an "implicit
     * assumption" here but never stated it.
     */
    processes = (long*)allocate_and_share(PROCESSES_TABLE_SIZE,-1);

    /* Allocate the per-processor hook array only once, but reset every
     * entry to NULL on each call. */
    if (cthread_in_schedule_function == NULL) {
	cthread_in_schedule_function = 
	  (void (**)())allocate_and_share(sizeof(cthread_in_schedule_function[0]) * 
			     MAX_PROC, 0);
    }
    for (i = 0; i<MAX_PROC; i++) {
	cthread_in_schedule_function[i] = NULL;
    }
    DBG("Before init_convert\n");
    if((tmp = init_convert(total_procs)) != T_SUCCEED)
      return(tmp);
    DBG("After init_convert\n");

    DBG("Before initializing memory\n");
    if((tmp = meminit()) != T_SUCCEED)
      return(tmp);

    DBG("memory initialized\n");

#ifdef WITH_MONITORING
    if (monitoring_enabled) {
	/* Describe the monitor scheduler in mon_sched_conf ... */
	Set_global_init(mon_sched_conf, monitor_global_init);
	Set_vproc_init(mon_sched_conf, monitor_vproc_init);
	Set_thread_init(mon_sched_conf, 0);
	Set_get_thread(mon_sched_conf, monitor_get_thread);
	Set_put_thread(mon_sched_conf, monitor_put_thread);
	Set_empty_readyq(mon_sched_conf, monitor_empty_readyq);
	Set_schedule(mon_sched_conf, monitor_schedule);
	Set_proc_idle(mon_sched_conf, 0);
	/* ... and install it on the processors that follow the user
	 * processors (the two numeric arguments look like an inclusive
	 * first/last processor range -- TODO confirm). */
	install_scheduler(*num_of_procs, *num_of_procs + number_of_lms - 1,
			  &mon_sched_conf);
	if (steering_enabled) {
	    /* The steering processor is the one after the last monitor. */
	    install_scheduler(*num_of_procs + number_of_lms,
			      *num_of_procs + number_of_lms,
			      &mon_sched_conf);
	}
    }
#endif
    return T_SUCCEED;
}

/*
 * cthread_configure -- read or change the library configuration.
 *
 * conf:   caller-owned configuration record.  For GET_CONFIG it is filled
 *         in from the current settings; for PUT_CONFIG its fields are
 *         copied into the library configuration.
 * action: GET_CONFIG or PUT_CONFIG.
 *
 * For PUT_CONFIG a field equal to its DEFAULT_* marker, or <= 0, selects
 * the compile-time default (STACKSIZE, MEMORY_EXPONENT,
 * THREADS_PER_PROCESSOR).
 *
 * Returns:
 *   T_ALREADY_INIT  PUT_CONFIG was requested while threads_started is 1;
 *   T_BAD_REQUEST   conf is NULL or action is neither GET_CONFIG nor
 *                   PUT_CONFIG;
 *   T_SUCCEED       otherwise.
 */
RESULT
cthread_configure(conf, action)
configuration_t		conf;
int					action;
{
   if (threads_started == 1 && action == PUT_CONFIG)
      return(T_ALREADY_INIT);

   /* Both valid actions dereference conf, so reject a null record up
    * front instead of crashing. */
   if (conf == NULL)
      return(T_BAD_REQUEST);

   switch(action) {
      case GET_CONFIG:
            conf->stack_size = config.stack_size;
            conf->memory_exponent = config.memory_exponent;
            conf->threads_per_proc = config.threads_per_proc;
            break;
      case PUT_CONFIG:
            if (conf->stack_size == DEFAULT_STACK ||
                  conf->stack_size <= 0)
               config.stack_size = STACKSIZE;
            else
               config.stack_size = conf->stack_size;
            if (conf->memory_exponent == DEFAULT_MEM ||
                  conf->memory_exponent <= 0)
               config.memory_exponent = MEMORY_EXPONENT;
            else
               config.memory_exponent = conf->memory_exponent;
            if (conf->threads_per_proc == DEFAULT_THREADS ||
                  conf->threads_per_proc <= 0)
               config.threads_per_proc = THREADS_PER_PROCESSOR;
            else
               config.threads_per_proc = conf->threads_per_proc;
            break;
      default:
            return(T_BAD_REQUEST);
   }
   return(T_SUCCEED);
}


/*
 * cthread_init -- start the thread library and run start_func.
 *
 * procs:      requested number of processors; verify_arg_consistency() may
 *             adjust it before it is handed to doinit().
 * start_func: the user's start routine, passed on to doinit().
 *
 * Returns T_ALREADY_INIT when threads_started is already 1; otherwise the
 * result of doinit(), so that an initialisation failure is no longer
 * reported as T_SUCCEED.
 */
RESULT
cthread_init(procs, start_func)
int		procs;		/* Number of processors	*/
void		(*start_func)();
{

   DBG3("Entered in cthread_init (%lx) start_func is %x (ph %x)\n",
         (long)cthread_init, (int) start_func, (int) getphysaddr(start_func));

#ifdef SEQUENT
   usclk_init();
#endif

   if (threads_started == 1) {
      return(T_ALREADY_INIT);
   }

   /*
    * Drain the stdio buffers before doinit() takes over.
    * NOTE(review): fflush() on an input stream is specified by POSIX but
    * undefined in ISO C.  It is kept because it presumably stops processes
    * created by doinit() from inheriting buffered input -- confirm.
    */
   fflush(stdout);
   fflush(stdin);

   verify_arg_consistency(&procs);

   /* Propagate the outcome instead of unconditionally claiming success. */
   return doinit(start_func, procs);
}

/*
 * cthread_start -- start the thread library without a user start routine.
 *
 * Same sequence as cthread_init(), except that doinit() is given NULL as
 * the start function.
 *
 * procs: requested number of processors; verify_arg_consistency() may
 *        adjust it before it is handed to doinit().
 *
 * Returns T_ALREADY_INIT when threads_started is already 1; otherwise the
 * result of doinit(), so that an initialisation failure is no longer
 * reported as T_SUCCEED.
 */
RESULT
cthread_start(procs)
int		procs;		/* Number of processors	*/
{

   DBG1("Entered in cthread_start, num_procs = %d\n", procs);

#ifdef SEQUENT
   usclk_init();
#endif

   if (threads_started == 1) {
      return T_ALREADY_INIT;
   }

   /*
    * Drain the stdio buffers before doinit() takes over.
    * NOTE(review): fflush() on an input stream is specified by POSIX but
    * undefined in ISO C.  It is kept because it presumably stops processes
    * created by doinit() from inheriting buffered input -- confirm.
    */
   fflush(stdout);
   fflush(stdin);

   verify_arg_consistency(&procs);

   /* Propagate the outcome instead of unconditionally claiming success. */
   return doinit(NULL, procs);
}

/*
 * page_aligned_size -- round a byte count up to a multiple of the page size.
 *
 * size: byte count to round.
 *
 * Returns size unchanged when it is already a multiple of getpagesize(),
 * otherwise the next multiple above it.
 */
int
page_aligned_size(size)
int size;
{
        int page = getpagesize();

        if (size % page == 0)
                return(size);

        /* Integer division drops the partial page; add one whole page. */
        return((size / page + 1) * page);
}

/*
 * generic_shared_size -- number of bytes to reserve for the shared arena.
 *
 * The total is the page-aligned sum of:
 *   - SHARED_SHORT_SPACE;
 *   - the processor[] table (procs * sizeof(struct local_info));
 *   - PROCESSES_TABLE_SIZE;
 *   - the per-processor dynamic memory pools (mem_size, computed below);
 *   - two cthread_t slots, plus one per local monitor when built
 *     WITH_MONITORING;
 *   - per processor: a scheduler control block (scb_size()), queue
 *     structures, and config.threads_per_proc thread control blocks with
 *     their CONTEXT_SIZE;
 *   - MAX_PROC pointers and one extra page.
 *
 * procs: number of processors the arena must serve.
 *
 * Returns the size in bytes, a multiple of the page size.
 */
int generic_shared_size(procs)
int procs;
{
    int size;
    int mem_size, tmp;

    /* Size of one processor's memory pool: the free-list header rounded up
     * to a multiple of 4 bytes, the preallocated dynamic memory, and the
     * bucket bookkeeping. */
    mem_size = sizeof(struct mem_free_list);
    tmp = mem_size & 0x03;
    if(tmp != 0)
      mem_size += 4-tmp;
    mem_size += dynamic_memory_per_processor + 8 + 
                sizeof(struct mem_free_list) + 
		(config.memory_exponent - MIN_CHUNK) * sizeof(struct bucket);
    /* All pools, plus one mem_free_list_t pointer slot per processor. */
    mem_size = procs * (mem_size + sizeof(mem_free_list_t));

    /*
     * The sum below has an unsigned sizeof-derived type, while
     * page_aligned_size() is defined K&R-style with an int parameter and
     * therefore has no prototype in this file.  Without a prototype the
     * argument is not converted, so convert it explicitly rather than pass
     * a value of the wrong type.
     */
    size =  page_aligned_size((int)(SHARED_SHORT_SPACE +
			      procs * sizeof(struct local_info) +
			      PROCESSES_TABLE_SIZE +
			      mem_size +
			      2 * sizeof(cthread_t) + 
#ifdef WITH_MONITORING			      
			      sizeof(cthread_t)*number_of_lms +
#endif
			      procs * (scb_size() +
				       4*(sizeof(struct locked_queue)+
					sizeof(locked_queue_t))+
				       sizeof(struct locked_queue)+
				       config.threads_per_proc * 
				       (sizeof(struct cthread) +
					CONTEXT_SIZE)) +
			      (sizeof(char*)*MAX_PROC) +
			      getpagesize()));

    /* NOTE(review): the breakdown printed here is only indicative -- the
     * "queue_overhead" and "thread TCBs" figures leave out terms that the
     * computation above includes (the 4x factor, the extra locked_queue,
     * CONTEXT_SIZE). */
    DBG1("Shared arena size =  mem_size(%d) + \n", mem_size);
    DBG1("                     num_of_procs(%d) * (\n", procs);
    DBG1("                             queue_overhead(%ld)\n",
	 (long)scb_size() +
	 (sizeof(struct locked_queue)+ sizeof(locked_queue_t)));
    DBG1("                             thread TCBs(%ld))\n",
	 (long)config.threads_per_proc * sizeof(struct cthread));
    DBG1("    		    + other stuff = total size (%d)\n", size);
    return size;
}

/*
 * generic_monmem_size -- number of bytes to reserve for monitoring data.
 *
 * procs: number of processors; each contributes config.threads_per_proc
 *        monitor buffers.
 *
 * Returns the page-aligned size in bytes when built WITH_MONITORING, and 0
 * otherwise.
 */
int generic_monmem_size(procs)
int procs;
{
#ifdef WITH_MONITORING
    int size, monbuf_size, bufsteer_size;
    int raw_size;

    /* One per-thread monitor buffer: header, mutex and payload. */
    monbuf_size = sizeof(MonitorBuffer) + sizeof(struct cth_mutex)+
	size_of_monitor_buffer;
    /* One per-local-monitor steering buffer: payload, header and mutex. */
    bufsteer_size = size_of_lm_steer_buffer + sizeof(MonitorBuffer) +
	sizeof(struct cth_mutex);

    /*
     * The sum has an unsigned sizeof-derived type, while
     * page_aligned_size() is defined K&R-style with an int parameter and
     * therefore has no prototype in this file.  Convert explicitly so the
     * argument has the type the callee expects.
     */
    raw_size = (int)(sizeof(LocalMonitorInfo)*number_of_lms +
			  MAX_NUM_SENSORS + 
			  procs * config.threads_per_proc * monbuf_size +
			  sizeof(unsigned long) +
			  10* sizeof(struct cth_mutex) +
			  2*MAX_NUM_SENSORS +
			  sizeof(SteerServerInfo) +
			  sizeof(ActionProc)*MAX_NUM_SENSORS +
			  sizeof(ActionProc)*USER_STEER_MAX +
			  sizeof(CommandHandlersInfo)*USER_STEER_MAX +
			  number_of_lms * bufsteer_size +
			  MAX_NUM_SAMPLING_INST *
			  (2*sizeof(unsigned long)+3*sizeof(void*)+sizeof(char))+
			  SIMPLE_HASH_VALUE*
			  (sizeof(cthread_t)+sizeof(mutex_t)+sizeof(condition_t)) +
			  getpagesize());

    size = page_aligned_size(raw_size);
    return size;
#else
    return 0;
#endif
}

#ifdef STATIC_MEMORY_SHARED
/*
 * internal_cthread_publish -- static-memory-shared variant.
 *
 * When static memory is shared there is nothing to propagate, so this is a
 * no-op; p is ignored.
 */
void
internal_cthread_publish(p)
any_t *p;
{
	/* intentionally empty */
}

/* dotell -- no-op counterpart for the static-memory-shared build. */
void
dotell()
{
}

#else
/*
 * Work item shared by dotell(), tell() and internal_cthread_publish():
 * "store `value` through `pointer`".
 */
typedef struct desc {
	any_t value;		/* the value to store		*/
	any_t *pointer;		/* the location to store it in	*/
} *desc_t;

void
dotell(desc)
desc_t desc;
{
    cthread_t current_thread = current_thread_array[virtual_processor()];

    current_thread->status |= S_INTERNAL;
    *(desc->pointer)  =  desc->value;
}

static void
tell (i,p)
int i;
any_t 	*p;
{
    desc_t d;
    cthread_t 	t;
    any_t 		res;
    RESULT		result;

    if(i == virtual_processor()) return;
    result = memory_alloc((memory_t *)&d, sizeof(struct desc), N_ANYWHERE);

    if (result != T_SUCCEED) {
	cthread_perror("memory_alloc failed in cthread_publish", result);
	child_exit(1);
    }

    d->value = *p;
    d->pointer = p;

    t = (cthread_t)cthread_thread_alloc((any_t(*)())dotell, (any_t)d, i);
    if (t == NULL) {
	fprintf(stderr, "Cthread publish failed because of too few threads!  Exitting!\n");
	child_exit(1);
    }
    t->status |= S_INTERNAL;
    cthread_thread_schedule(t);
    internal_cthread_join(t,&res);
    memory_free((memory_t)d);
}

/*
 * internal_cthread_publish -- propagate *p to every processor.
 *
 * Calls tell() once for each processor index from 0 up to (not including)
 * the processor count.  That count is *num_of_procs, extended when
 * monitoring is enabled by the number of local monitors and, if steering
 * is enabled, by one more.
 *
 * p: address of the value to publish.
 */
void
internal_cthread_publish(p)
any_t *p;
{
    int target;
    int proc_count = *num_of_procs;

#ifdef WITH_MONITORING
    if (monitoring_enabled) {
	proc_count += number_of_lms;
	if (steering_enabled)
	    proc_count += 1;
    }
#endif

    for (target = 0; target < proc_count; target++)
	tell(target, p);
}
#endif
