/*
**    THIS IS AN UNTESTED REPOSITORY OF BUTTERFLY SPECIFIC CODE.
**      This code was extracted from init.c to isolate machine dependencies.
**	It has not even been compiled on the Butterfly.
*/ 


#include	<stdio.h>
#include	<signal.h>

#ifdef WITH_MONITOR

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <sys/fcntl.h>

#include "monitor.h"
#endif WITH_MONITOR

#include	<sys/wait.h>


#include	<sys/cluster.h>
#include	<sys/kern_return.h>
#include	<mach.h>
#include	<sys/vm_mapmem.h>
#include	<sys/message.h>
#include	<sys/file.h>
#include	<errno.h>

#include	"results.h"
#include	"general.h"
#define		LOGICAL_PROCESSOR	int
typedef		LOGICAL_PROCESSOR		*convert;
#include	"init.h"
#include	"lock.h"
#include	"queue.h"
#include	"sync.h"
#include	"internal.h"
#include	"memalloc.h"
#include	"cth_sched.h"
#include	"hwlib.h"
#include	"sched_utils.h"
#include	"arch_init.h"

#ifdef	FAST
#include	"fast.h"
#endif	FAST

/* Monitor start-up hook; called from bfly_monitorinit() below. */
extern void monitorinit ARGS((int procs));
/* Butterfly-specific wrapper around monitorinit(); defined later in this file. */
static void bfly_monitorinit ARGS((int procs));

/* Table of child process ids; doinit() fills it and ends it with PROC_SENTINEL. */
typedef long *process_table_t;
/* Declared here but not referenced in the visible part of this file. */
extern unsigned long timestamp_resolution;
/* Backing storage that doinit() points num_of_procs at. */
shared int real_num_of_procs;

/*
 * doinit
 * Runs in the child process that cthread_family() forks on node 0.
 * Sets up the shared bookkeeping (physical/logical processor map, shared
 * counters, per-processor structures, process table), initializes memory,
 * forks one bound child per cluster node, and then waits for all of them.
 *
 *   start_func, arg  - passed through unchanged to child_init() in every
 *                      forked child.
 *
 * Returns the failing RESULT if init_convert() or meminit() does not
 * report T_SUCCEED.  On every other path the function does not return:
 * it ends in exit(0) once all children have been waited for.
 *
 * NOTE(review): tmp_shared_shorts, tmp_pds, pptr-independent tot_size and
 * cstat-independent locals such as tot_size are declared but never used.
 */
RESULT
doinit(start_func,arg)
int		(*start_func)();
any_t		arg;
{
   int			wid, i, chid, node, size, kres, count;
   long			*shared_shorts;
   long                 *tmp_shared_shorts;
   u_char               *tmp_pds;
   RESULT		tmp;
   process_table_t	pptr;
   int 			tot_size;
   union wait		stat;
   union cluster_status	cstat;

   /* NOTE(review): `fake` is not declared anywhere in the visible source. */
   DBG4(
   "Entered in doinit start_func %08x (ph %08x) (fake %08x) (cth_start %08x)\n"
               ,start_func, getphysaddr(start_func), (long)fake,
               (long)start_thread);

#ifdef WITH_MONITOR
   /* NOTE(review): `procs` is not a parameter or local of this function. */
   bfly_monitorinit(procs);
#endif WITH_MONITOR

   /* Physical -> logical map: one LOGICAL_PROCESSOR entry per possible node. */
   size = MAX_PROC*sizeof(LOGICAL_PROCESSOR);

   phys_to_logical = (convert)allocate_and_copy(size);

   /* count receives the number of nodes, cstat.node_list their ids. */
   if((kres = cluster_stat(cluster_id, GET_NODE_LIST, &cstat, &count))
               != KERN_SUCCESS)
      error_exit("AK", "Can't get cluster status", kres, NO_NODE, NO_SIZE);


   /*
    * One shared region is carved into consecutive long-sized slots; each
    * global below takes the next slot (shared_shorts++), even where the
    * value stored through it is only a short.
    */
   shared_shorts = (long *)allocate_and_share(SHARED_SHORT_SPACE,-1);

   num_of_procs		= &real_num_of_procs;
#ifdef WITH_MONITOR
   /* presumably one node is set aside for the monitor -- TODO confirm */
   *num_of_procs        = count - 1;
#else WITH_MONITOR
   *num_of_procs        = count;
#endif WITH_MONITOR
   total_threads	= (short *)(shared_shorts++);
   total_active		= (short *)(shared_shorts++);
   *total_threads	= 0;
   *total_active	= 0;
   sync_init		= (short *)(shared_shorts++);
   *sync_init		= count;
#ifdef WITH_MONITOR
   monitor_status      = (short *)(shared_shorts++);
   *monitor_status     = (short) MONITOR_NOT_INITIALIZED;
#endif WITH_MONITOR
   ss_lock              = (LOCK *)(shared_shorts++);
   LOCK_INIT(*ss_lock);

   print_sema = (short *)(shared_shorts++);
   *print_sema = 0;

   /* Per-processor descriptors: one struct local_info per cluster node. */
   size = count * sizeof(struct local_info);

   processor = (struct local_info *)allocate_and_share(size,-1);

   /*
    * Allocate and inherit the processes table. Used in termination 
    * process. Implicit assumption: 
    *			PROCESSES_TABLE_SIZE > MAX_PROCESSORS * sizeof(process_t) 
    */

   processes = (process_table_t)allocate_and_share(PROCESSES_TABLE_SIZE,-1);

   DBG("Before init_convert\n");
   if((tmp = init_convert(count)) != T_SUCCEED)
     return(tmp);
   DBG("After init_convert\n");

   DBG("Before initializing memory\n");
   if((tmp = meminit()) != T_SUCCEED)
      return(tmp);

   DBG("memory initialized\n");

#ifdef WITH_MONITOR
   /* Fill in the monitor scheduler configuration and install it. */
   Set_global_init(mon_sched_conf, monitor_global_init);
   Set_vproc_init(mon_sched_conf, monitor_vproc_init);
   Set_thread_init(mon_sched_conf, 0);
   Set_get_thread(mon_sched_conf, monitor_get_thread);
   Set_put_thread(mon_sched_conf, monitor_put_thread);
   Set_empty_readyq(mon_sched_conf, monitor_empty_readyq);
   Set_schedule(mon_sched_conf, monitor_schedule);
   Set_proc_idle(mon_sched_conf, 0);
   install_scheduler(*num_of_procs, *num_of_procs, &mon_sched_conf);
#endif WITH_MONITOR

   /* Logical -> physical map: logical index is the position in node_list. */
   for (node=0; node < count; node++)
      processor[node].logical_to_phys = cstat.node_list[node];

   DBG("Before procinit \n");
   for (node=0; node < count; node++) {
      procinit(node);
   }

   DBG("Per processor structures inited\n");

   /*
    * Here is the parent process which will fork the rest of the children
    * and it is going to wait for them to terminate.
    */
   pptr = processes;

   for(node = 0; node< count; node++){
      if((kres = fork_and_bind(node, cluster_id, &chid)) != KERN_SUCCESS)
         error_exit("AP", "Can't fork child", kres, node, NO_SIZE); 
      DBG2("In loop, node %d, chid %d\n",node, chid);
      if (chid == 0) {
         /* Child side: child_init() is not expected to return. */
         child_init(node,start_func,arg);
         printf("(AQ) Internal Error:Returned from child init !!!!!!\n");
         exit(1);
      }
      /* Parent side: remember the child's id for the termination phase. */
      *pptr++ = chid;
   }
   *pptr = PROC_SENTINEL;

   DBG1("Out of the fork loop chid is %d\n",chid);
   DBG("Processes spawned\n");

   /* Block until the first child terminates; its status picks the path. */
   wid = wait(&stat);
   DBG3("Child %d terminated with termsig %d, retcode %d\n"
               , wid, stat.w_termsig, stat.w_retcode);
   if(stat.w_termsig != 0) { /* Abnormal exit. Report the termination 	*/
      abnormal(stat, wid);   /* signal, the PID and the logical processor*/
                        /* number. Then kill all remaining processes*/
                        /* and terminate after waiting their exit.	*/
      /* Walk the table, skipping the child that has already terminated. */
      for (i=0, pptr = processes; i< count; i++)
         if (*pptr != wid)
            kill(*pptr++, SIGKILL);
         else
            pptr++;
      /* One child is already reaped, so count - 1 waits remain. */
      for(i=1; i< count; i++)
         wid = wait(&stat);
   }
   else {	/* Normal exit. Just wait for the termination of the other
          * children. If an  error occurs during the termination of the
          * *num_of_procs - 1 processes it is not reported.
          */
      for (i=1; i< count; i++)
         wid = wait(&stat);
   }
   exit(0); /* != 0 exit is used to pass error messages to the waiting
             * cthread_init()
             */
}

RESULT
init_convert(procs)
int		procs;
{
   int			kres, i, count;
   union cluster_status	cstat;

   for(i = 0; i < MAX_PROC; i++)
      phys_to_logical[i] = -1;

   if((kres = cluster_stat(cluster_id, GET_NODE_LIST, &cstat, &count))
            != KERN_SUCCESS)
      error_exit("AI", "Can't get cluster stat", kres, NO_NODE, NO_SIZE);

   for (i=0; i<count; i++)
      phys_to_logical[cstat.node_list[i]] = i;

   return(T_SUCCEED);
}

/*
 * for_all_nodes
 * Calls (*func)(phys, arg) once for every physical node index in
 * 0 .. MAX_PROC-1 whose phys_to_logical entry is not -1, in ascending
 * order.  The callback's return value is ignored and this function
 * returns no value itself.
 */
for_all_nodes(func, arg)
int		(*func)();
long	arg;
{
   int		phys;

   for (phys = 0; phys < MAX_PROC; phys++) {
      if (phys_to_logical[phys] == -1)
         continue;			/* unmapped slot */
      (*func)(phys, arg);
   }
}

vm_address_t
  mapfile(fname,size,node)
char		*fname;
vm_size_t	size;
int		node;
{
   int		fd;
   int		ps;
   vm_address_t	address	= 0;
   int		rsize;
   int		flags	= VM_MAPMEM_ANYWHERE;
   kern_return_t	res;

   /* open target file , if specified */
   if (fname == 0) {
      fd = 0;
      flags |= VM_MAPMEM_ALLOCATE;
   }
   else {
      if ((fd = open(fname,O_RDWR,O_CREAT,0xffff)) == -1) {
         error_exit("XX","Can't open file descriptor",errno,0,0);
      }
   }

   /* round size to a multiple of page size */
   ps = getpagesize() - 1;
   rsize = size;
   rsize = (rsize+ps) & ~ps;

   /* map file */
   res = vm_mapmem(task_self(),
   		&address,
   		size,
   		flags,
   		fd,
   		0,
   		node
   );
   if (res != KERN_SUCCESS) {
      error_exit("XX","Can't map memory",res,node,size);
   }

   /* close file */
   close(fd);
   return(address);
}

RESULT
cthread_family(procs, start_func, arg)
int		procs;		/* Number of processors	*/
int		(*start_func)();
any_t		arg;
{
   int			kres, chid, wid;
   union cluster_status	cstat;
   union wait		stat;

#ifdef DEBUG
   printf("Entered in cthread_family (%x) start_func is %x (ph %x)\n",
         (long)cthread_init, start_func,getphysaddr(start_func));
#endif DEBUG
   if (threads_started == 1) {
      return(T_ALREADY_INIT);
   }

   if (procs != 0) {
      int	nodes_allocated;
      printf("creating new cluster\n");
      if ((kres = cluster_create(procs,0,&cluster_id,&nodes_allocated))
      		!= KERN_SUCCESS) {
         cluster_disband(cluster_id,0);
         error_exit("XX","can't get cluster",kres,NO_NODE,NO_SIZE);
      }
      if (nodes_allocated != procs) {
         shouldnt("wrong number of nodes in cluster",nodes_allocated);
      }
   }
   else {
      cluster_id = HOME_CLUSTER;
   }

   if((kres = cluster_stat(cluster_id, GET_NODE_LIST, &cstat, &procs))
               != KERN_SUCCESS) {
      error_exit("AA", "Can't get cluster status", kres, NO_NODE, NO_SIZE);
   }

   /*
    * Initialize the configuration sttructure with the  default values.
    */
   if(configured != CONFIGURED){
      config.stack_size = STACKSIZE;
      config.memory_exponent = MEMORY_EXPONENT;
      dynamic_memory_per_processor = 1<<config.memory_exponent;
      config.threads_per_proc = THREADS_PER_PROCESSOR;
   }
   kres = fork_and_bind(0, cluster_id, &chid);
	/* This is to ensure compatibility between thread logical processor
         * mapping and cluster processor mapping
         */

   if(kres != KERN_SUCCESS)
      error_exit("AB", "Can't create family on processor", kres, 0, NO_SIZE);

   if(chid == 0) {
      exit(doinit(start_func,arg));
   }

   return(T_SUCCEED);
}

check_num_procs(procs)
int procs;
{
   if (procs != 0) {
      int	nodes_allocated;
      printf("creating new cluster\n");
      if ((kres = cluster_create(procs,0,&cluster_id,&nodes_allocated))
      		!= KERN_SUCCESS) {
         cluster_disband(cluster_id,0);
         error_exit("XX","can't get cluster",kres,NO_NODE,NO_SIZE);
      }
      if (nodes_allocated != procs) {
         shouldnt("wrong number of nodes in cluster",nodes_allocated);
      }
   }
   else {
      cluster_id = HOME_CLUSTER;
   }

   if((kres = cluster_stat(cluster_id, GET_NODE_LIST, &cstat, &procs))
               != KERN_SUCCESS) {
      error_exit("AA", "Can't get cluster status", kres, NO_NODE, NO_SIZE);
   }

}

/****************************************************************************/
/* fork a thread with a variable number of args:                            */
/*      machine/compiler dependent                                          */
/*      source code has to be synchronized with cthread_fork until it       */
/*        is stable and cthread_fork maps to cthread_forkV(f,1,arg,node);   */
/****************************************************************************/

cthread_t
cthread_forkV(func, n, arg /* , arg, ...., node */)
any_t	(*func)();
int	n;
any_t	arg;
{
   any_t		*args;
   int		node;
   register int	i,tmp;
   register cthread_t	newthread;

   args = &arg;
   node = * ((int *) (args + n));
   atomadd(total_threads,1);
   atomadd(total_active ,1);
   if(node == N_ANYWHERE || node >= *num_of_procs || node <= N_LESS){
      node = 0;
      tmp = processor[0].load;
      for (i = 1; i < *num_of_procs; i++)
         if( tmp >= processor[i].load){	/* the = is to reduce load on 0 */
            node = i;
            tmp = processor[i].load;
         }
   }
   else
      if (node == N_CURRENT)
         node = virtual_processor();
   if ((newthread = (cthread_t)locked_dequeue((processor[node].scheduler)->free_list)) == 0)
      shouldnt("free_list queue overflow ");

   /*
    *	Normally the following will not be necessary.
    */

   LOCK_INIT(newthread->tlock);
   newthread->name = 0;
   newthread->status = S_INITIALLY;
   newthread->thread_data = 0;
   condition_init(&newthread->done);
   newthread->home    = node;
   newthread->on_node = node;
   newthread->agent   = 0;		/* using own stack		*/
   newthread->synced = 0;
   LOCK_INIT(newthread->event.lock);
   LOCK_INIT(newthread->entry.lock);
   queue_init(& newthread->event.q);
   queue_init(& newthread->entry.q);
/*
   pcond_init(& newthread->wakeup);
*/
   newthread->onblock.function    = 0;
   newthread->whenblock.function  = 0;
   newthread->onresume.function   = 0;
   newthread->whenresume.function = 0;

#ifdef WITH_MONITOR
   cthread_monitoring(newthread);
#endif WITH_MONITOR

   fix_jbufV(newthread, func, n, args);

   processor[node].load += 1;
   atomadd(&((processor[node].scheduler)->active_threads), 1);
   (processor[node].scheduler)->sched_put_thread(node,newthread);
   return(newthread);
}

/* Forward declaration of the thread descriptor structure. */
struct cthread;

/*
 * Indices into a thread's jump buffer (t->jbuf) that fix_jbuf() and
 * fix_jbufV() overwrite.  Two layouts exist, selected by FAST.
 *
 * NOTE(review): SIGMASK is only defined in the non-FAST layout, but
 * fix_jbuf() stores into jbuf[SIGMASK] unconditionally -- confirm that
 * FAST builds get SIGMASK from elsewhere.
 */
#ifdef BUTTERFLY

#ifndef	FAST
#define		FRAME_POINTER	3
#define		STACK_POINTER	2
#define		PROGRAM_COUNTER	5
#define		SIGMASK	        1

#else	

#define		FRAME_POINTER	11
#define		STACK_POINTER	12
#define		PROGRAM_COUNTER	0

#endif	FAST

#endif BUTTERFLY

/*
 * fix_jbuf.c
 * Contains the functions which initially set up the jump buffer. They are
 * highly machine-dependent, so they were separated from the rest of the
 * library.
 *
 * Modification history:
 *
 * 1.0	01-Nov-88      	Initial release (MACH).
 * 1.1	01-Dec-88       Added the #ifdef PROFILE and #ifdef FAST switch to
 *			set the jump buffer in "_setjmp()" format.
 * 1.2	13-May_89	(Ahmed Gheith) Added variable args fork.
 * 1.3   Bodhi Mukherjee	Added Implementation for Sparcs.
 * 1.4   Bodhi Mukherjee	Added Implementation for Sequent.
 * 1.5   Bodhi Mukherjee	Added Implementation for Sun3/50.
 * 1.6   Bodhi Mukherjee	Added Implementation for Sun3/86.
 *
 */

#include	<stdio.h>
#include "results.h"
#include "general.h"

#ifdef	FAST
#include	"fast.h"
#endif	FAST

#include	"lock.h"
#include	"queue.h"
#include	"sync.h"
#include	"internal.h"
#include	"hwlib.h"

/* Entry routines whose addresses are stored as the initial program
 * counter by fix_jbuf() and fix_jbufV() respectively. */
extern	any_t	start_thread();
extern	any_t	start_threadV();
/* Only referenced from a commented-out line in fix_jbufV(). */
extern 		cthread_exit();
/* Reports a "should not happen" condition. */
extern		shouldnt ARGS((char *msg));


/*
 * fix_jbuf
 * Prepares thread t so that jumping to its jump buffer starts execution
 * in start_thread on t's own stack, with func and arg laid out as the
 * first two arguments.
 *
 *   t    - thread whose jbuf and stack (t->stack_top) are initialized.
 *   func - stored on the new stack as argument 1.
 *   arg  - stored on the new stack as argument 2.
 *
 * setjmp() is called only to fill t->jbuf with a valid image; selected
 * slots (frame pointer, stack pointer, signal mask, program counter) are
 * then overwritten by hand.
 *
 * NOTE(review): `result` is declared but never used.
 */
void
fix_jbuf(t, func, arg)
cthread_t	t;
any_t 	(*func)();
long 	arg;
{
	long	*lptr;
	int		result;

   DBG3("In fix_jbuf(), func is %x, arg is %x, start_thread is %x\n"
      		, func, arg, start_thread);
fflush(stdout);
/* Check and see if this is really needed */
	/* A non-zero return would mean somebody jumped to this buffer before
	 * it was patched below. */
	if (setjmp(t->jbuf) != 0)
		shouldnt("Return from setjmp in fix_jbuf");

   /* Build the initial frame downwards from the top of the thread stack. */
   lptr = (long *)t->stack_top;
   *lptr-- = 0;		/* A6 to previous frame (which doesn't exist)	*/
   *lptr-- = arg;			/* Arg 2			*/
   *lptr-- = (long)func;		/* Arg 1			*/

   *lptr   = (long)t->stack_top;	/* Pointer to next frame	*/
   t->jbuf[FRAME_POINTER] = (long)lptr;	/* should be the current A6	*/

#ifdef	FAST
   lptr--;				/* That's for MACH Porting !!!!	*/
#endif	FAST

   /* The word at the stack pointer holds its own address. */
   *lptr = (long)lptr;
   t->jbuf[STACK_POINTER] = (long)lptr;	/* This is the current A7 (sp)	*/

/**************************************************/
/*       TOPOLOGY HACK                            */
/**************************************************/

   /* Start the thread with an empty signal mask. */
   t->jbuf[SIGMASK] = (long)0;	

#ifndef	FAST
   *(long *)((long)(&(t->jbuf[PROGRAM_COUNTER])) - 2 ) = 
   		(long)start_thread;	/* jmp address			*/
					/* The funny syntax is because the
					 * addresses are not word aligned
					 * therefore SR is stored in word,
					 * so program counter doesn't
					 * start from a long word boundary.
					 */
#else	FAST
   t->jbuf[PROGRAM_COUNTER] = (long)start_thread; /* jmp address. _setjmp */
#endif	FAST
} /* fix_jbuf */ 

/*
 * fix_jbufV
 * Variable-argument counterpart of fix_jbuf(): prepares thread t so that
 * jumping to its jump buffer starts execution in start_threadV on t's own
 * stack, with func, the argument count and the n arguments laid out on
 * that stack.
 *
 *   t    - thread whose jbuf and stack (t->stack_top) are initialized.
 *   func - stored on the new stack below the argument count.
 *   n    - number of argument words to copy.
 *   args - address of the first of the n argument words.
 *
 * Unlike fix_jbuf(), no signal-mask slot is written here.
 */
void
  fix_jbufV(t, func, n, args)
cthread_t	t;
any_t			(*func)();
int		n;
any_t		*args;
{
   long	*lptr;
   long *p;
   int		i;

   /* setjmp() only fills the buffer; a non-zero return is unexpected. */
   if (setjmp(t->jbuf) != 0)
      shouldnt("Return from setjmp in fix_jbuf");
   lptr = (long *)t->stack_top;
   *lptr-- = 0;		/* A6 to previous frame (which doesn't exist)	*/
   /* Copy the arguments last-to-first, so the first argument ends up at
    * the lowest address of the copied group. */
   p = (long*) args + n;
   for (i=0; i<n ; i++) {
      *lptr-- = * --p;
   }
   *lptr-- = n;
   *lptr-- = (long)func;		/* function			*/
   /* *lptr-- = (long)cthread_exit;	 * Return address		*/
   *lptr = (long)t->stack_top;		/* Pointer to next frame	*/
   t->jbuf[FRAME_POINTER] = (long)lptr;	/* This should be the current A6*/
#ifdef	FAST
   lptr--;				/* That's for MACH Porting !!!!	*/
#endif	FAST
   /* The word at the stack pointer holds its own address. */
   *lptr = (long)lptr;
   t->jbuf[STACK_POINTER] = (long)lptr;	/* This is the current A7 (sp)	*/
#ifndef	FAST
   *(long *)((long)(&(t->jbuf[PROGRAM_COUNTER])) - 2 ) = 
   			(long)start_threadV;	/* jmp address	*/
   /* The funny syntax is because the
    * addresses are not word aligned
    * therefore SR is stored in word,
    * so program counter doesn't
    * start from a long word boundary.
    */
#endif	FAST

#ifdef	FAST
   t->jbuf[PROGRAM_COUNTER] = (long)start_threadV;
   					/* jmp address. _setjmp		*/
#endif	FAST
#ifdef	DEBUG
   printf("In fix_jbuf(), func is %x, arg is %x, start_threadV is %x\n"
      		, func, args, start_threadV);
#endif	DEBUG
}
#ifdef WITH_MONITOR

any_t
allocate_monmem(size,node)
vm_size_t       size;
int             node;
{

        u_char *address;
        address = (u_char *)((int)monmem_base + monmem_offset);
        monmem_offset = monmem_offset + size;

        return((any_t) address);
}

/*
 * bfly_monitorinit
 * Allocates the memory region that allocate_monmem() hands out from,
 * marks it VM_INHERIT_SHARE so forked children inherit it shared, resets
 * the allocation offset, and then calls monitorinit(procs).
 *
 *   procs - passed through to monitorinit().
 *
 * Allocation or inherit failures are reported through error_exit()
 * (codes "AC" and "AD").
 *
 * NOTE(review): `size` is never assigned before it is passed to
 * vm_allocate() and vm_inherit(), so the region size is indeterminate.
 * The intended size is not visible in this file -- must be fixed before
 * this code is used.  mem_size, tmp and i are declared but unused.
 */
void 
bfly_monitorinit(procs)
int procs;
{
        int size;
        int mem_size, tmp;
	int i;

   	vm_address_t         address;
   	kern_return_t        kres;

   	/* address 0 with the TRUE flag: let the kernel choose the location
   	 * (presumably the "anywhere" argument -- TODO confirm). */
   	address = 0;
      	kres = vm_allocate(task_self(), &address, size, TRUE);
      	if(kres != KERN_SUCCESS) {
         error_exit("AC","Can't allocate memory ", kres, NO_NODE, size);
      	}

   	kres = vm_inherit(task_self(), address, size,VM_INHERIT_SHARE);
   	if(kres != KERN_SUCCESS) {
      	error_exit("AD","Inherit failed", kres, NO_NODE, size);
   	}

        /* Empty region: the next allocate_monmem() starts at the base. */
        monmem_base = (u_char*)address;
        monmem_offset = 0;

	monitorinit(procs);
}


/*
 * NOTE(review): this block cannot compile as written and needs an owner's
 * decision before use:
 *   - it is a second definition named doinit, with no parameters, while
 *     an earlier doinit(start_func, arg) already exists in this file;
 *   - kres, chid, wid and start_func are used but not declared here;
 *   - the final exit() reads `stat`, which is only declared inside the
 *     else-branch above it, and is unreachable anyway because both
 *     branches leave the function.
 * It looks like the body of an initialization entry point that forks
 * doinit() and waits for it (the earlier doinit() mentions a waiting
 * cthread_init()) -- confirm and restore the proper name and signature.
 *
 * As written: forks onto node 0; the child exits with the result of
 * doinit(start_func, 0); the parent waits for one child and returns its
 * exit code when that code is non-zero, otherwise calls exit() with it.
 */
doinit()
{
   kres = fork_and_bind(0, cluster_id, &chid);

	/* This is to ensure compatibility between thread logical processor
         * mapping and cluster processor mapping
         */
   if(kres != KERN_SUCCESS)
      error_exit("AB", "Can't fork to processor", kres, 0, NO_SIZE);

   if(chid == 0) {
       exit(doinit(start_func,0));
   } else {
       union wait		stat;
       wid = wait(&stat);
       DBG3("Returned from Wait (wid = %d) ret code %d, termin code %d\n"
	    ,wid, stat.w_T.w_Retcode, stat.w_T.w_Termsig);
       /* A non-zero exit code from the child is returned to the caller;
        * a zero exit code terminates this process as well. */
       if(stat.w_retcode != 0)
	 return(stat.w_retcode);
       else
	 exit(stat.w_retcode);
   }
   exit(stat.w_T.w_Retcode);
}

/* Clock value recorded by get_start_timestamp_and_resolution(); subtracted
 * from getrtc() readings to make timestamps relative to start-up. */
shared unsigned long start_time;

/*
 * cthread_timestamp
 * Returns the current timestamp.
 *
 * When logicalTimeStamp is set, the timestamp is the value of the shared
 * counter *logicalTimeStamp_counter, which is post-incremented under
 * logicalTimeStamp_lock so every caller gets a distinct value.  Otherwise
 * it is getrtc() minus start_time.
 *
 * Fix: the computed timestamp was never returned, so callers received an
 * indeterminate value.
 */
unsigned long
cthread_timestamp()
{
    unsigned long  timestamp;

    if (logicalTimeStamp) {
	internal_mutex_lock(&logicalTimeStamp_lock->mlock);
	timestamp = (*logicalTimeStamp_counter)++;
	internal_mutex_unlock(&logicalTimeStamp_lock->mlock);
    } else {
	timestamp = (unsigned long) getrtc();
	timestamp -= start_time;
    }
    return(timestamp);
}

/*
 * get_start_timestamp_and_resolution
 * Records the starting time in the shared variable start_time and reports
 * it, together with the timestamp resolution, to the caller.
 *
 *   start_time_p - receives the recorded start time.
 *   resolution_p - receives 1 when logicalTimeStamp is set and 1000000
 *                  otherwise (presumably clock ticks per second --
 *                  TODO confirm the unit of getrtc()).
 *
 * With logical timestamps the start time is 0; otherwise it is the
 * current getrtc() reading.
 *
 * Fix: the resolution was stored through the undeclared name
 * timestamp_resolution_p instead of the resolution_p parameter.
 */
void
get_start_timestamp_and_resolution(start_time_p, resolution_p)
unsigned long *start_time_p;
unsigned long *resolution_p;
{

    if (logicalTimeStamp) {
	*resolution_p = 1;
	start_time = 0;
    } else {
	*resolution_p = 1000000;
	start_time = (unsigned long) getrtc();
    }
    *start_time_p = start_time;
}
