/* sched_io.c
*/
#define TRACE_OPEN

/* PROBLEM: gethostbyaddr can not make it thru
 * O_THREADCONTROL 0x1000 !! make sure it works with the library
 */
/*
 * TO DO:
 * ?? on open: if O_NDELAY set, should this imply all the operations to be  
 *           non blocking 
 * ?? abort IO: cancel all pending IO's (to be used when the
 *           user wants to cancel the app.)
 * ?? after poll, the errno is set ??, 
 * ?? after an error is detected by poll, retrying the command will
 *            return the appropriate errno; if yes:: our socket interfaces
 *            should keep track of the last error that happened on the
 *            socket
 *
 *      if use select: capture the SIGPIPE errors and the appropriate
 *                      socket number
 *    
 *       select   poll
 *       setsockopt  
 *       getsockname
 *       gethostname 
 *       gethostid 
 *       getpeername 
 */

#include "config.h"
#include <stdio.h>
#include <sys/types.h>
#ifndef HAVE_WINDOWS_H
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#else
#define FD_SETSIZE 1024
#include <windows.h>
#include <winsock.h>   
#include  "nt_init.h"     
#endif

#include <sys/stat.h>

#include <fcntl.h>
#include <memory.h>
#include <malloc.h>
#include <signal.h>

 
#include "results.h"
#include "general.h"
#include "sched_utils.h"
#include "internal.h"
#include "hwlib.h"
#include "sched_io.h"
#include "arch_init.h"
#include "arch_sync.h"
#include "memalloc.h"
#include "sched_io_local.h"
#include "init.h"

extern void perror();
#ifndef HAVE_WINDOWS_H
extern int poll ();
extern int  select();
#endif
extern void bzero();


extern void cthread_perror ARGS((char *msg, RESULT err));
extern int cth_close ARGS(( int s));
extern void setup_sigalrm_handler ARGS((void (*hndlr)()));
extern void setup_sigpipe_handler ARGS((void (*hndlr)()));
void handler_SIGPIPE ARGS(());
void handler_SIGALRM ARGS(());


/* Starts at -1; init_sched_io() raises it to 0 once the SIGPIPE handler is
 * installed, and handler_SIGPIPE() sets it to 1 when the signal arrives. */
int SIGPIPE_error_flag = -1;
/* fd_set allocated by init_global_io(); close_global_io() calls cth_close()
 * on every descriptor that is set in it. */
fd_set            *global_socket_list = NULL;
/* Counter allocated and zeroed by init_global_io(); close_global_io() keeps
 * closing descriptors only while it is > 0. */
atom_int          *global_socket_count = NULL;

/* Mutex allocated by init_global_io() via internal_mutex_alloc(). */
mutex_t           global_socket_lock = NULL;
/* Array of FD_SETSIZE SocketDescriptor records, reset by init_global_io(). */
SocketDescriptor  *socket_descriptor_list = NULL;
/* Array of FD_SETSIZE pointers, all NULL after init_global_io(); register_IO()
 * indexes it by socket number to record the pending operation. */
SocketDescriptor  **socket_descriptors = NULL;


/*
 * Use the system <poll.h> when configure found one; otherwise supply a
 * local struct pollfd and the POLL* event bits so that the rest of this
 * file can describe pending I/O with pollfd entries even when only
 * select() is available.
 *
 * Note that in the local fallback POLLRDNORM (0x0040) is a different bit
 * from POLLIN (0x0001), while POLLWRNORM is the same bit as POLLOUT.
 */
#ifdef HAVE_POLL_H
#include <poll.h>
#else
typedef struct pollfd {
	int fd;				/* file desc to poll */
	short events;			/* events of interest on fd */
	short revents;			/* events that occurred on fd */
} pollfd_t;
#define	POLLIN		0x0001		/* fd is readable */
#define	POLLPRI		0x0002		/* high priority info at fd */
#define	POLLOUT		0x0004		/* fd is writeable (won't block) */
#define	POLLRDNORM	0x0040		/* normal data is readable */
#define	POLLWRNORM	POLLOUT
#define	POLLRDBAND	0x0080		/* out-of-band data is readable */
#define	POLLWRBAND	0x0100		/* out-of-band data is writeable */
#define	POLLNORM	POLLRDNORM
#define	POLLERR		0x0008		/* fd has error condition */
#define	POLLHUP		0x0010		/* fd has been hung up on */
#define	POLLNVAL	0x0020		/* invalid pollfd entry */
#endif

/* Some system headers lack the *NORM aliases; map them onto the basic bits. */
#ifndef POLLRDNORM
#define POLLRDNORM POLLIN
#endif
#ifndef POLLWRNORM
#define POLLWRNORM POLLOUT
#endif

/*
 * Per-scheduler I/O bookkeeping.  init_sched_io() locates this structure
 * directly behind the scheduler control block and clears it; its size is
 * exported through io_info_size().
 */
typedef struct io_info {
        /* descriptors for which check_pending_io() saw POLLERR or POLLHUP;
         * queried by sched_io_has_error() */
        fd_set  io_error;
        /* indexed by descriptor: the thread register_IO() recorded as
         * waiting on it, or NULL once check_pending_io() has requeued it */
        void   *pending_io_threads[FD_SETSIZE];
        /* descriptors registered with PENDING_CONNECT */
        fd_set  pending_connect_list;
        /* number of descriptors currently in pending_connect_list */
        int     pending_connect_count;
        /* one entry per registered operation; the live entries are the
         * first scb->pending_io_count slots */
        struct pollfd pending_set[FD_SETSIZE];
} io_info;

/*
 * Release whatever init_global_io() managed to allocate and reset the
 * global pointers to NULL, so that a failed initialisation neither leaks
 * memory nor leaves the module looking "already executed".
 */
static void
release_partial_global_io()
{
#ifdef GLOBAL_MEMORY
    if( global_socket_count != NULL)
        memory_free((memory_t)global_socket_count);
    if( global_socket_list != NULL)
        memory_free((memory_t)global_socket_list);
    if( socket_descriptors != NULL)
        memory_free((memory_t)socket_descriptors);
    if( socket_descriptor_list != NULL)
        memory_free((memory_t)socket_descriptor_list);
#else
    free((char*)global_socket_count);
    free((char*)global_socket_list);
    free((char*)socket_descriptors);
    free((char*)socket_descriptor_list);
#endif
    global_socket_count = NULL;
    global_socket_list = NULL;
    socket_descriptors = NULL;
    socket_descriptor_list = NULL;
}

/*
 * init_global_io
 *
 * One-time set-up of the process-wide socket tables:
 *   - global_socket_list / global_socket_count,
 *   - socket_descriptor_list (FD_SETSIZE records) and socket_descriptors
 *     (FD_SETSIZE pointers, all NULL afterwards),
 *   - global_socket_lock.
 * With GLOBAL_MEMORY defined the tables come from memory_alloc(),
 * otherwise from malloc().
 *
 * Returns 0 on success.  Returns -1 if the tables already exist or if
 * any allocation fails; in the failure case everything allocated so far
 * is released again and the globals are reset to NULL, so the call can
 * be retried.
 */
int 
init_global_io() 
{
    int res, i;

    if( global_socket_list != NULL) {
          fprintf(stderr,"init_global_io: already executed\n");
          return -1;
    }
#ifdef GLOBAL_MEMORY
    res =  memory_alloc( (memory_t*)&global_socket_list,
                                    sizeof(fd_set), N_CURRENT);
    if( res != T_SUCCEED) {
       cthread_perror("allocate global_socket_list", res);
       release_partial_global_io();
       return -1;
    }

    res = memory_alloc( (memory_t*)&global_socket_count,
                                    sizeof(atom_int), N_CURRENT);
    if( res != T_SUCCEED) {
       cthread_perror("allocate global_socket_count", res);
       release_partial_global_io();
       return -1;
    }

    res = memory_alloc( (memory_t*)&socket_descriptor_list,
                                 FD_SETSIZE * sizeof(SocketDescriptor),
                                 N_ANYWHERE);
    if( res != T_SUCCEED) {
       cthread_perror("allocate socket_descriptor_list", res);
       release_partial_global_io();
       return -1;
    }
    res = memory_alloc( (memory_t*) &socket_descriptors,
                                 FD_SETSIZE * sizeof(SocketDescriptor*),
                                 N_ANYWHERE);
    if( res != T_SUCCEED) {
       cthread_perror("allocate socket_descriptors", res);
       release_partial_global_io();
       return -1;
    }
#else
    /* not global memory */
    global_socket_list =  (fd_set *) malloc(sizeof(fd_set));
    global_socket_count = (atom_int*) malloc(sizeof(atom_int));

    socket_descriptor_list = (SocketDescriptor*) 
                       malloc(FD_SETSIZE * sizeof(SocketDescriptor));

    socket_descriptors = (SocketDescriptor**) 
                       malloc(FD_SETSIZE * sizeof(SocketDescriptor*));
  
    if( global_socket_list == NULL || global_socket_count == NULL ||
        socket_descriptor_list == NULL || socket_descriptors == NULL) {
          fprintf(stderr,"allocate socket_descriptors\n");
          release_partial_global_io();
          return -1;
    }

#endif
    res = internal_mutex_alloc( &global_socket_lock, N_CURRENT);
    if( res != T_SUCCEED) {
       cthread_perror("allocate global_socket_lock", res);
       /* without the lock the tables are unusable: undo the allocations */
       release_partial_global_io();
       return -1;
    }

    FD_ZERO(global_socket_list);
    *global_socket_count = 0;
   
    /* Put every descriptor record into its "unused" state. */
    for( i = 0; i < FD_SETSIZE; i++) {
       socket_descriptors[i] = NULL;
       /* NOTE(review): AF_NULL is stored in .type while .domain gets 0;
        * confirm that this is not a swapped pair. */
       socket_descriptor_list[i].domain = 0;
       socket_descriptor_list[i].type = AF_NULL;
       socket_descriptor_list[i].protocol = -1;
       socket_descriptor_list[i].flags = 0;
       socket_descriptor_list[i].state = 0;
       socket_descriptor_list[i].duplicates = 0;
       socket_descriptor_list[i].syscall = NULL;
    }

    return 0;
}

/*
 * close_global_io
 *
 * Tear-down counterpart of init_global_io(): calls cth_close() on every
 * descriptor still marked in global_socket_list, then releases the four
 * global tables and resets their pointers to NULL.
 *
 * The scan covers all FD_SETSIZE descriptor numbers.  (It used to stop at
 * sizeof(fd_set), which is a byte count and therefore skipped most of the
 * descriptor range.)  It stops early once *global_socket_count reaches
 * zero; presumably cth_close() decrements that counter -- verify against
 * its implementation.
 *
 * NOTE(review): global_socket_lock is not released here.
 *
 * Always returns 0.
 */
int
close_global_io()
{
   int i;

   if( global_socket_count != NULL && global_socket_list != NULL &&
       *global_socket_count > 0) {
        for( i = 0; i < FD_SETSIZE && *global_socket_count > 0; i++) {
              if( FD_ISSET(i, global_socket_list)) {
                   cth_close(i);
              } 
        }      
   } 
#ifdef GLOBAL_MEMORY

   memory_free((memory_t)global_socket_count);
   memory_free((memory_t)global_socket_list);
   memory_free((memory_t)socket_descriptors);
   memory_free((memory_t)socket_descriptor_list);
#else
   free((char*)global_socket_count);
   free((char*)global_socket_list);
   free((char*)socket_descriptors);
   free((char*)socket_descriptor_list);

#endif
   global_socket_count = NULL; 
   global_socket_list = NULL;
   socket_descriptors = NULL;
   socket_descriptor_list = NULL;

   return 0;
} 

/*
 * init_sched_io
 *
 * Prepare the I/O bookkeeping of one scheduler control block.
 *
 *   scb  scheduler control block; the io_info area is expected to have
 *        been allocated directly behind it (see io_info_size()).
 *
 * Clears the pending pollfd table, the error set, the per-descriptor
 * thread table and the pending-connect set, and (on non-Windows builds)
 * installs the SIGPIPE and SIGALRM handlers.
 *
 * Always returns 0.
 */
int
init_sched_io(sched_info_t scb)
{  

  /* 
   *  space scb->io structure was allocated *behind* the scb.  
   *  Fill in pointer here...
   */
  scb->io = (io_info_t)((char*)scb + sizeof(struct sched));

  /* Clear the whole table.  The size used to be wrapped in a second
   * sizeof, which yields sizeof(size_t) and cleared only a few bytes. */
  memset( scb->io->pending_set, 0, sizeof(struct pollfd) * FD_SETSIZE);

  FD_ZERO(&scb->io->io_error);
  scb->pending_io_count = 0;
  memset(&scb->io->pending_io_threads, 0, sizeof(void *) * FD_SETSIZE);
  FD_ZERO(&scb->io->pending_connect_list);
  scb->io->pending_connect_count = 0;
#ifndef HAVE_WINDOWS_H
  setup_sigpipe_handler(handler_SIGPIPE); 
  /* -1 means "handler never installed"; keep a 1 left by an earlier signal */
  if( SIGPIPE_error_flag < 0)  SIGPIPE_error_flag = 0;

  setup_sigalrm_handler(handler_SIGALRM);
#endif
  return 0;
}


int
sched_io_has_error(int fd)
{
    return FD_ISSET(fd, &(local_scheduler[virtual_processor()]->io->io_error));
}

int  io_info_size()
{
    return sizeof(struct io_info);
}

/*
 * check_pending_io
 *
 * Non-blocking sweep over the I/O operations registered on the calling
 * virtual processor's scheduler.  Every entry of scb->io->pending_set
 * whose descriptor has become ready (or reports an error / hang-up) has
 * its thread handed back to the scheduler through scb->sched_put_thread;
 * the remaining entries are compacted to the front of the table and
 * scb->pending_io_count is updated.
 *
 * With POLL defined the readiness test is poll() with a zero timeout
 * (plus a select() on the pending-connect set); otherwise poll() is
 * emulated with a zero-timeout select() that reports POLLIN / POLLOUT.
 *
 * Descriptors that report POLLERR or POLLHUP are additionally recorded
 * in scb->io->io_error (see sched_io_has_error()).
 */
void
check_pending_io()
{
   int            proc = virtual_processor();
   sched_info_t   scb = local_scheduler[proc];
   int            ready_io, i, kept;
   cthread_t      tcb;
   struct pollfd *fds;
   struct timeval timeout;
   fd_set         connect_list;
#ifndef POLL
   fd_set         read_set;
   fd_set         write_set;
   int            max_fd;
#endif

   if( !scb->pending_io_count)   return;

   /* connect_list is consulted below on every path; start from an empty
    * set so that it is never read uninitialised. */
   FD_ZERO(&connect_list);

                             /* Timeout 0 milisec */
#ifdef POLL
   ready_io = poll(scb->io->pending_set, scb->pending_io_count, 0);
   if( scb->io->pending_connect_count) {
        timeout.tv_sec = 0;
        timeout.tv_usec = 0;
        /* select() overwrites its sets, so work on a copy */
        memcpy((char*)&connect_list, (char*)&scb->io->pending_connect_list,
                                        sizeof(scb->io->pending_connect_list));
        ready_io += select(FD_SETSIZE, (fd_set*)NULL, &connect_list, 
                               (fd_set*) NULL, &timeout);   
   }
#else
   FD_ZERO(&read_set);
   FD_ZERO(&write_set);
   max_fd = 0;
   ready_io = 0;

   /* Translate the pollfd table into select() read/write sets. */
   for( i = 0, fds = &scb->io->pending_set[0]; i < scb->pending_io_count;
                     i++, fds++) {
       fds->revents = 0;
       if(fds->events & POLLIN) {
           FD_SET(fds->fd, &read_set);
           /* NOTE(review): this diagnostic runs after FD_SET and uses '>'
            * rather than '>='; kept as in the original. */
           if (fds->fd > FD_SETSIZE) {
               fprintf(stderr, "Internal Error, stupid WINSOCK large FD bug.\n");
               fprintf(stderr, "Increase FD_SETSIZE.  Item not added to fdset.\n");
           }
           if( max_fd < fds->fd) max_fd = fds->fd;
           ready_io = 1;
       }
       if(fds->events & POLLOUT) {
           FD_SET(fds->fd, &write_set);
           if (fds->fd > FD_SETSIZE) {
               fprintf(stderr, "Internal Error, stupid WINSOCK large FD bug.\n");
               fprintf(stderr, "Increase FD_SETSIZE.  Item not added to fdset.\n");
           }
           if( max_fd < fds->fd) max_fd = fds->fd;
           ready_io = 1;
       }
   }
   if( ready_io > 0) {
       timeout.tv_sec = 0;
       timeout.tv_usec = 0;
       ready_io = select(max_fd+1, &read_set, &write_set, (fd_set*) 0,
                         &timeout);   
       if( ready_io > 0) {
           /* Translate the select() result back into revents bits. */
           for( i = 0, fds = &scb->io->pending_set[0]; 
                   i < scb->pending_io_count;  i++, fds++) {
               if( FD_ISSET(fds->fd, &read_set)) {
                   fds->revents |= POLLIN;
               }
               if( FD_ISSET(fds->fd, &write_set)) {
                   fds->revents |= POLLOUT;
               }
           }
       }
   }
#endif
   if( ready_io > 0) {

       /* ready_io <= scb->pending_io_count */

       for( fds = &scb->io->pending_set[0], i = 0, kept = 0;
                        i < scb->pending_io_count; i++, fds++) {

           /* POLLIN and POLLOUT are tested explicitly: they are the bits
            * register_IO() asks for and the bits the select() emulation
            * sets, and POLLIN is not the same bit as POLLRDNORM on every
            * platform. */
           if( ((fds->revents & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM |
                                 POLLERR | POLLHUP)) != 0) ||
                FD_ISSET( fds->fd, &connect_list)) { 
               tcb = ((cthread_t)scb->io->pending_io_threads[fds->fd]);
               if( tcb->io.type == PENDING_CONNECT) {
                   FD_CLR((unsigned) fds->fd, &scb->io->pending_connect_list);
                   scb->io->pending_connect_count--;
               }
               tcb->io.type = NO_PENDING_IO;
               (scb->sched_put_thread)(proc, tcb);
               scb->io->pending_io_threads[fds->fd] = NULL;
               if( fds->revents & (POLLERR | POLLHUP)) {
                   fprintf(stderr,"error on %d\n", (int) fds->fd);
                   FD_SET( fds->fd, &scb->io->io_error);
                   if (fds->fd > FD_SETSIZE) {
                       fprintf(stderr, "Internal Error, stupid WINSOCK large FD bug.\n");
                       fprintf(stderr, "Increase FD_SETSIZE.  Item not added to fdset.\n");
                   }
               }
           }
           else {
               /* still pending: slide the whole entry down over the gap
                * left by entries that were woken */
               if( kept != i) {
                   scb->io->pending_set[kept] = *fds;
               }
               kept++;
           }
       }
       scb->pending_io_count = kept;
   }
}


/*
 * handler_SIGPIPE
 *
 * Signal handler passed to setup_sigpipe_handler(): records the event in
 * SIGPIPE_error_flag and writes a notice to stderr.
 *
 * NOTE(review): stdio is not async-signal-safe; confirm that printing
 * from this handler is acceptable.
 */
void 
handler_SIGPIPE()
{
   static const char notice[] = "EEEEEEEEEror PIPE\n";

   SIGPIPE_error_flag = 1;
   fputs(notice, stderr);
}

/*
 * handler_SIGALRM
 *
 * Signal handler passed to setup_sigalrm_handler(): only writes a notice
 * to stderr.
 *
 * NOTE(review): stdio is not async-signal-safe; confirm that printing
 * from this handler is acceptable.
 */
void 
handler_SIGALRM()
{
   static const char notice[] = "EEEEEEEEEror ALARM\n";

   fputs(notice, stderr);
}

void
queue_io_and_schedule()
{
    (local_scheduler[virtual_processor()]->schedule)();
}

/*
 * register_IO
 *
 * Record that the current thread of the calling virtual processor is
 * waiting for an operation of kind io_type on socket s, so that
 * check_pending_io() can wake it when the socket becomes ready.
 *
 *   s        socket / descriptor number; must lie in [0, FD_SETSIZE)
 *            because it indexes the per-descriptor tables
 *   io_type  one of the PENDING_* codes; send-like codes and
 *            PENDING_CONNECT wait for writability, everything else
 *            waits for readability
 *
 * Returns 0 on success.  Returns -1, registering nothing, when s is
 * outside the table range or the pending table already holds FD_SETSIZE
 * entries (both cases used to write past the end of the tables).
 */
int
register_IO(int s, int io_type)
{
  int proc = virtual_processor();
  sched_info_t  scb = local_scheduler[proc];
  cthread_t tcb = current_thread_array[proc];
  struct pollfd *fds;

  /* s indexes pending_io_threads[] and socket_descriptors[] and is put
   * into fd_sets, so reject it before anything is touched. */
  if( s < 0 || s >= FD_SETSIZE) {
      fprintf(stderr, "Internal Error, stupid WINSOCK large FD bug.\n");
      fprintf(stderr, "Increase FD_SETSIZE.  Item not added to fdset.\n");
      return -1;
  }
  if( scb->pending_io_count >= FD_SETSIZE) {
      fprintf(stderr, "register_IO: pending I/O table is full\n");
      return -1;
  }

  fds = &scb->io->pending_set[scb->pending_io_count];

  switch( io_type) {
	case PENDING_RECV:
	case PENDING_RECVFROM:
	case PENDING_READV:
	case PENDING_ACCEPT:
        default:
	     fds->events = POLLIN; /* | POLLPRI */
             break;
        case PENDING_SEND:
        case PENDING_SENDTO:
        case PENDING_WRITEV:
	     fds->events = POLLOUT;
             break;
	case PENDING_CONNECT:
	     /* a pending connect is also tracked in its own set */
	     fds->events = POLLOUT;
             FD_SET(s, &scb->io->pending_connect_list);
             scb->io->pending_connect_count++;
             break;
  }
  fds->fd = s;
  tcb->io.type = io_type;
  tcb->io.socket = s;

  scb->pending_io_count++;
  scb->io->pending_io_threads[s] = current_thread_array[proc];

  /* this doesn't work if multiple concurrent operations are allowed 
   * on the socket
   */
  /* init_global_io() leaves every slot NULL, so only record the state
   * when a descriptor record has actually been attached to s. */
  if( socket_descriptors != NULL && socket_descriptors[s] != NULL) {
      socket_descriptors[s]->state = io_type;   
  }
  return 0;
}

