/***********************************************************************
*
*     File:  ClassExtractor.cpp
*
*   Author:  Daniel Popescu
*
*  Version:  1.0, 15.12.2005
*
*  Comment:
*
*
*
* Dowser ClassExtractor, hereinafter (Software)
*
* Copyright 2005
* Georgia Tech Research Corporation
* Atlanta, Georgia  30332
* All Rights Reserved
*
* The following Software is posted on the Internet by the Georgia
* Tech Research Corporation (GTRC).  It was developed by students and
* faculty of the Georgia Institute of Technology.  GTRC hereby grants
* to the user a non-exclusive, royalty-free license to utilize such 
* Software for the user's own purposes pursuant to the following 
* conditions.
*
* THE SOFTWARE IS LICENSED ON AN "AS IS" BASIS.  GTRC MAKES NO
* WARRANTY THAT ALL ERRORS CAN BE OR HAVE BEEN ELIMINATED FROM THE
* SOFTWARE. GTRC SHALL NOT BE RESPONSIBLE FOR LOSSES OF ANY KIND
* RESULTING FROM THE USE OF THE SOFTWARE AND ITS ACCOMPANYING
* DOCUMENTATION, AND CAN IN NO WAY PROVIDE COMPENSATION FOR ANY
* LOSSES SUSTAINED, INCLUDING BUT NOT LIMITED TO ANY OBLIGATION,
* LIABILITY, RIGHT, CLAIM OR REMEDY FOR TORT, OR FOR ANY ACTUAL OR
* ALLEGED INFRINGEMENT OF PATENTS, COPYRIGHTS, TRADE SECRETS, OR
* SIMILAR RIGHTS OF THIRD PARTIES, NOR ANY BUSINESS EXPENSE, MACHINE
* DOWNTIME OR DAMAGES CAUSED LICENSEE BY ANY DEFICIENCY, DEFECT OR
* ERROR IN THE SOFTWARE OR MALFUNCTION THEREOF, NOR ANY INCIDENTAL
* OR CONSEQUENTIAL DAMAGES, HOWEVER CAUSED. GTRC DISCLAIMS ALL
* WARRANTIES, BOTH EXPRESS AND IMPLIED RESPECTING THE USE AND
* OPERATION OF THE SOFTWARE AND ANY ACCOMPANYING DOCUMENTATION,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR PARTICULAR PURPOSE AND ANY IMPLIED WARRANTY ARISING FROM
* COURSE OF PERFORMANCE, COURSE OF DEALING OR USAGE OF TRADE.  GTRC
* MAKES NO WARRANTY THAT THE SOFTWARE IS ADEQUATELY OR COMPLETELY
* DESCRIBED IN, OR BEHAVES IN ACCORDANCE WITH ANY ACCOMPANYING
* DOCUMENTATION.  THE USER OF THE SOFTWARE IS EXPECTED TO MAKE THE
* FINAL EVALUATION OF THE SOFTWARE'S USEFULNESS IN USER'S OWN
* ENVIRONMENT.
*
*********************************************************************/
extern "C" {
#include "check.h"
}
#include "BottomUpClassification.h"
#include "ClassExtractionRule.h"
#include "ClassExtractor.h"
#include "DirectObjectRule.h"
#include "HasRule.h"
#include "PassiveWithObjectRule.h"
#include "PassiveRule.h"
#include "InfinitiveObjectRule.h"
#include "BecomesRule.h"
#include "StateRule.h"
#include "string.h"
#include "StringTokenizer.h"
#include "ModelPrinter.h"
#include "UMLGraphPrinter.h"
#include "WithRule.h"
#include "AmountOfRule.h"
#include <fstream>
#include <iostream>
#include <map>
#include "SubjectRule.h"
#include "GenitiveSRule.h"
#include "GenitiveOfRule.h"
#include <stdlib.h>
#include <string>
#include <unistd.h>
#include <vector>

/* Character-count threshold used by extract(): a parsed sentence to which no
 * rule applied is echoed to stdout only if it is longer than this value. */
static const int MINIMUM_SENTENCE_LENGTH = 5;

/*
 * Creates the two output back ends used by run(): a UMLGraphPrinter
 * (stored in `printer`) and a ModelPrinter (stored in `model_printer`).
 * Both are heap allocated here; this file does not show where, or
 * whether, they are released.
 */
ClassExtractor::ClassExtractor(){
    this->printer = new UMLGraphPrinter;
    this->model_printer = new ModelPrinter;
}


/*
 * Builds the list of extraction rules.
 *
 * The vector and every rule object in it are allocated with new and handed
 * to the caller as raw pointers; nothing in this function releases them.
 * The order of the entries is the order in which extract() tries the rules
 * for each word, so it must not be changed casually.
 */
vector<ClassExtractionRule*>* ClassExtractor::setupRules(){
    vector<ClassExtractionRule*> * ruleSet = new vector<ClassExtractionRule*>;

    ruleSet->push_back(new HasRule);
    ruleSet->push_back(new AmountOfRule);
    ruleSet->push_back(new WithRule);
    ruleSet->push_back(new InfinitiveObjectRule);
    ruleSet->push_back(new BecomesRule);
    ruleSet->push_back(new PassiveRule);
    ruleSet->push_back(new StateRule);
    ruleSet->push_back(new BottomUpClassification);
    ruleSet->push_back(new DirectObjectRule);
    ruleSet->push_back(new GenitiveOfRule);
    ruleSet->push_back(new GenitiveSRule);
    ruleSet->push_back(new SubjectRule);
    ruleSet->push_back(new PassiveWithObjectRule);

    return ruleSet;
}

/*
 * Duplicates the characters of a string object into a newly allocated,
 * NUL-terminated char array.
 *
 * The array is obtained from calloc(), so the caller is responsible for
 * releasing it with free().
 */
char * getCopyOfString(string &currentSentence){
   const string::size_type length = currentSentence.size();

   /* One byte more than the string holds: a C string needs a terminating
    * character, and calloc() already zero-fills that extra byte. */
   char * returnString =
	   static_cast<char*>(calloc(length + 1, sizeof(char)));
   /* presumably asserts that the allocation did not fail -- the exact
    * semantics of CHECK live in check.h */
   CHECK(returnString == NULL, false);

   currentSentence.copy(returnString, length);
   return returnString;
}

map<string, DomainClass>& ClassExtractor::extract(vector<string> * sentence,
					vector<ClassExtractionRule*>* rules){
    
    Parse_Options opts  = parse_options_create();
    Dictionary dict  = dictionary_create("4.0.dict", "4.0.knowledge", 
		    "4.0.constituent-knowledge", "4.0.affix");

    map<string, DomainClass>* ptr = new map<string, DomainClass>;
    map<string, DomainClass>& classMap = *ptr; 

    for(int i = 0; i < sentence->size(); ++i){
	    char * currentSentence = getCopyOfString(sentence->at(i));
	    Sentence sent = sentence_create(currentSentence, dict);
	    int num_linkages = sentence_parse(sent, opts);
	    if (num_linkages > 0) {
		    Linkage linkage = linkage_create(0, sent, opts);
		    int num_words = linkage_get_num_words(linkage);
		    CHECK((num_words >= 0), true);
		    int num_sublinkages = linkage_get_num_sublinkages(linkage);
		    bool oneRuleWasAppliedToSentence = false;
		    for (int count = 0; count < num_sublinkages ; count++) {
			    linkage_set_current_sublinkage(linkage, count);
			    for (int j=0; j < num_words; ++j){
				    vector<ClassExtractionRule*>::iterator 
					    ruleIterator;
				    for (ruleIterator=rules->begin();
						    ruleIterator != 
						    rules->end();
						    ruleIterator++){
					    bool ruleApplied
						    = (*ruleIterator)->
						    apply(linkage,j
							, sent, classMap);
					    if (ruleApplied) {
						    oneRuleWasAppliedToSentence
							    = true;
						    cout << (*ruleIterator)->
							    printRuleName()
							    << endl;

					    }
				    }
			    }
		    }
		    linkage_delete(linkage);
		    sentence_delete(sent);

		    if (!oneRuleWasAppliedToSentence 
				    && sentence->at(i).size() 
				    > MINIMUM_SENTENCE_LENGTH) {
			    cout << sentence->at(i) << endl;
		    }
	    }
    }
    dictionary_delete(dict);
    parse_options_delete(opts);
    return classMap;
}

/*
 * Removes the class named `actor` from the model and turns each of its
 * associations into a method on the association's target class.
 *
 * classMap  model to modify in place
 * actor     name (and map key) of the class to remove
 * isSystem  accepted for interface compatibility; not consulted here
 *
 * Nothing happens unless classMap holds an entry under `actor` whose
 * getName() equals `actor`. For every association of that class, the
 * association name is added as a method to the class stored under the
 * target's name; if no such class exists (or the stored entry carries a
 * different name) a new DomainClass with the target's name is stored
 * instead. Finally the actor entry is erased.
 */
void ClassExtractor::removeActor(map<string, DomainClass>& classMap, 
		string& actor, bool isSystem) {
	/* find() rather than operator[]: operator[] would insert an empty
	 * DomainClass under `actor` whenever the actor is absent, and that
	 * stray entry would stay in the model. */
	map<string, DomainClass>::iterator actorPos = classMap.find(actor);
	if (actorPos == classMap.end()
			|| actorPos->second.getName() != actor) {
		return;
	}

	/* Iterate over a copy of the associations because classMap is
	 * modified inside the loop. */
	map<string, Association> actorAssociations =
		actorPos->second.getAssociations();

	map<string, Association>::iterator assocIterator;
	for (assocIterator = actorAssociations.begin();
			assocIterator != actorAssociations.end();
			++assocIterator) {
		string targetName =
			assocIterator->second.getTarget()->getName();

		map<string, DomainClass>::iterator targetPos =
			classMap.find(targetName);
		if (targetPos != classMap.end()
				&& targetPos->second.getName() == targetName) {
			/* target already modelled: extend it in place */
			targetPos->second.addMethod(
					assocIterator->second.getName());
		} else {
			/* target missing or stored under a mismatching name:
			 * (re)create it with just this method */
			DomainClass targetClass(targetName);
			targetClass.addMethod(
					assocIterator->second.getName());
			classMap[targetName] = targetClass;
		}
	}

	classMap.erase(actor);
}
/*
 * Removes the class named "system" from the model by delegating to
 * removeActor() with isSystem set to true.
 */
void ClassExtractor::removeSystem(map<string, DomainClass>& classMap){
	/* removeActor() takes the name by non-const reference, so a named
	 * string object is required here. */
	string systemActorName = "system";
	removeActor(classMap, systemActorName, true);
}

/*
 * Calls removeActor() (with isSystem set to false) once for every name in
 * `actors`, in vector order.
 */
void ClassExtractor::removeActors(map<string, DomainClass>& classMap, 
		vector<string>& actors){
	vector<string>::iterator current = actors.begin();
	while (current != actors.end()) {
		removeActor(classMap, *current, false);
		++current;
	}
}

/*
 * Reads the list of actor names named on the command line.
 *
 * argc, argv  command line as passed to the program; the only option
 *             recognised is "-a actorFile"
 *
 * Returns a vector of the tokens obtained by splitting the whole actor file
 * at any of the characters " .;:!?". The vector is empty when no -a option
 * was given. The function never returns NULL.
 *
 * Unknown options are reported on stderr and otherwise ignored. If the actor
 * file cannot be opened, a message is written to stderr and the process
 * exits with status -1.
 */
vector<string>* ClassExtractor::extractActors(int argc, char **argv){
	char* actorFileName = NULL;
	int c;

	//reading the command line parameters
	//only -a actorFile is a valid parameter
	while ((c = getopt (argc, argv, "a:")) != -1) {
		switch (c) {
		case 'a':
			actorFileName = optarg;
			break;
		case '?':
			if (isprint (optopt))
				fprintf (stderr, "Unknown option `-%c'.\n",
						optopt);
			else
				fprintf (stderr,
					"Unknown option character `\\x%x'.\n",
					optopt);
			break;
		default:
			break;
		}
	}

	if (actorFileName == NULL) {
		return new vector<string>;
	}

	fstream actorFile(actorFileName, ios::in);
	if (!actorFile) {
		fprintf (stderr, "Cannot open actor file `%s'.\n",
				actorFileName);
		exit(-1);
	}

	/* read() does not NUL-terminate the buffer, so append exactly
	 * gcount() characters per chunk. Reading in a loop also removes the
	 * former silent cut-off for large files. */
	string input;
	const int bufferSize = 4096;
	char buffer[bufferSize];
	while (actorFile.read(buffer, bufferSize) || actorFile.gcount() > 0) {
		input.append(buffer,
			static_cast<string::size_type>(actorFile.gcount()));
	}

	string seperators(" .;:!?");
	StringTokenizer st;
	vector<string>* actors = st.split(input, seperators, false);
	if (actors == NULL) {
		/* run() dereferences the result unconditionally */
		actors = new vector<string>;
	}
	return actors;
}
/*
 * Collects every class that is the target of at least one association in
 * the model.
 *
 * classMap  model whose associations are inspected
 *
 * Returns a map from target class name to a copy of the target class. When
 * several associations point at targets with the same name, the copy made
 * last is the one that is kept.
 */
map<string, DomainClass> ClassExtractor::getAllAssociatedClasses(
		map<string, DomainClass>& classMap){
	map<string, DomainClass> targetsByName;

	for (map<string, DomainClass>::iterator classPos = classMap.begin();
			classPos != classMap.end();
			++classPos) {
		DomainClass& owner = classPos->second;
		map<string, Association> outgoing = owner.getAssociations();

		map<string, Association>::iterator link = outgoing.begin();
		while (link != outgoing.end()) {
			DomainClass* target = link->second.getTarget();
			targetsByName[target->getName()] = *target;
			++link;
		}
	}

	return targetsByName;
}

/*
 * Two-pass clean-up of the model:
 *   1. every class is asked to transferAggregations(), given the model and
 *      the set of classes that are association targets;
 *   2. the model is replaced by a new map holding only the classes for
 *      which shallBeDeleted() is false, keyed by their getName().
 *
 * The only call to this function (in run()) is commented out.
 */
void ClassExtractor::transformAggregationsIntoAttributes(
		map<string, DomainClass>& classMap){
	map<string, DomainClass> associatedClasses =
		getAllAssociatedClasses(classMap);

	map<string, DomainClass>::iterator pos = classMap.begin();
	while (pos != classMap.end()) {
		pos->second.transferAggregations(classMap, associatedClasses);
		++pos;
	}

	map<string, DomainClass> survivors;
	for (pos = classMap.begin(); pos != classMap.end(); ++pos) {
		DomainClass& candidate = pos->second;
		if (candidate.shallBeDeleted()) {
			continue;
		}
		survivors[candidate.getName()] = candidate;
	}
	classMap = survivors;
}


int ClassExtractor::run(int argc, char **argv){
	vector<string>* actors = extractActors(argc, argv);
	vector<string>* inputSentences = NULL;
	const int bufferSize = 50000;
	char buffer[bufferSize];
	cin.read(buffer, bufferSize);
	string input(buffer); 
	string seperators(".;:!?");
	StringTokenizer st;
	inputSentences = st.split(input, seperators, true);
	CHECK(inputSentences == NULL, false);
	CHECK(printer == NULL, false);
	CHECK(model_printer == NULL, false);
	map<string, DomainClass> classMap = 
		extract(inputSentences, setupRules());
	removeActors(classMap, *actors);
	removeSystem(classMap);
	//	The transformation function does not work always.
	//	Therefore, it is disabled.
	//transformAggregationsIntoAttributes(classMap);
	printer->print(classMap);
	model_printer->print(classMap);
	return 0;
}
