
/* File:        org.molgenis.omx/model/SNP.java
 * Copyright:   GBIC 2000-2013, all rights reserved
 * Date:        November 22, 2013
 * 
 * generator:   org.molgenis.generators.db.EntityImporterGen 4.0.0-testing
 *
 * 
 * THIS FILE HAS BEEN GENERATED, PLEASE DO NOT EDIT!
 */

package org.molgenis.omx.xgap.db;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Sets;
import org.apache.log4j.Logger;
import org.molgenis.data.Entity;
import org.molgenis.data.Repository;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.Database.DatabaseAction;
import org.molgenis.framework.db.EntityImporter;
import org.molgenis.io.processor.LowerCaseProcessor;
import org.molgenis.io.TupleReader;
import org.molgenis.util.tuple.Tuple;

import org.molgenis.omx.xgap.Chromosome;
import org.molgenis.omx.xgap.Marker;
import org.molgenis.omx.xgap.Polymorphism;
import org.molgenis.omx.xgap.SNP;


/**
 * Reads SNP from a delimited (csv) file, resolving xrefs to ids where needed, that is the tricky bit ;-)
 */
public class SNPEntityImporter implements EntityImporter
{
	private static final Logger logger = Logger.getLogger(SNPEntityImporter.class);
	
	private static int BATCH_SIZE = 10000;
	//foreign key map for xref 'chromosome' (maps chromosome.Identifier -> chromosome.id)			
	final Map<String,Integer> chromosomeKeymap = new TreeMap<String,Integer>();	
	//foreign key map for xref 'reportsFor' (maps marker.Identifier -> marker.id)			
	final Map<String,Integer> reportsForKeymap = new TreeMap<String,Integer>();	
	//foreign key map for xref 'polymorphism' (maps polymorphism.Identifier -> polymorphism.id)			
	final Map<String,Integer> polymorphismKeymap = new TreeMap<String,Integer>();	
			
	/**
	 * Imports SNP from tab/comma delimited File
	 * @param db database to import into
	 * @param reader csv reader to load data from
	 * @param defaults to set default values for each row
	 * @param dbAction indicating wether to add,update,remove etc
	 * @param missingValues indicating what value in the csv is treated as 'null' (e.g. "" or "NA")
	 * @return number of elements imported
	 */
	@Override
	public int importEntity(Repository<? extends Entity> repository, Database db, DatabaseAction dbAction) throws IOException, DatabaseException 
	{
		//wrapper to count
		final AtomicInteger total = new AtomicInteger(0);
	try {
		//cache for entities of which xrefs couldn't be resolved (e.g. if there is a self-refence)
		//these entities can be updated with their xrefs in a second round when all entities are in the database
		List<SNP> sNPsMissingRefs = new ArrayList<SNP>();
	
		//cache for objects to be imported from file (in batch)
		final List<SNP> sNPList = new ArrayList<SNP>(BATCH_SIZE); // FIXME
		
		for(Entity entity : repository)
		{
			// skip empty rows
			if (!hasValues(entity)) continue;
			
			//parse object, setting defaults and values from file
			SNP object = new SNP();
			object.set(entity, false);				
			sNPList.add(object);		
			
			//add to db when batch size is reached
			if(sNPList.size() == BATCH_SIZE)
			{
				//resolve foreign keys and copy those entities that could not be resolved to the missingRefs list
				sNPsMissingRefs.addAll(resolveForeignKeys(db, sNPList));
				sNPList.removeAll(sNPsMissingRefs);
				
				//update objects in the database using xref_label defined secondary key(s) 'Identifier' defined in xref_label
				db.update(sNPList,dbAction, "Identifier");
				
				//clear for next batch						
				sNPList.clear();		
				
				//keep count
				total.set(total.get() + BATCH_SIZE);
				
				db.getEntityManager().flush();
				db.getEntityManager().clear();		
			}
		}
			
		//add remaining elements to the database
		if(!sNPList.isEmpty())
		{
			total.set(total.get() + sNPList.size());
			
			//resolve foreign keys, again keeping track of those entities that could not be solved
			sNPsMissingRefs.addAll(resolveForeignKeys(db, sNPList));
			sNPList.removeAll(sNPsMissingRefs);
			
			//update objects in the database using xref_label defined secondary key(s) 'Identifier' defined in xref_label
			db.update(sNPList,dbAction, "Identifier");
		}
		
		//Try to resolve FK's for entities until all are resolved or we have more then 100 iterations
		List<SNP> sNPs = new ArrayList<SNP>(sNPsMissingRefs);

		int iterationCount = 0;

		do
		{
			sNPsMissingRefs = resolveForeignKeys(db, sNPsMissingRefs);
			
			LinkedHashSet<SNP> differenceSet = new LinkedHashSet<SNP>();
			Sets.symmetricDifference(new LinkedHashSet<SNP>(sNPs), new LinkedHashSet<SNP>(sNPsMissingRefs)).copyInto(differenceSet);
			List<SNP> resolvablesNPs = new ArrayList<SNP>(differenceSet);
			
			sNPs.removeAll(resolvablesNPs);
			
			db.update(resolvablesNPs,dbAction, "Identifier");

			if (iterationCount++ > 100)
			{
			String identifier = "";
			String name = "";
				for(SNP blaat : sNPsMissingRefs){
					identifier = blaat.getValues().get("Identifier").toString();
					name = blaat.getValues().get("Name").toString();
				}
				throw new Exception(
						"Import of 'sNP' entity failed:"
								+ "This is probably caused by a(n) 'sNP' that has a reference but that does not exist."
								+"(identifier:"+identifier+", name:"+name+")");		
			}
		}
		while (sNPsMissingRefs.size() > 0);

		logger.info("imported " + total.get() + " sNP from CSV");

		} catch(Exception e) {throw new IOException(e);}
		return total.get();
	}	
	
	private boolean hasValues(Entity entity)
	{
		for (String attributeName : entity.getAttributeNames())
		{
			if (entity.get(attributeName) != null) return true;
		}
		return false;
	}
	
	/**
	 * This method tries to resolve foreign keys (i.e. xref_field) based on the secondary key/key (i.e. xref_labels).
	 *
	 * @param db database
	 * @param sNPList 
	 * @return the entities for which foreign keys cannot be resolved
	 */
	private List<SNP> resolveForeignKeys(Database db, List<SNP> sNPList) throws Exception
	{
		//keep a list of SNP instances that miss a reference which might be resolvable later
		List<SNP> sNPsMissingRefs = new ArrayList<SNP>();
	
		//resolve xref 'chromosome' from chromosome.Identifier -> chromosome.id
		for(SNP o: sNPList) 
		{
			if(o.getChromosome_Identifier() != null) 
				chromosomeKeymap.put(o.getChromosome_Identifier(), null);
		}
		
		if(chromosomeKeymap.size() > 0) 
		{
			List<Chromosome> chromosomeList = db.query(Chromosome.class).in("Identifier",new ArrayList<Object>(chromosomeKeymap.keySet())).find();
			for(Chromosome xref :  chromosomeList)
			{
				chromosomeKeymap.put(xref.getIdentifier(), xref.getId());
			}
		}
		//resolve xref 'reportsFor' from marker.Identifier -> marker.id
		for(SNP o: sNPList) for(String xref_label: o.getReportsFor_Identifier())
		{
			if(xref_label != null) 
				reportsForKeymap.put(xref_label.trim(), null);
		}
		
		if(reportsForKeymap.size() > 0) 
		{
			List<Marker> reportsForList = db.query(Marker.class).in("Identifier",new ArrayList<Object>(reportsForKeymap.keySet())).find();
			for(Marker xref :  reportsForList)
			{
				reportsForKeymap.put(xref.getIdentifier(), xref.getId());
			}
		}
		//resolve xref 'polymorphism' from polymorphism.Identifier -> polymorphism.id
		for(SNP o: sNPList) for(String xref_label: o.getPolymorphism_Identifier())
		{
			if(xref_label != null) 
				polymorphismKeymap.put(xref_label.trim(), null);
		}
		
		if(polymorphismKeymap.size() > 0) 
		{
			List<Polymorphism> polymorphismList = db.query(Polymorphism.class).in("Identifier",new ArrayList<Object>(polymorphismKeymap.keySet())).find();
			for(Polymorphism xref :  polymorphismList)
			{
				polymorphismKeymap.put(xref.getIdentifier(), xref.getId());
			}
		}
		//update objects with foreign key values
		for(SNP o:  sNPList)
		{
			while(true){
				//update xref Chromosome
				if(o.getChromosome_Identifier() != null) 
				{
					String key = o.getChromosome_Identifier();
					if(chromosomeKeymap.get(key) == null)
					{
						throw new Exception("Import of 'SNP' objects failed: cannot find Chromosome for chromosome_Identifier='"+o.getChromosome_Identifier()+"'");
					}
					o.setChromosome_Id(chromosomeKeymap.get(key));
				}
				//update mref ReportsFor
				if(o.getReportsFor_Identifier() != null) 
				{
					List<Integer> mrefs = new ArrayList<Integer>();
					boolean breakToNextSNP = false;

					int listSize = 0;
					if(o.getReportsFor_Identifier() != null) listSize = Math.max(o.getReportsFor_Identifier().size(), listSize);
					for(int i = 0; i < listSize; i++)
					{
						String key = o.getReportsFor_Identifier().get(i);
							key = key.trim();				
						if(reportsForKeymap.get(key) == null){
							logger.error("Import of 'SNP' objects failed: "+o);
							throw new Exception("Import of 'SNP' objects failed:" 
							+"cannot find reportsFor_Identifier='"+(o.getReportsFor_Identifier() != null && i < o.getReportsFor_Identifier().size() ? o.getReportsFor_Identifier().get(i) : "null")+"'");
						}
						mrefs.add(reportsForKeymap.get(key));
					}
					if(breakToNextSNP){
						break;
					}
					o.setReportsFor_Id(mrefs);
				}
				//update mref Polymorphism
				if(o.getPolymorphism_Identifier() != null) 
				{
					List<Integer> mrefs = new ArrayList<Integer>();
					boolean breakToNextSNP = false;

					int listSize = 0;
					if(o.getPolymorphism_Identifier() != null) listSize = Math.max(o.getPolymorphism_Identifier().size(), listSize);
					for(int i = 0; i < listSize; i++)
					{
						String key = o.getPolymorphism_Identifier().get(i);
							key = key.trim();				
						if(polymorphismKeymap.get(key) == null){
							logger.error("Import of 'SNP' objects failed: "+o);
							throw new Exception("Import of 'SNP' objects failed:" 
							+"cannot find polymorphism_Identifier='"+(o.getPolymorphism_Identifier() != null && i < o.getPolymorphism_Identifier().size() ? o.getPolymorphism_Identifier().get(i) : "null")+"'");
						}
						mrefs.add(polymorphismKeymap.get(key));
					}
					if(breakToNextSNP){
						break;
					}
					o.setPolymorphism_Id(mrefs);
				}
				break;
			}
		}
		
		chromosomeKeymap.clear();
		reportsForKeymap.clear();
		polymorphismKeymap.clear();
		
		return sNPsMissingRefs;
	}
}

