import java.sql.*;
import java.util.*;

/**
 *  Class used for clustering similar users together based on their ratings data.
 *
 * @author     Brandon Douthit-Wood
 * @created    March 31, 2004
 */
public class Cluster {

	/**  Hashtable to hold words and frequencies from movies the user's in the cluster have rated. */
	public Hashtable wordHash;
	/**  ID of the cluster. */
	public int id;
	/** Vector of users in cluster */
	public Vector userList;

	/**  Initializes the wordHash, with nothing inside. */
	public Cluster() {
		wordHash = new Hashtable();
		userList = new Vector();
	}

	/**
	 *  Initializes the wordHash with the words passed in.
	 *
	 * @param  words  Colon-delimited string (word:frequency)
	 */
	public Cluster( String words ) {
		wordHash = new Hashtable();
		userList = new Vector();
		String wordArray[] = words.split( ":" );
		String word;
		Double freq;
		int i;

		// get word frequencies from wordArray
		for ( i = 0; i < wordArray.length; i += 2 ) {
			if ( i + 1 >= wordArray.length ) {
				continue;
			}
			word = wordArray[i];
			freq = new Double( wordArray[i + 1] );
			wordHash.put( word, freq );
		}
	}

	/**
	 *  Merges two clusters together.  Averages the word-frequency values between the two clusters.
	 *
	 * @param  c  The cluster to merge into the current cluster.
	 * @return    Returns the newly formed cluster.
	 */
	public Cluster merge( Cluster c ) {
		String word;
		Double frequency;
		double freq1;
		double freq2;

		// get all values from hash table
		Enumeration enum = c.wordHash.keys();

		// enumerate through wordHash values
		while ( enum.hasMoreElements() ) {
			word = (String) enum.nextElement();
			freq1 = ( (Double) c.wordHash.get( word ) ).doubleValue();
			frequency = (Double) this.wordHash.get( word );

			// word not yet in wordHash
			if ( frequency == null ) {
				this.wordHash.put( word, new Double( freq1 ) );
			}
			// find average of frequency values
			else {
				freq2 = frequency.doubleValue();
				frequency = new Double( ( freq1 + freq2 ) / 2 );
				this.wordHash.put( word, frequency );
			}
		}

		// return 'this' Cluster object
		return this;
	}

	/**
	 *  Adds a user to the list of users in this cluster.
	 *
	 * @param  u  The user to add
	 */
	public void addUser( User u ) {
		this.userList.add( u );
	}

	/**
	 *  Compares word-frequency values between two clusters.  Words that they
	 *  do not share in common are ignored.
	 *
	 * @param  c  The cluster to compare to.
	 * @return    Returns the cluster's similarity, calculated using the Pearson Correlation Coefficient
	 */
	public double compareTo( Cluster c ) {
		String word;
		Double frequency;
		int termsInCommon = 0;
		double numer;
		double denom;
		double freq1;
		double freq2;
		double x = 0;
		double x2 = 0;
		double y = 0;
		double y2 = 0;
		double xy = 0;

		// get all values from wordHash
		Enumeration enum = this.wordHash.keys();

		// enumerate through all wordHash values
		while ( enum.hasMoreElements() ) {
			word = (String) enum.nextElement();
			frequency = (Double) c.wordHash.get( word );
			if ( frequency == null ) {
				continue;
			}

			termsInCommon++;
			freq1 = ( (Double) this.wordHash.get( word ) ).doubleValue();
			freq2 = frequency.doubleValue();
			x += freq1;
			y += freq2;
			x2 += Math.pow( freq1, 2 );
			y2 += Math.pow( freq2, 2 );
			xy += ( freq1 * freq2 );
		}

		// if nothing in common, return 0
		if ( termsInCommon == 0 ) {
			return 0;
		}

		// calculate Pearson Correlation Coefficient
		numer = ( xy - ( x * y / termsInCommon ) );
		numer = Math.abs( numer );
		denom = ( x2 - ( Math.pow( x, 2 ) / termsInCommon ) ) * ( y2 - ( Math.pow( y, 2 ) / termsInCommon ) );
		denom = Math.sqrt( Math.abs( denom ) );
		if ( denom == 0.0 ) {
			denom = 0.001;
		}
		return numer / denom;
	}

	/**
	 *  Gets all users from the given cluster
	 *
	 * @param  clusterID  ID of cluster to get users from
	 * @return            Returns a Vector of clusters
	 */
	public static Vector getUsersFromCluster( String clusterID ) {
		String query = "";
		String userID;
		String words;
		String rating;
		String movieID;
		Vector userList = new Vector();
		User user;

		// get all users from cluster with id of clusterID
		query = "select id,pos_word_freq from user where cluster_id=" + clusterID;
		ResultSet userResult = Query.executeQuery( query );

		try {
			// loop through all users in cluster, get all their ratings
			while ( userResult.next() ) {
				userID = userResult.getString( "id" );
				words = userResult.getString( "pos_word_freq" );
				user = new User( userID, words );

				// get all of user's ratings
				System.out.print( "\tGetting movie ratings for user " + userID + "..." );
				query = "select rating,movie_id from rating where training='' and user_id=" + userID;
				ResultSet ratingResult = Query.executeQuery( query );
				// put ratings into hash table for quick retrieval later
				while ( ratingResult.next() ) {
					rating = ratingResult.getString( "rating" );
					movieID = ratingResult.getString( "movie_id" );
					user.ratingHash.put( movieID, rating );
				}
				ratingResult.close();
				System.out.println( user.ratingHash.size() + " ratings found" );
				userList.add( user );
			}
			userResult.close();
		}
		catch ( SQLException e ) {
			System.err.println( "Error getting users from cluster " + clusterID + "..." );
			e.printStackTrace();
			System.exit( 0 );
		}
		return userList;
	}
}

