import java.io.*;
import java.lang.*;
import java.math.*;
import java.sql.*;
import java.util.*;

/**
 *  Clustering algorithm to group similar users.
 *
 *  Every user's pos_word_freq value is wrapped in a Cluster and compared, first against the
 *  clusters built so far and then against the other unclustered users.  The outcome is
 *  written back to the cluster_id column of the user table:
 *  <ul>
 *    <li>-1 : user has not been processed yet</li>
 *    <li>-2 : user has no words, could not be clustered, or is temporarily flagged as
 *             "already compared" during a search</li>
 *    <li>&gt;= 0 : index of the user's cluster in the in-memory cluster list</li>
 *  </ul>
 *
 * @author     Brandon Douthit-Wood
 * @created    March 31, 2004
 */
public class UserCluster {

	/** cluster_id of a user that still has to be processed. */
	private static final String UNPROCESSED = "-1";

	/** cluster_id of a user that was skipped, could not be clustered, or is temporarily flagged. */
	private static final String SKIPPED = "-2";

	/** Value of pos_word_freq that marks a user without any words. */
	private static final String NO_WORDS = "x";

	/** Similarity at or above which a match is accepted immediately. */
	private static final double MERGE_THRESHOLD = 0.90;

	// Raw collection types are kept on purpose: the file is written against pre-generics Java.
	private Vector clusters;

	/**
	 *  Initializes the clusters Vector, connects to database.  Terminates the JVM with a
	 *  non-zero status when the connection cannot be established.
	 */
	public UserCluster() {
		clusters = new Vector();

		if ( !Query.connectToDB() ) {
			// a failed start-up must not look like a successful run to the calling process
			System.exit( 1 );
		}
	}

	/**
	 *  Clusters similar users together.
	 *
	 *  For each unprocessed user:
	 *  <ol>
	 *    <li>The user is compared with every existing cluster.  If the best similarity is 90% or
	 *        more, the user is merged into that cluster.</li>
	 *    <li>Otherwise the user is compared with the remaining unprocessed users, in batches,
	 *        until one reaches 90% similarity or no candidates are left.  The best candidate that
	 *        scored higher than the best cluster is then merged with the user into a new cluster.</li>
	 *    <li>If neither step yields a partner, the user is flagged with cluster_id -2.</li>
	 *  </ol>
	 *
	 *  NOTE(review): earlier documentation of this method stated a 95% threshold for merging two
	 *  users.  The code has never enforced that value; the behaviour described above is what is
	 *  implemented.  Confirm which one is intended before changing the threshold.
	 *
	 *  Terminates the JVM with a non-zero status if a SQLException occurs.
	 */
	public void clusterUsers() {
		String query;
		// kept at method scope: the sanity check after each candidate batch reads the last id seen
		String userID2 = "";

		// clear clusters
		System.out.print( "Clearing old clusters..." );
		Query.executeUpdate( "update user set cluster_id=-1" );
		System.out.println( "done" );

		while ( true ) {
			try {
				// continually loop through users until all are processed
				query = "select id,pos_word_freq from user where cluster_id=-1 limit 1000";
				ResultSet userResult = Query.executeQuery( query );
				if ( Query.getNumResults( userResult ) == 0 ) {
					closeResult( userResult );
					break;
				}

				// The batch above is a snapshot.  A user that gets paired with an earlier user of
				// the same batch is still present in the snapshot, so remember those ids and
				// skip them instead of clustering them a second time.
				Set clusteredInBatch = new HashSet();

				while ( userResult.next() ) {
					String userID1 = userResult.getString( "id" );
					String words1 = userResult.getString( "pos_word_freq" );

					if ( clusteredInBatch.contains( userID1 ) ) {
						continue;
					}

					// ignore user if they do not have any words
					if ( words1.equals( NO_WORDS ) ) {
						setClusterId( userID1, SKIPPED );
						continue;
					}

					// create new cluster with just that user in it
					Cluster c1 = new Cluster( words1 );

					double bestValue = 0.0;
					int bestIndex = -1;
					boolean mergeUsers = false;
					String bestID = "";
					// ids that received the temporary -2 flag and must be reset afterwards
					StringBuffer flagged = new StringBuffer();

					// try to merge with existing cluster first
					for ( int i = 0; i < clusters.size(); i++ ) {
						Cluster existing = (Cluster) clusters.elementAt( i );
						double currentValue = c1.compareTo( existing );

						System.out.println( "Comparing user " + userID1 + " to cluster " + i + " -> " + currentValue );

						// values above 1.0 are not accepted as a similarity
						if ( currentValue > bestValue && currentValue <= 1.0 ) {
							bestValue = currentValue;
							bestIndex = i;
						}
					}

					// merge user into existing cluster if similarity is 90% or more; ">=" so that
					// a score of exactly 0.90 does not fall between this test and the search below
					boolean mergeClusters = bestValue >= MERGE_THRESHOLD;

					// keep going if we didn't merge user into cluster, and until we find another
					// user with at least 90% similarity or run out of candidates
					while ( !mergeClusters && bestValue < MERGE_THRESHOLD ) {
						query = "select id,pos_word_freq from user where cluster_id=-1 and id !=" + userID1 + " limit 1000";
						ResultSet userResult2 = Query.executeQuery( query );
						if ( Query.getNumResults( userResult2 ) <= 0 ) {
							closeResult( userResult2 );
							break;
						}

						while ( userResult2.next() ) {
							userID2 = userResult2.getString( "id" );
							String words2 = userResult2.getString( "pos_word_freq" );
							if ( words2.equals( NO_WORDS ) ) {
								// permanently skipped, therefore not added to the reset list
								setClusterId( userID2, SKIPPED );
								continue;
							}

							// create new cluster with just this user in it
							Cluster candidate = new Cluster( words2 );

							// keep list of user id's - we need to clear temporary flags and it
							// is much quicker to keep track of which ones we have changed and only
							// change those ones back
							flagged.append( userID2 ).append( ',' );

							double currentValue = c1.compareTo( candidate );

							System.out.println( "Comparing user " + userID1 + " to user " + userID2 + " -> " + currentValue );

							if ( currentValue > bestValue && currentValue <= 1.0 ) {
								mergeUsers = true;
								bestValue = currentValue;
								bestID = userID2;
							}

							// set temporary flag so we know we've already seen this user; this is
							// what makes the next candidate query return a fresh batch
							setClusterId( userID2, SKIPPED );
						}
						closeResult( userResult2 );
						System.out.println( "   ***BEST SO FAR: " + bestID + " -> " + bestValue );

						// just some error checking: give up once ids above 10000 have been reached
						// without finding any meaningful similarity
						int id = Integer.parseInt( userID2 );
						if ( id > 10000 && bestValue < 0.001 ) {
							break;
						}
					}

					// clear any temporary clusters we may have made...
					if ( flagged.length() > 0 ) {
						System.out.print( "\n Clearing temporary clusters..." );
						// drop the trailing comma before building the IN list
						String list = "(" + flagged.substring( 0, flagged.length() - 1 ) + ")";

						query = "update user set cluster_id=" + UNPROCESSED + " where id in " + list;
						Query.executeUpdate( query );
						System.out.println( "done" );
					}

					// set cluster id of user to cluster they were merged into
					if ( mergeClusters ) {
						Cluster target = (Cluster) clusters.elementAt( bestIndex );
						clusters.setElementAt( c1.merge( target ), bestIndex );
						String index = String.valueOf( bestIndex );

						setClusterId( userID1, index );
						System.out.println( "Merging user " + userID1 + " into cluster " + index );
					}
					// two users merged into new cluster, must create that cluster
					else if ( mergeUsers ) {
						query = "select pos_word_freq from user where id=" + bestID;
						ResultSet wordResult = Query.executeQuery( query );
						wordResult.first();
						String partnerWords = wordResult.getString( "pos_word_freq" );
						closeResult( wordResult );

						clusters.add( c1.merge( new Cluster( partnerWords ) ) );
						String index = String.valueOf( clusters.size() - 1 );

						// update user table with cluster id
						setClusterId( userID1, index );
						setClusterId( bestID, index );
						// the partner may still be waiting in the current batch snapshot
						clusteredInBatch.add( bestID );

						System.out.println( "Merging user " + userID1 + " and user " + bestID );
					}
					// couldn't merge user, skip
					else {
						System.out.println( "Couldn't cluster user " + userID1 );
						// update user table with dummy cluster id
						setClusterId( userID1, SKIPPED );
					}

					System.out.println( "\n" );
				}
				closeResult( userResult );
			}
			catch ( SQLException e ) {
				System.err.println( "Error clustering users..." );
				e.printStackTrace();
				// report the failure to the calling process
				System.exit( 1 );
			}
		}
	}

	/**
	 *  Writes a cluster id for a single user to the user table.
	 *
	 * @param  userID     id of the user row to update
	 * @param  clusterID  value to store in cluster_id
	 */
	private static void setClusterId( String userID, String clusterID ) {
		Query.executeUpdate( "update user set cluster_id=" + clusterID + " where id=" + userID );
	}

	/**
	 *  Closes a result set, tolerating a null reference.
	 *
	 * @param  rs                result set to close, may be null
	 * @exception  SQLException  if closing the result set fails
	 */
	private static void closeResult( ResultSet rs ) throws SQLException {
		if ( rs != null ) {
			rs.close();
		}
	}

	/**
	 *  Clusters the users.
	 *
	 * @param  args  The command line arguments
	 */
	public static void main( String[] args ) {
		UserCluster cluster = new UserCluster();
		cluster.clusterUsers();
	}
}

