cmonkey.membership
index
/home/weiju/Projects/ISB/cmonkey-python/cmonkey/membership.py

membership.py - cMonkey cluster membership functionality
This module captures the cluster membership component
of cMonkey.
 
This file is part of cMonkey Python. Please see README and LICENSE for
more information and licensing details.

 
Modules
       
array
cPickle
cmonkey.datamatrix
logging
math
numpy
random
rpy2.robjects
sqlite3
sys
cmonkey.util

 
Classes
       
OrigMembership

 
class OrigMembership
    This is an implementation of a membership data structure that more
closely resembles the R original. It is much simpler than
ClusterMembership, with a smaller memory footprint.
 
  Methods defined here:
__init__(self, row_names, col_names, row_is_member_of, col_is_member_of, config_params, row_indexes=None, col_indexes=None)
identical constructor to ClusterMembership
add_cluster_to_column(self, col, cluster, force=False)
add_cluster_to_row(self, row, cluster, force=False)
clusters_for_column(self, column)
determine the clusters for the specified column
clusters_for_row(self, row)
determine the clusters for the specified row
clusters_not_in_column(self, col, clusters)
clusters_not_in_row(self, row, clusters)
columns_for_cluster(self, cluster)
free_slots_for_column(self, col)
free_slots_for_row(self, row)
is_column_in_cluster(self, col, cluster)
is_row_in_cluster(self, row, cluster)
max_changes_per_col(self)
returns the maximum number of changes per column
max_changes_per_row(self)
returns the maximum number of changes per row
max_cluster_rows_allowed(self)
returns the maximum number of rows that should be in a cluster
min_cluster_columns_allowed(self)
returns the minimum number of columns that should be in a cluster
min_cluster_rows_allowed(self)
returns the minimum number of rows that should be in a cluster
num_clusters(self)
returns the number of clusters
num_clusters_for_column(self, column)
returns the number of clusters for the column
num_clusters_for_row(self, row)
returns the number of clusters for the row
num_clusters_per_column(self)
returns the number of clusters per column
num_clusters_per_row(self)
returns the number of clusters per row
num_column_members(self, cluster)
num_row_members(self, cluster)
pickle_path(self)
returns the function-specific pickle-path
probability_seeing_col_change(self)
returns the probability for seeing a column change
probability_seeing_row_change(self)
returns the probability for seeing a row change
replace_column_cluster(self, col, index, new)
replace_row_cluster(self, row, index, new)
rows_for_cluster(self, cluster)
update(self, matrix, row_scores, column_scores, num_iterations, iteration_result)
top-level update method
write_column_members(self, filename)
Mostly for debugging, write out the current column membership state into a TSV file
write_row_members(self, filename)
Mostly for debugging, write out the current row membership state into a TSV file

 
Functions
       
adjust_cluster(membership, cluster, rowscores, cutoff, limit)
adjust a single cluster
compensate_size(membership, matrix, rd_scores, cd_scores)
size compensation function
create_membership(matrix, seed_row_memberships, seed_column_memberships, config_params)
create instance of ClusterMembership using
the provided seeding algorithms
fuzzify(membership, row_scores, column_scores, num_iterations, iteration_result, add_fuzz)
Provide an iteration-specific fuzzification
get_best_clusters(scores, n, sort=False)
retrieve the n best scored clusters for the given row/column score matrix
get_cc_scores(membership, scores, bandwidth, cluster)
calculate the density scores for the given column score values in the
specified cluster
get_col_density_scores(membership, col_scores)
get_density_scores(membership, row_scores, col_scores)
get_row_density_scores(membership, row_scores)
getting density scores improves small clusters
get_rr_scores(membership, rowscores, bandwidth, cluster)
calculate the density scores for the given row score values in the
specified cluster
make_db_column_seeder(outdb)
make_db_row_seeder(outdb)
make_file_column_seeder(filename, sep=' ')
make_file_seeder(filename, sep=' ')
uses a TSV file to seed row membership
make_kmeans_row_seeder(num_clusters)
creates a row seeding function based on k-means
old_fuzzy_coefficient(iteration, num_iterations)
standard fuzzy coefficient as defined in cMonkey
postadjust(membership, rowscores, cutoff=0.33, limit=100)
adjusting the cluster memberships after the main iterations have been done
Returns true if the function changed the membership, false if not
replace_delta_column_member(membership, col, cm, cd_scores)
replace_delta_row_member(membership, row, rm, rd_scores)
seeing_change(prob)
returns true if the update is seeing the change
std_fuzzy_coefficient(iteration, num_iterations)
standard fuzzy coefficient as defined in cMonkey
update_for_cols(membership, cd_scores, multiprocessing)
updating column memberships according to cd_scores
update_for_rows(membership, rd_scores, multiprocessing)
generically updating row memberships according to rd_scores

 
Data
KEY_CLUSTERS_PER_COL = 'memb.clusters_per_col'
KEY_CLUSTERS_PER_ROW = 'memb.clusters_per_row'
KEY_COL_IS_MEMBER_OF = 'memb.col_is_member_of'
KEY_MAX_CHANGES_PER_COL = 'memb.max_changes_per_col'
KEY_MAX_CHANGES_PER_ROW = 'memb.max_changes_per_row'
KEY_MAX_CLUSTER_ROWS_ALLOWED = 'memb.max_cluster_rows_allowed'
KEY_MIN_CLUSTER_ROWS_ALLOWED = 'memb.min_cluster_rows_allowed'
KEY_NUM_CLUSTERS = 'num_clusters'
KEY_PROB_COL_CHANGE = 'memb.prob_col_change'
KEY_PROB_ROW_CHANGE = 'memb.prob_row_change'
KEY_ROW_IS_MEMBER_OF = 'memb.row_is_member_of'
MAX_ADJUST_TRIES = 50
UPDATE_MEMBERSHIP = None