30 namespace Clustering {
34 std::vector<std::size_t> concat_limits,
35 std::size_t n_lag_steps) {
36 if (n_lag_steps == 0) {
37 std::cerr <<
"error: lagtime of 0 does not make any sense for MPP clustering" << std::endl;
40 std::vector<std::size_t>::iterator next_limit = concat_limits.begin();
41 std::size_t i_max = (*std::max_element(trajectory.begin(), trajectory.end()));
43 for (std::size_t i=0; i < trajectory.size() - n_lag_steps; ++i) {
44 std::size_t from = trajectory[i];
45 std::size_t to = trajectory[i+n_lag_steps];
46 if (next_limit != concat_limits.end()) {
48 if (i+n_lag_steps < (*next_limit)) {
49 count_matrix(from, to) += 1;
50 }
else if (i+1 == (*next_limit)) {
56 count_matrix(from, to) += 1;
64 std::set<std::size_t> cluster_names) {
65 std::size_t n_rows = count_matrix.size1();
66 std::size_t n_cols = count_matrix.size2();
68 for (std::size_t i: cluster_names) {
69 std::size_t row_sum = 0;
70 for (std::size_t j=0; j < n_cols; ++j) {
71 row_sum += count_matrix(i,j);
74 for (std::size_t j=0; j < n_cols; ++j) {
75 if (count_matrix(i,j) != 0) {
76 transition_matrix(i,j) = count_matrix(i,j) / row_sum;
81 return transition_matrix;
84 std::map<std::size_t, std::size_t>
86 std::set<std::size_t> cluster_names,
88 std::map<std::size_t, float> min_free_energy) {
89 std::map<std::size_t, std::size_t> future_state;
90 for (std::size_t i: cluster_names) {
91 std::vector<std::size_t> candidates;
92 float max_trans_prob = 0.0f;
93 if (transition_matrix(i,i) >= q_min) {
97 for (std::size_t j: cluster_names) {
102 if (transition_matrix(i,j) > max_trans_prob) {
103 max_trans_prob = transition_matrix(i,j);
105 }
else if (transition_matrix(i,j) == max_trans_prob && max_trans_prob > 0.0f) {
106 candidates.push_back(j);
111 if (candidates.size() == 0) {
112 std::cerr <<
"error: state '" << i
113 <<
"' has self-transition probability of "
114 << transition_matrix(i,i)
117 <<
" and does not find any transition candidates."
118 <<
" please have a look at your trajectory!"
121 }
else if (candidates.size() == 1) {
122 future_state[i] = candidates[0];
125 auto min_fe_compare = [&](std::size_t i, std::size_t j) {
126 return min_free_energy[i] < min_free_energy[j];
128 future_state[i] = (*std::min_element(candidates.begin(), candidates.end(), min_fe_compare));
134 std::map<std::size_t, std::vector<std::size_t>>
136 std::set<std::size_t> cluster_names) {
137 std::map<std::size_t, std::vector<std::size_t>> mpp;
138 for (std::size_t i: cluster_names) {
139 std::vector<std::size_t> path = {i};
140 std::set<std::size_t> visited = {i};
141 std::size_t next_state = future_state[i];
144 while (visited.count(next_state) == 0) {
145 path.push_back(next_state);
146 visited.insert(next_state);
147 next_state = future_state[next_state];
154 std::map<std::size_t, std::size_t>
156 std::set<std::size_t> cluster_names) {
157 std::map<std::size_t, std::size_t> pops;
158 for (std::size_t i: cluster_names) {
159 pops[i] = std::count(clusters.begin(), clusters.end(), i);
166 std::map<std::size_t, float>
168 const std::vector<float>& free_energy) {
169 std::map<std::size_t, float> min_fe;
170 for (std::size_t i=0; i < clustering.size(); ++i) {
171 std::size_t state = clustering[i];
172 if (min_fe.count(state) == 0) {
173 min_fe[state] = free_energy[i];
175 if (free_energy[i] < min_fe[state]) {
176 min_fe[state] = free_energy[i];
183 std::map<std::size_t, std::size_t>
185 std::map<std::size_t, std::vector<std::size_t>> mpp,
187 std::set<std::size_t> cluster_names,
189 std::vector<float> free_energy) {
192 std::map<std::size_t, std::size_t> sinks;
193 for (std::size_t i: cluster_names) {
194 std::vector<std::size_t> metastable_states;
195 for (std::size_t j: mpp[i]) {
197 if (transition_matrix(j,j) > q_min) {
198 metastable_states.push_back(j);
201 if (metastable_states.size() == 0) {
203 metastable_states = mpp[i];
206 auto pop_compare = [&](std::size_t i, std::size_t j) ->
bool {
207 return pops[i] < pops[j];
210 auto fe_compare = [&](std::size_t i, std::size_t j) ->
bool {
211 return min_free_energy[i] < min_free_energy[j];
214 auto candidate = std::min_element(metastable_states.begin(), metastable_states.end(), fe_compare);
215 float min_fe = free_energy[*candidate];
216 std::set<std::size_t> sink_candidates;
217 while (candidate != metastable_states.end() && free_energy[*candidate] == min_fe) {
220 sink_candidates.insert(*candidate);
221 metastable_states.erase(candidate);
222 candidate = std::min_element(metastable_states.begin(), metastable_states.end(), fe_compare);
225 if (sink_candidates.size() == 1) {
226 sinks[i] = (*sink_candidates.begin());
229 sinks[i] = (*std::max_element(sink_candidates.begin(), sink_candidates.end(), pop_compare));
237 std::vector<std::size_t>
239 std::map<std::size_t, std::size_t> sinks) {
240 for (std::size_t& state: trajectory) {
241 state = sinks[state];
248 std::tuple<std::vector<std::size_t>, std::map<std::size_t, std::size_t>>
250 std::vector<std::size_t> concat_limits,
253 std::vector<float> free_energy) {
254 std::set<std::size_t> microstate_names;
255 std::vector<std::size_t> traj = initial_trajectory;
256 std::map<std::size_t, std::size_t> lumping;
257 const uint MAX_ITER=100;
259 for (iter=0; iter < MAX_ITER; ++iter) {
261 microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
262 if (microstate_names.count(0)) {
263 std::cerr <<
"\nwarning:\n"
264 <<
" there is a state '0' in your trajectory.\n"
265 <<
" are you sure you generated a proper trajectory of microstates\n"
266 <<
" (e.g. by running a final, seeded density-clustering to fill up the FEL)?\n"
269 logger(std::cout) <<
"iteration "
275 logger(std::cout) <<
" calculating transition probabilities" << std::endl;
280 logger(std::cout) <<
" calculating future states" << std::endl;
286 logger(std::cout) <<
" calculating most probable path" << std::endl;
287 std::map<std::size_t, std::vector<std::size_t>> mpp =
most_probable_path(future_state, microstate_names);
289 logger(std::cout) <<
" calculating path sinks" << std::endl;
290 std::map<std::size_t, std::size_t> sinks =
path_sinks(traj
297 std::vector<std::size_t> traj_old = traj;
298 logger(std::cout) <<
" lumping trajectory" << std::endl;
300 for (
auto from_to: sinks) {
301 std::size_t from = from_to.first;
302 std::size_t to = from_to.second;
308 if (traj_old == traj) {
312 if (iter == MAX_ITER) {
315 return std::make_tuple(traj, lumping);
320 main(boost::program_options::variables_map args) {
321 std::string basename = args[
"basename"].as<std::string>();
323 std::map<std::size_t, std::pair<std::size_t, float>> transitions;
324 std::map<std::size_t, std::size_t> max_pop;
325 std::map<std::size_t, float> max_qmin;
330 float q_min_from = args[
"qmin-from"].as<
float>();
331 float q_min_to = args[
"qmin-to"].as<
float>();
332 float q_min_step = args[
"qmin-step"].as<
float>();
333 int lagtime = args[
"lagtime"].as<
int>();
335 std::vector<std::size_t> concat_limits;
336 if (args.count(
"concat-limits")) {
337 concat_limits = Clustering::Tools::read_single_column<std::size_t>(args[
"concat-limits"].as<std::string>());
338 }
else if (args.count(
"concat-nframes")) {
339 std::size_t n_frames_per_subtraj = args[
"concat-nframes"].as<std::size_t>();
340 for (std::size_t i=n_frames_per_subtraj; i < traj.size(); i += n_frames_per_subtraj) {
341 concat_limits.push_back(i);
344 for (
float q_min=q_min_from; q_min <= q_min_to; q_min += q_min_step) {
347 traj = std::get<0>(traj_sinks);
353 std::map<std::size_t, std::size_t> sinks = std::get<1>(traj_sinks);
354 for (
auto from_to: sinks) {
355 transitions[from_to.first] = {from_to.second, q_min};
364 for (std::size_t
id: std::set<std::size_t>(traj.begin(), traj.end())) {
365 max_pop[id] = pops[id];
366 max_qmin[id] = q_min;
371 std::ofstream ofs(basename +
"_transitions.dat");
372 for (
auto trans: transitions) {
373 ofs << trans.first <<
" " << trans.second.first <<
" " << trans.second.second <<
"\n";
376 Clustering::Tools::write_map<std::size_t, std::size_t>(basename +
"_max_pop.dat", max_pop);
377 Clustering::Tools::write_map<std::size_t, float>(basename +
"_max_qmin.dat", max_qmin);
boost::numeric::ublas::mapped_matrix< float > SparseMatrixF
BOOST implementation of a sparse matrix for floats.
SparseMatrixF row_normalized_transition_probabilities(SparseMatrixF count_matrix, std::set< std::size_t > cluster_names)
compute transition matrix from counts by normalization of rows
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > clusters, std::set< std::size_t > cluster_names)
compute cluster populations
std::map< std::size_t, float > microstate_min_free_energy(const std::vector< std::size_t > &clustering, const std::vector< float > &free_energy)
std::map< std::size_t, std::vector< std::size_t > > most_probable_path(std::map< std::size_t, std::size_t > future_state, std::set< std::size_t > cluster_names)
std::map< std::size_t, std::size_t > path_sinks(std::vector< std::size_t > clusters, std::map< std::size_t, std::vector< std::size_t >> mpp, SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::vector< float > free_energy)
std::ostream & logger(std::ostream &s)
std::map< std::size_t, std::size_t > single_step_future_state(SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::map< std::size_t, float > min_free_energy)
SparseMatrixF transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps)
std::tuple< std::vector< std::size_t >, std::map< std::size_t, std::size_t > > fixed_metastability_clustering(std::vector< std::size_t > initial_trajectory, std::vector< std::size_t > concat_limits, float q_min, std::size_t lagtime, std::vector< float > free_energy)
run clustering for given Q_min value
std::vector< std::size_t > lumped_trajectory(std::vector< std::size_t > trajectory, std::map< std::size_t, std::size_t > sinks)
void main(boost::program_options::variables_map args)