clustering  0.12
Clustering suite for molecular dynamics trajectories.
 All Classes Namespaces Files Functions Variables Typedefs
coring.cpp
1 /*
2 Copyright (c) 2015, Florian Sittel (www.lettis.net)
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #include "coring.hpp"
27 
28 #include "logger.hpp"
29 #include "tools.hpp"
30 
31 #include <iostream>
32 #include <fstream>
33 #include <algorithm>
34 #include <vector>
35 
36 #include <omp.h>
37 
38 namespace Clustering {
39 namespace Coring {
40  WTDMap
41  compute_wtd(std::list<std::size_t> streaks) {
42  WTDMap wtd;
43  if (streaks.size() > 0) {
44  streaks.sort(std::greater<std::size_t>());
45  std::size_t max_streak = streaks.front();
46  for (std::size_t i=0; i <= max_streak; ++i) {
47  float n_steps = 0.0f;
48  for (auto s: streaks) {
49  if (i > s) {
50  break;
51  }
52  n_steps += 1.0f;
53  }
54  wtd[i] = n_steps / ((float) streaks.size());
55  }
56  }
57  return wtd;
58  }
59 
60  void
61  main(boost::program_options::variables_map args) {
62  using namespace Clustering::Tools;
63  namespace b_po = boost::program_options;
64  // load states
65  std::vector<std::size_t> states = Clustering::Tools::read_clustered_trajectory(args["states"].as<std::string>());
66  std::set<std::size_t> state_names(states.begin(), states.end());
67  std::size_t n_frames = states.size();
68  if (args.count("output") || args.count("distribution") || args.count("cores")) {
69  // load concatenation limits to treat concatenated trajectories correctly
70  // when performing dynamical corrections
71  std::vector<std::size_t> concat_limits;
72  if (args.count("concat-limits")) {
73  concat_limits = Clustering::Tools::read_single_column<std::size_t>(args["concat-limits"].as<std::string>());
74  } else if (args.count("concat-nframes")) {
75  std::size_t n_frames_per_subtraj = args["concat-nframes"].as<std::size_t>();
76  for (std::size_t i=n_frames_per_subtraj; i <= n_frames; i += n_frames_per_subtraj) {
77  concat_limits.push_back(i);
78  }
79  } else {
80  concat_limits = {n_frames};
81  }
82  // load window size information
83  std::map<std::size_t, std::size_t> coring_windows;
84  {
85  std::ifstream ifs(args["windows"].as<std::string>());
86  std::string buf1, buf2;
87  std::size_t size_for_all = 1;
88  while (ifs.good()) {
89  ifs >> buf1;
90  ifs >> buf2;
91  if (ifs.good()) {
92  if (buf1 == "*") {
93  size_for_all = string_to_num<std::size_t>(buf2);
94  } else {
95  coring_windows[string_to_num<std::size_t>(buf1)] = string_to_num<std::size_t>(buf2);
96  }
97  }
98  }
99  // fill remaining, not explicitly defined states with common window size
100  for (std::size_t name: state_names) {
101  if ( ! coring_windows.count(name)){
102  coring_windows[name] = size_for_all;
103  }
104  }
105  }
106  // core trajectory
107  std::vector<std::size_t> cored_traj(n_frames);
108  std::size_t current_core = states[0];
109  std::vector<long> cores(n_frames);
110  // honour concatenation limits, i.e. treat every concatenated trajectory-part on its own
111  std::size_t last_limit = 0;
112  for (std::size_t next_limit: concat_limits) {
113  for (std::size_t i=last_limit; i < next_limit; ++i) {
114  // coring window
115  std::size_t w = std::min(i+coring_windows[states[i]], next_limit);
116  bool is_in_core = true;
117  for (std::size_t j=i+1; j < w; ++j) {
118  if (states[j] != states[i]) {
119  is_in_core = false;
120  break;
121  }
122  }
123  if (is_in_core) {
124  current_core = states[i];
125  cores[i] = current_core;
126  } else {
127  cores[i] = -1;
128  }
129  cored_traj[i] = current_core;
130  }
131  last_limit = next_limit;
132  }
133  // write cored trajectory to file
134  if (args.count("output")) {
135  Clustering::Tools::write_clustered_trajectory(args["output"].as<std::string>(), cored_traj);
136  }
137  // write core information to file
138  if (args.count("cores")) {
139  Clustering::Tools::write_single_column<long>(args["cores"].as<std::string>(), cores, false);
140  }
141  // compute/save escape time distributions
142  if (args.count("distribution")) {
143  std::map<std::size_t, std::list<std::size_t>> streaks;
144  std::size_t current_state = cored_traj[0];
145  long n_counts = 0;
146  for (std::size_t state: cored_traj) {
147  if (state == current_state) {
148  ++n_counts;
149  } else {
150  streaks[current_state].push_back(n_counts);
151  current_state = state;
152  n_counts = 1;
153  }
154  }
155  streaks[current_state].push_back(n_counts);
156 
157  std::map<std::size_t, WTDMap> etds;
158  for (std::size_t state: state_names) {
159  etds[state] = compute_wtd(streaks[state]);
160  }
161  // write WTDs to file
162  for (auto state_etd: etds) {
163  std::string fname = Clustering::Tools::stringprintf(args["distribution"].as<std::string>() + "_%d", state_etd.first);
164  Clustering::Tools::write_map<std::size_t, float>(fname, state_etd.second);
165  }
166  }
167  } else {
168  std::cerr << "\n" << "error (coring): nothing to do! please define '--output', '--distribution' or both!" << "\n\n";
169  exit(EXIT_FAILURE);
170  }
171  }
172 } // end namespace Coring
173 } // end namespace Clustering
174 
void write_clustered_trajectory(std::string filename, std::vector< std::size_t > traj)
write state trajectory into plain text file
Definition: tools.cpp:73
std::vector< std::size_t > read_clustered_trajectory(std::string filename)
read states from trajectory (given as plain text file)
Definition: tools.cpp:54
std::string stringprintf(const std::string &str,...)
printf-version for std::string
Definition: tools.cpp:95