clustering  0.12
Clustering suite for molecular dynamics trajectories.
 All Classes Namespaces Files Functions Variables Typedefs
clustering.cpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015, Florian Sittel (www.lettis.net)
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
37 #include "config.hpp"
38 // sub-modules
39 #include "density_clustering.hpp"
40 #ifdef DC_USE_MPI
41  #include "density_clustering_mpi.hpp"
42 #endif
43 #include "mpp.hpp"
44 #include "network_builder.hpp"
45 #include "state_filter.hpp"
46 #include "coring.hpp"
47 // toolset
48 #include "logger.hpp"
49 #include "tools.hpp"
50 
51 #include <omp.h>
52 #include <boost/program_options.hpp>
53 
54 int main(int argc, char* argv[]) {
55  namespace b_po = boost::program_options;
56  std::string general_help =
57  "clustering - a classification framework for MD data\n"
58  "\n"
59  "modes:\n"
60  " density: run density clustering\n"
61  " network: build network from density clustering results\n"
62  " mpp: run MPP (Most Probable Path) clustering\n"
63  " (based on density-results)\n"
64  " coring: boundary corrections for clustering results.\n"
65  " filter: filter phase space (e.g. dihedrals) for given state\n"
66  "\n"
67  "usage:\n"
68  " clustering MODE --option1 --option2 ...\n"
69  "\n"
70  "for a list of available options per mode, run with '-h' option, e.g.\n"
71  " clustering density -h\n"
72  ;
73 
74  enum {DENSITY, MPP, NETWORK, FILTER, CORING} mode;
75 
76  if (argc <= 2) {
77  std::cerr << general_help;
78  return EXIT_FAILURE;
79 
80  } else {
81  std::string str_mode(argv[1]);
82  if (str_mode.compare("density") == 0) {
83  mode = DENSITY;
84  } else if (str_mode.compare("mpp") == 0) {
85  mode = MPP;
86  } else if (str_mode.compare("network") == 0) {
87  mode = NETWORK;
88  } else if (str_mode.compare("filter") == 0) {
89  mode = FILTER;
90  } else if (str_mode.compare("coring") == 0) {
91  mode = CORING;
92  } else {
93  std::cerr << "\nerror: unrecognized mode '" << str_mode << "'\n\n";
94  std::cerr << general_help;
95  return EXIT_FAILURE;
96  }
97  }
98  b_po::variables_map args;
99  b_po::positional_options_description pos_opts;
100  // density options
101  b_po::options_description desc_dens (std::string(argv[1]).append(
102  "\n\n"
103  "perform clustering of MD data based on phase space densities.\n"
104  "densities are approximated by counting neighboring frames inside\n"
105  "a n-dimensional hypersphere of specified radius.\n"
106  "distances are measured with n-dim P2-norm.\n"
107  "\n"
108  "options"));
109  desc_dens.add_options()
110  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
111  ("file,f", b_po::value<std::string>()->required(), "input (required): phase space coordinates (space separated ASCII).")
112  ("radius,r", b_po::value<float>(), "parameter: hypersphere radius.")
113  // optional
114  ("threshold,t", b_po::value<float>(), "parameter: Free Energy threshold for clustering (FEL is normalized to zero).")
115  ("threshold-screening,T", b_po::value<std::vector<float>>()->multitoken(),
116  "parameters: screening of free energy landscape. format: FROM STEP TO; e.g.: '-T 0.1 0.1 11.1'.\n"
117  "set -T -1 for default values: FROM=0.1, STEP=0.1, TO=MAX_FE.\n"
118  "parameters may be given partially, e.g.: -T 0.2 0.4 to start at 0.2 and go to MAX_FE at steps 0.4.\n"
119  "for threshold-screening, --output denotes the basename only. output files will have the"
120  " current threshold limit appended to the given filename.")
121  ("output,o", b_po::value<std::string>(), "output (optional): clustering information.")
122  ("input,i", b_po::value<std::string>(), "input (optional): initial state definition.")
123  ("radii,R", b_po::value<std::vector<float>>()->multitoken(), "parameter: list of radii for population/free energy calculations "
124  "(i.e. compute populations/free energies for several radii in one go).")
125  ("population,p", b_po::value<std::string>(), "output (optional): population per frame (if -R is set: this defines only the basename).")
126  ("free-energy,d", b_po::value<std::string>(), "output (optional): free energies per frame (if -R is set: this defines only the basename).")
127  ("free-energy-input,D", b_po::value<std::string>(), "input (optional): reuse free energy info.")
128  ("nearest-neighbors,b", b_po::value<std::string>(), "output (optional): nearest neighbor info.")
129  ("nearest-neighbors-input,B", b_po::value<std::string>(), "input (optional): reuse nearest neighbor info.")
130  // defaults
131  ("only-initial,I", b_po::bool_switch()->default_value(false),
132  "only assign initial (i.e. low free energy / high density) frames to clusters. "
133  "leave unclustered frames as state '0'.")
134  ("nthreads,n", b_po::value<int>()->default_value(0),
135  "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
136  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
137  ;
138  // MPP options
139  b_po::options_description desc_mpp (std::string(argv[1]).append(
140  "\n\n"
141  "TODO: description for MPP"
142  "\n"
143  "options"));
144  desc_mpp.add_options()
145  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
146  ("input,i", b_po::value<std::string>()->required(), "input (required): initial state definition.")
147  ("free-energy-input,D", b_po::value<std::string>()->required(), "input (required): reuse free energy info.")
148  ("lagtime,l", b_po::value<int>()->required(), "input (required): lagtime in units of frame numbers.")
149  ("qmin-from", b_po::value<float>()->default_value(0.01, "0.01"), "initial Qmin value (default: 0.01).")
150  ("qmin-to", b_po::value<float>()->default_value(1.0, "1.00"), "final Qmin value (default: 1.00).")
151  ("qmin-step", b_po::value<float>()->default_value(0.01, "0.01"), "Qmin stepping (default: 0.01).")
152  ("concat-nframes", b_po::value<std::size_t>(),
153  "input (parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
154  ("concat-limits", b_po::value<std::string>(),
155  "input (file): file with frame ids (base 0) of first frames per (not equally sized) sub-trajectory for concatenated trajectory files.")
156  // defaults
157  ("basename", b_po::value<std::string>()->default_value("mpp"), "basename for output files (default: 'mpp').")
158  ("nthreads,n", b_po::value<int>()->default_value(0),
159  "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
160  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
161  ;
162  // network options
163  b_po::options_description desc_network (std::string(argv[1]).append(
164  "\n\n"
165  "TODO: description for network builder"
166  "\n"
167  "options"));
168  desc_network.add_options()
169  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
170  // optional
171  ("basename,b", b_po::value<std::string>()->default_value("clust.\%0.2f"),
172  "(optional): basename of input files (default: clust.\%0.2f).")
173  ("min", b_po::value<float>()->default_value(0.1f, "0.10"), "(optional): minimum free energy (default: 0.10).")
174  ("max", b_po::value<float>()->default_value(0.0f, "0"), "(optional): maximum free energy (default: 0; i.e. max. available).")
175  ("step", b_po::value<float>()->default_value(0.1f, "0.10"), "(optional): free energy stepping (default: 0.10).")
176  ("minpop,p", b_po::value<std::size_t>()->default_value(1),
177  "(optional): minimum population of node to be considered for network (default: 1).")
178  // defaults
179  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
180  ;
181  // filter options
182  b_po::options_description desc_filter (std::string(argv[1]).append(
183  "\n\n"
184  "filter phase space (e.g. dihedral angles, cartesian coords, etc.) for given state."
185  "\n"
186  "options"));
187  desc_filter.add_options()
188  ("help,h", b_po::bool_switch()->default_value(false),
189  "show this help.")
190  ("states,s", b_po::value<std::string>()->required(),
191  "(required): file with state information (i.e. clustered trajectory).")
192  ("coords,c", b_po::value<std::string>(),
193  "file with coordinates (either plain ASCII or GROMACS' xtc).")
194  ("output,o", b_po::value<std::string>(),
195  "filtered data.")
196  ("state,S", b_po::value<std::size_t>(),
197  "state id of selected state.")
198 
199  ("list", b_po::bool_switch()->default_value(false),
200  "list states and their populations")
201  ;
202  // coring options
203  b_po::options_description desc_coring (std::string(argv[1]).append(
204  "\n\n"
205  "compute boundary corrections for clustering results."
206  "\n"
207  "options"));
208  desc_coring.add_options()
209  ("help,h", b_po::bool_switch()->default_value(false),
210  "show this help.")
211  // optional
212  ("states,s", b_po::value<std::string>()->required(),
213  "(required): file with state information (i.e. clustered trajectory")
214  ("windows,w", b_po::value<std::string>()->required(),
215  "(required): file with window sizes."
216  "format is space-separated lines of\n\n"
217  "STATE_ID WINDOW_SIZE\n\n"
218  "use * as STATE_ID to match all (other) states.\n"
219  "e.g.:\n\n"
220  "* 20\n"
221  "3 40\n"
222  "4 60\n\n"
223  "matches 40 frames to state 3, 60 frames to state 4 and 20 frames to all the other states")
224  ("output,o", b_po::value<std::string>(),
225  "(optional): cored trajectory")
226  ("distribution,d", b_po::value<std::string>(),
227  "(optional): write waiting time distributions to file.")
228  ("cores,c", b_po::value<std::string>(),
229  "(optional): write core information to file, i.e. trajectory with state name if in core region or -1 if not in core region")
230  ("concat-nframes", b_po::value<std::size_t>(),
231  "input (optional parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
232  ("concat-limits", b_po::value<std::string>(),
233  "input (optional, file): file with frame ids (base 0) of first frames per (not equally sized) sub-trajectory for concatenated trajectory files.")
234  // defaults
235  ("verbose,v", b_po::bool_switch()->default_value(false),
236  "verbose mode: print runtime information to STDOUT.")
237  ;
238  // parse cmd arguments
239  b_po::options_description desc;
240  switch(mode){
241  case DENSITY:
242  desc.add(desc_dens);
243  break;
244  case MPP:
245  desc.add(desc_mpp);
246  break;
247  case NETWORK:
248  desc.add(desc_network);
249  break;
250  case FILTER:
251  desc.add(desc_filter);
252  break;
253  case CORING:
254  desc.add(desc_coring);
255  break;
256  default:
257  std::cerr << "error: unknown mode. this should never happen." << std::endl;
258  return EXIT_FAILURE;
259  }
260  try {
261  b_po::store(b_po::command_line_parser(argc, argv).options(desc).run(), args);
262  b_po::notify(args);
263  } catch (b_po::error& e) {
264  if ( ! args["help"].as<bool>()) {
265  std::cerr << "\nerror parsing arguments:\n\n" << e.what() << "\n\n" << std::endl;
266  }
267  std::cerr << desc << std::endl;
268  return EXIT_FAILURE;
269  }
270  if (args["help"].as<bool>()) {
271  std::cout << desc << std::endl;
272  return EXIT_SUCCESS;
273  }
274  // setup defaults
275  if (args.count("verbose")) {
276  Clustering::verbose = args["verbose"].as<bool>();
277  }
278  // setup OpenMP
279  int n_threads = 0;
280  if (args.count("nthreads")) {
281  n_threads = args["nthreads"].as<int>();
282  }
283  if (n_threads > 0) {
284  omp_set_num_threads(n_threads);
285  }
286  // run selected subroutine
287  switch(mode) {
288  case DENSITY:
289  #ifdef DC_USE_MPI
291  #else
293  #endif
294  break;
295  case MPP:
296  Clustering::MPP::main(args);
297  break;
298  case NETWORK:
300  break;
301  case FILTER:
303  break;
304  case CORING:
305  Clustering::Coring::main(args);
306  break;
307  default:
308  std::cerr << "error: unknown mode. this should never happen." << std::endl;
309  return EXIT_FAILURE;
310  }
311  return EXIT_SUCCESS;
312 }
313 
bool verbose
global flag: use verbose output?
Definition: logger.cpp:29
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
Definition: mpp.cpp:320