26 #include "network_builder.hpp"
30 #include "embedded_cytoscape.hpp"
34 #include <unordered_set>
37 #include <boost/program_options.hpp>
38 #include <boost/filesystem.hpp>
46 int HORIZONTAL_SPACING = 10;
47 int VERTICAL_SPACING = 50;
56 std::size_t POP_MIN = 0;
57 std::size_t POP_MAX = 0;
66 std::map<std::size_t, Node> children;
69 int _subtree_width = 0;
72 Node(std::size_t _id,
float _fe, std::size_t _pop);
73 Node* find_parent_of(std::size_t search_id);
74 void set_pos(
int x,
int y);
76 void print_subtree(std::ostream& os);
77 void print_node_and_subtree(std::ostream& os);
80 constexpr
bool fuzzy_equal(
float a,
float b,
float prec) {
81 return (a <= b + prec) && (a >= b - prec);
86 std::ostream& operator<<(std::ostream& os,
const Node& n) {
94 os <<
Clustering::Tools::stringprintf(
"{group:'nodes',id:'n%d',position:{x:%d,y:%d},data:{id:'n%d',pop:%d,fe:%f,info:'%d: fe=%0.2f, pop=%d',logpop:%0.2f}},\n",
95 n.id, n.pos_x, n.pos_y, n.id, n.pop, n.fe, n.id, n.fe, n.pop, log_pop);
97 for (
auto& id_child: n.children) {
98 std::size_t cid = id_child.first;
106 Node::Node(std::size_t _id,
114 Node* Node::find_parent_of(std::size_t search_id) {
115 if (this->children.count(search_id)) {
118 for (
auto& id_child: children) {
119 Node* cc = id_child.second.find_parent_of(search_id);
128 void Node::set_pos(
int x,
int y) {
131 std::vector<int> width_children;
133 for (
auto& id_child: children) {
134 width_children.push_back(id_child.second.subtree_width());
135 total_width += id_child.second.subtree_width();
139 int cur_x = (x-0.5*total_width);
140 for (
auto& id_child: children) {
141 int stw = id_child.second.subtree_width();
142 id_child.second.set_pos(cur_x + 0.5*stw, y + VERTICAL_SPACING);
147 int Node::subtree_width() {
150 if ( ! this->_subtree_width) {
151 int self_width = 10 + 2*HORIZONTAL_SPACING;
152 if (children.empty()) {
153 this->_subtree_width = self_width;
156 for (
auto& id_child: children) {
157 sum += id_child.second.subtree_width();
159 if (sum > self_width) {
160 this->_subtree_width = sum;
162 this->_subtree_width = self_width;
166 return this->_subtree_width;
169 void Node::print_subtree(std::ostream& os) {
170 for (
auto& id_child: children) {
171 id_child.second.print_node_and_subtree(os);
175 void Node::print_node_and_subtree(std::ostream& os) {
176 os << (*this) << std::endl;
177 this->print_subtree(os);
182 save_network_links(std::string fname,
183 std::map<std::size_t, std::size_t> network) {
185 std::ofstream ofs(fname);
187 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
190 for (
auto p: network) {
191 ofs << p.second <<
" " << p.first <<
"\n";
197 save_node_info(std::string fname,
198 std::map<std::size_t, float> free_energies,
199 std::map<std::size_t, std::size_t> pops) {
201 std::ofstream ofs(fname);
203 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
206 for (
auto node_pop: pops) {
207 std::size_t key = node_pop.first;
208 ofs << key <<
" " << free_energies[key] <<
" " << node_pop.second <<
"\n";
213 std::set<std::size_t>
214 compute_and_save_leaves(std::string fname,
215 std::map<std::size_t, std::size_t> network) {
217 std::set<std::size_t> leaves;
218 std::set<std::size_t> not_leaves;
219 std::ofstream ofs(fname);
221 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
224 for (
auto from_to: network) {
225 std::size_t src = from_to.first;
226 std::size_t target = from_to.second;
227 not_leaves.insert(target);
228 if (not_leaves.count(src)) {
234 for (std::size_t leaf: leaves) {
242 save_traj_of_leaves(std::string fname,
243 std::set<std::size_t> leaves,
247 std::string remapped_name,
248 std::size_t n_rows) {
249 Clustering::logger(std::cout) <<
"saving end-node trajectory for seeding" << std::endl;
250 std::ofstream ofs(fname);
252 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
255 std::vector<std::size_t> traj(n_rows);
256 const float prec = d_step / 10.0f;
257 for (
float d=d_min; ! fuzzy_equal(d, d_max+d_step, prec); d += d_step) {
260 for (std::size_t i=0; i < n_rows; ++i) {
261 if (leaves.count(cl_now[i])) {
266 for (std::size_t i: traj) {
273 save_network_to_html(std::string fname,
274 std::map<std::size_t, std::size_t> network,
275 std::map<std::size_t, float> free_energies,
276 std::map<std::size_t, std::size_t> pops) {
279 FE_MAX = std::max_element(free_energies.begin(),
281 [](std::pair<std::size_t, float> fe1,
282 std::pair<std::size_t, float> fe2) ->
bool {
283 return fe1.second < fe2.second;
285 FE_MIN = std::min_element(free_energies.begin(),
287 [](std::pair<std::size_t, float> fe1,
288 std::pair<std::size_t, float> fe2) ->
bool {
289 return fe1.second < fe2.second;
291 POP_MAX = std::max_element(pops.begin(),
293 [](std::pair<std::size_t, std::size_t> p1,
294 std::pair<std::size_t, std::size_t> p2) ->
bool {
295 return p1.second < p2.second;
297 POP_MIN = std::min_element(pops.begin(),
299 [](std::pair<std::size_t, std::size_t> p1,
300 std::pair<std::size_t, std::size_t> p2) ->
bool {
301 return p1.second < p2.second;
306 std::size_t network_size = network.size();
307 std::size_t i_frame = 0;
308 for (
auto from_to: network) {
309 std::cerr << ++i_frame <<
" / " << network_size <<
"\n";
311 std::size_t i_from = from_to.first;
312 std::size_t i_to = from_to.second;
314 Node* parent_to = fake_root.find_parent_of(i_to);
316 fake_root.children[i_to] = {i_to, free_energies[i_to], pops[i_to]};
317 parent_to = &fake_root;
319 Node* parent_from = fake_root.find_parent_of(i_from);
322 parent_to->children[i_to].children[i_from] = parent_from->children[i_from];
323 parent_from->children.erase(i_from);
326 parent_to->children[i_to].children[i_from] = {i_from, free_energies[i_from], pops[i_from]};
330 std::ofstream ofs(fname);
332 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
335 float LOG_POP_MIN, LOG_POP_MAX;
339 LOG_POP_MIN = log(POP_MIN);
344 LOG_POP_MAX = log(POP_MAX);
346 ofs << Clustering::Network::viewer_header
348 <<
"style: cytoscape.stylesheet().selector('node').css({"
353 <<
".selector('edge').css({'opacity': '1.0', 'width': '5', 'target-arrow-shape': 'triangle'})"
354 <<
".selector(':selected').css({'content': 'data(info)', 'font-size': 24, 'color': '#00ff00'})"
356 <<
", elements: [\n";
357 fake_root.set_pos(0, 0);
358 fake_root.print_subtree(ofs);
360 ofs << Clustering::Network::viewer_footer;
366 namespace Clustering {
367 namespace NetworkBuilder {
370 main(boost::program_options::variables_map args) {
371 namespace b_fs = boost::filesystem;
372 using namespace Clustering::Tools;
374 omp_set_num_threads(2);
378 float d_min = args[
"min"].as<
float>();
379 float d_max = args[
"max"].as<
float>();
380 float d_step = args[
"step"].as<
float>();
381 std::string basename = args[
"basename"].as<std::string>();
382 std::string remapped_name =
"remapped_" + basename;
383 std::size_t minpop = args[
"minpop"].as<std::size_t>();
385 std::map<std::size_t, std::size_t> network;
386 std::map<std::size_t, std::size_t> pops;
387 std::map<std::size_t, float> free_energies;
390 if ( ! b_fs::exists(fname_next)) {
391 std::cerr <<
"error: file does not exist: " << fname_next << std::endl;
395 std::vector<std::size_t> cl_now;
397 std::size_t n_rows = cl_next.size();
401 const float prec = d_step / 10.0f;
404 d_max = std::numeric_limits<float>::max();
409 for (d=d_min; ! fuzzy_equal(d, d_max, prec) && b_fs::exists(fname_next); d += d_step) {
413 #pragma omp parallel sections
421 if (b_fs::exists(fname_next)) {
423 max_id = *std::max_element(cl_now.begin(), cl_now.end());
424 for (std::size_t i=0; i < n_rows; ++i) {
425 if (cl_next[i] != 0) {
426 cl_next[i] += max_id;
427 if (cl_now[i] != 0) {
428 network[cl_now[i]] = cl_next[i];
430 free_energies[cl_now[i]] = d;
443 std::unordered_set<std::size_t> removals;
444 auto pop_it = pops.begin();
446 while (pop_it != pops.end()) {
447 if (pop_it->second < minpop) {
448 removals.insert(pop_it->first);
449 pops.erase(pop_it++);
455 auto net_it = network.begin();
456 while (net_it != network.end()) {
457 std::size_t a = net_it->first;
458 std::size_t b = net_it->second;
459 if (removals.count(a) > 0 || removals.count(b) > 0) {
460 network.erase(net_it++);
468 save_network_links(
"network_links.dat", network);
470 save_node_info(
"network_nodes.dat", free_energies, pops);
472 std::set<std::size_t> leaves = compute_and_save_leaves(
"network_leaves.dat", network);
475 save_traj_of_leaves(
"network_end_node_traj.dat", leaves, d_min, d_max, d_step, remapped_name, n_rows);
477 save_network_to_html(
"network_visualization.html", network, free_energies, pops);
bool verbose
global flag: use verbose output?
std::ostream & logger(std::ostream &s)