ra4_draw  4bd0201e3d922d42bd545d4b045ed44db33454a4
clusterizer.cpp
Go to the documentation of this file.
1 #include "core/clusterizer.hpp"
2 
3 #include <cmath>
4 
5 #include <tuple>
6 #include <array>
7 #include <random>
8 
9 #include "core/utilities.hpp"
10 
11 using namespace std;
12 using namespace Clustering;
13 
14 namespace{
15  mt19937_64 InitializePRNG(){
16  array<int, 128> sd;
17  random_device r;
18  generate_n(sd.begin(), sd.size(), ref(r));
19  seed_seq ss(begin(sd), end(sd));
20  return mt19937_64(ss);
21  }
22 }
23 
24 Point::Point(float x, float y, float w):
25  x_(x),
26  y_(y),
27  w_(w){
28  }
29 
30 bool Point::operator<(const Point &other) const{
31  return make_tuple(x_, y_, w_)<make_tuple(other.x_, other.y_, other.w_);
32 }
33 
34 float Clustering::WeightedDistance(const Point &a, const Point &b){
35  float dx = a.x_ - b.x_;
36  float dy = a.y_ - b.y_;
37  return a.w_*b.w_*(dx*dx+dy*dy)/(a.w_+b.w_);
38 }
39 
40 Node::Node(float x, float y, float w):
41  Point(x, y, w),
42  dist_to_neighbor_(-1.),
43  neighbor_(),
44  neighbor_of_(){
45 }
46 
47 Node::Node(const Point &p):
48  Point(p),
49  dist_to_neighbor_(-1.),
50  neighbor_(),
51  neighbor_of_(){
52 }
53 
54 bool Node::operator<(const Node &other) const{
55  return make_tuple(x_, y_, w_, dist_to_neighbor_)<make_tuple(other.x_, other.y_, other.w_, other.dist_to_neighbor_);
56 }
57 
58 mt19937_64 Clusterizer::prng_ = InitializePRNG();
59 uniform_real_distribution<float> Clusterizer::urd_(0., 1.);
60 
61 Clusterizer::Clusterizer(const TH2D &hist_template, long max_points):
62  max_points_(max_points),
63  hist_mode_(max_points == 0),
64  hist_(hist_template),
65  orig_points_(),
66  nodes_(),
67  final_points_(),
68  clustered_lumi_(-1.){
69  if(max_points_ >= 0 && max_points_ < hist_.GetNcells()){
70  max_points_ = hist_.GetNcells();
71  }
72 }
73 
74 void Clusterizer::AddPoint(float x, float y, float w){
75  clustered_lumi_ = -1.;
76  if(orig_points_.size() >= static_cast<size_t>(max_points_)
77  && max_points_ >= 0){
78  hist_mode_ = true;
79  orig_points_.clear();
80  }
81  hist_.Fill(x, y, w);
82  if(!hist_mode_){
83  orig_points_.emplace_back(x, y, w);
84  }
85 }
86 
87 void Clusterizer::SetPoints(const vector<Point> &points){
88  clustered_lumi_ = -1.;
90  if(points.size() > static_cast<size_t>(max_points_) && max_points_ >= 0){
91  hist_mode_ = true;
92  orig_points_.clear();
93  }else{
94  hist_mode_ = false;
95  orig_points_ = points;
96  }
97  for(const auto &p: points){
98  hist_.Fill(p.x_, p.y_, p.w_);
99  }
100 }
101 
102 void Clusterizer::SetPoints(const TH2D &h){
103  clustered_lumi_ = -1.;
104  EmptyHistogram();
105  hist_mode_ = true;
106  hist_ = h;
107  if(max_points_ >= 0 && max_points_ < hist_.GetNcells()){
108  max_points_ = hist_.GetNcells();
109  }
110 }
111 
112 TH2D Clusterizer::GetHistogram(double luminosity) const{
113  TH2D h = hist_;
114  h.Scale(luminosity);
115  return h;
116 }
117 
118 TGraph Clusterizer::GetGraph(double luminosity, bool keep_in_frame) const{
119  Cluster(luminosity);
120  float xmin = hist_.GetXaxis()->GetBinLowEdge(1);
121  float xmax = hist_.GetXaxis()->GetBinUpEdge(hist_.GetNbinsX());
122  float dx = 0.0001*(xmax-xmin);
123  xmin += dx;
124  xmax -= dx;
125  float ymin = hist_.GetYaxis()->GetBinLowEdge(1);
126  float ymax = hist_.GetYaxis()->GetBinUpEdge(hist_.GetNbinsY());
127  float dy = 0.0001*(ymax-ymin);
128  ymin += dy;
129  ymax -= dy;
130 
131  TGraph g(final_points_.size());
132  g.SetMarkerStyle(hist_.GetMarkerStyle());
133  g.SetMarkerColor(hist_.GetMarkerColor());
134  g.SetMarkerSize(hist_.GetMarkerSize());
135  g.SetTitle(FullTitle(hist_).c_str());
136 
137  for(size_t i = 0; i < final_points_.size(); ++i){
138  const Point &p = final_points_.at(i);
139  float x = p.x_;
140  float y = p.y_;
141  if(keep_in_frame){
142  if(x < xmin) x = xmin;
143  if(x > xmax) x = xmax;
144  if(y < xmin) y = ymin;
145  if(y > ymax) y = ymax;
146  }
147  g.SetPoint(i, x, y);
148  }
149 
150  return g;
151 }
152 
153 void Clusterizer::InsertPoint(float x, float y, float w) const{
154  InsertPoint(Point(x, y, w));
155 }
156 
157 void Clusterizer::InsertPoint(const Point &p) const{
158  nodes_.emplace_front(p);
159  if(nodes_.size() == 2){
160  auto first = nodes_.begin();
161  auto second = first;
162  ++second;
163  float dist = WeightedDistance(*first, *second);
164  Link(first, second, dist);
165  Link(second, first, dist);
166  }else if(nodes_.size() == 1){
167  return;
168  }
169 
170  list<Node>::iterator this_node = nodes_.begin();
171  auto the_end = nodes_.end();
172  for(auto it = ++nodes_.begin(); it != the_end; ++it){
173  float dist = WeightedDistance(nodes_.front(), *it);
174 
175  if(dist < this_node->dist_to_neighbor_ || this_node->dist_to_neighbor_ < 0.){
176  Link(this_node, it, dist);
177  }
178 
179  if(dist < it->dist_to_neighbor_){
180  Link(it, this_node, dist);
181  }
182  }
183 }
184 
185 void Clusterizer::RemovePoint(list<Node>::iterator node) const{
186  if(nodes_.size() == 2){
187  nodes_.erase(node);
188  nodes_.front().dist_to_neighbor_ = -1.;
189  nodes_.front().neighbor_ = list<Node>::iterator();
190  nodes_.front().neighbor_of_.clear();
191  return;
192  }else if(nodes_.size() == 1){
193  nodes_.clear();
194  return;
195  }
196 
197  auto pos = find(node->neighbor_->neighbor_of_.begin(), node->neighbor_->neighbor_of_.end(), node);
198  node->neighbor_->neighbor_of_.erase(pos);
199 
200  auto invalidated = node->neighbor_of_;
201 
202  nodes_.erase(node);
203 
204  //Set invalid distance on nodes with unknown neighbor
205  for(auto &bad_node: invalidated){
206  bad_node->dist_to_neighbor_ = -1.;
207  }
208 
209  //Recompute neighbor of all nodes which had neighbor removed
210  for(auto new_neighbor = nodes_.begin(); new_neighbor != nodes_.end(); ++new_neighbor){
211  for(auto &bad_node: invalidated){
212  if(bad_node == new_neighbor) continue;
213  float dist = WeightedDistance(*bad_node, *new_neighbor);
214  if(dist < bad_node->dist_to_neighbor_ || bad_node->dist_to_neighbor_ < 0.){
215  Link(bad_node, new_neighbor, dist);
216  }
217  }
218  }
219 }
220 
221 list<Node>::iterator Clusterizer::NearestNeighbors() const{
222  float min_dist = -1., min_dist_high_weight = -1.;
223  list<Node>::iterator best_node = nodes_.begin(), best_node_high_weight = nodes_.begin();
224  for(auto node = nodes_.begin(); node != nodes_.end(); ++node){
225  if(node->dist_to_neighbor_ < min_dist || min_dist < 0.){
226  best_node = node;
227  min_dist = node->dist_to_neighbor_;
228  }
229  if((node->dist_to_neighbor_ < min_dist_high_weight || min_dist_high_weight < 0.)
230  && node->w_ > 1. && node->neighbor_->w_ < 1.){
231  best_node_high_weight = node;
232  min_dist_high_weight = node->dist_to_neighbor_;
233  }
234  }
235  if(min_dist < 0.) ERROR("Could not find neighboring points.");
236  if(min_dist_high_weight > 0.){
237  return best_node_high_weight;
238  }else{
239  return best_node;
240  }
241 }
242 
243 void Clusterizer::Link(list<Node>::iterator node,
244  list<Node>::iterator neighbor,
245  float dist){
246  //Remove backlink from previous neighbor if necessary
247  if(node->dist_to_neighbor_ >= 0.){
248  auto pos = find(node->neighbor_->neighbor_of_.begin(), node->neighbor_->neighbor_of_.end(), node);
249  node->neighbor_->neighbor_of_.erase(pos);
250  }
251 
252  //Set up new link
253  node->dist_to_neighbor_ = dist;
254  node->neighbor_ = neighbor;
255  neighbor->neighbor_of_.push_back(node);
256 }
257 
259  for(int i = 0; i < hist_.GetNcells(); ++i){
260  hist_.SetBinContent(i, 0.);
261  hist_.SetBinError(i, 0.);
262  }
263  hist_.SetEntries(0.);
264 }
265 
266 void Clusterizer::Cluster(double luminosity) const{
267  if(luminosity == clustered_lumi_) return;
268 
269  SetupNodes(luminosity);
270  MergeNodes();
271 
272  clustered_lumi_ = luminosity;
273 }
274 
275 void Clusterizer::SetupNodes(double luminosity) const{
276  nodes_.clear();
277  final_points_.clear();
278 
279  if(hist_mode_){
280  int nx = hist_.GetNbinsX();
281  int ny = hist_.GetNbinsY();
282  float xmin = hist_.GetXaxis()->GetBinLowEdge(1);
283  float xmax = hist_.GetXaxis()->GetBinUpEdge(hist_.GetNbinsX());
284  float dx = 0.5*(xmax-xmin);
285  float ymin = hist_.GetYaxis()->GetBinLowEdge(1);
286  float ymax = hist_.GetYaxis()->GetBinUpEdge(hist_.GetNbinsY());
287  float dy = 0.5*(ymax-ymin);
288  for(int ix = 0; ix <= nx+1; ++ix){
289  float xlow = (ix <= 0) ? (xmin-dx)
290  : (ix > hist_.GetNbinsX()) ? (xmax+dx)
291  : hist_.GetXaxis()->GetBinLowEdge(ix);
292  float xhigh = (ix <= 0) ? (xmin-dx)
293  : (ix > hist_.GetNbinsX()) ? (xmax+dx)
294  : hist_.GetXaxis()->GetBinUpEdge(ix);
295  for(int iy = 0; iy <= ny+1; ++iy){
296  float ylow = (iy <= 0) ? (ymin-dy)
297  : (iy > hist_.GetNbinsY()) ? (ymax+dy)
298  : hist_.GetYaxis()->GetBinLowEdge(iy);
299  float yhigh = (iy <= 0) ? (ymin-dy)
300  : (iy > hist_.GetNbinsY()) ? (ymax+dy)
301  : hist_.GetYaxis()->GetBinUpEdge(iy);
302 
303  float w = luminosity*hist_.GetBinContent(ix, iy);
304  while(w > 0.){
305  float x = xlow + urd_(prng_)*(xhigh-xlow);
306  float y = ylow + urd_(prng_)*(yhigh-ylow);
307  if(w >= 1.){
308  final_points_.emplace_back(x, y, 1.);
309  w -= 1.;
310  }else{
311  InsertPoint(x, y, w);
312  w = 0.;
313  }
314  }
315  }
316  }
317  }else{
318  for(const auto &p: orig_points_){
319  float w = luminosity * p.w_;
320  if(w <= 0.) continue;
321  if(w == 1.){
322  final_points_.emplace_back(p.x_, p.y_, w);
323  }else{
324  InsertPoint(p.x_, p.y_, w);
325  }
326  }
327  }
328 }
329 
331  while(nodes_.size()>0){
332  while(nodes_.size()>1){
333  list<Node>::iterator root_node = NearestNeighbors();
334  list<Node>::iterator neighbor = root_node->neighbor_;
335  MergeNodes(root_node, neighbor);
336  }
337 
338  if(nodes_.size()==1){
339  if(nodes_.front().w_ > 1.5){
340  SplitNode();
341  }else if(nodes_.front().w_ >= 0.5){
342  final_points_.push_back(static_cast<Point>(nodes_.front()));
343  nodes_.clear();
344  }else{
345  nodes_.clear();
346  }
347  }
348  }
349 }
350 
351 void Clusterizer::MergeNodes(list<Node>::iterator a,
352  list<Node>::iterator b) const{
353  if(a->w_ < b->w_){
354  //Make sure node "A" has higher weight
355  MergeNodes(b, a);
356  return;
357  }
358 
359  if(a->w_ + b->w_ <= 1.){
360  //Merge two points into one
361  Point c((a->w_*a->x_+b->w_*b->x_)/(a->w_+b->w_),
362  (a->w_*a->y_+b->w_*b->y_)/(a->w_+b->w_),
363  a->w_+b->w_);
364  RemovePoint(a);
365  RemovePoint(b);
366  if(c.w_ == 1.){
367  final_points_.push_back(c);
368  }else{
369  InsertPoint(c);
370  }
371  }else{
372  //Partition so one point has weight exactly 1
373  float sumw = a->w_ + b->w_;
374  float summ1 = sumw - 1.;
375  float rt = sqrt(a->w_*b->w_*summ1);
376 
377  if(fabs(1.-a->w_) <= fabs(1.-b->w_)){
378  //Transfer weight until A has weight exactly 1
379  Point c(((a->w_+rt)*a->x_ + (b->w_-rt)*b->x_)/sumw,
380  ((a->w_+rt)*a->y_ + (b->w_-rt)*b->y_)/sumw,
381  1.);
382  Point d(((a->w_*summ1-rt)*a->x_ + (b->w_*summ1+rt)*b->x_)/(sumw*summ1),
383  ((a->w_*summ1-rt)*a->y_ + (b->w_*summ1+rt)*b->y_)/(sumw*summ1),
384  summ1);
385  RemovePoint(a);
386  RemovePoint(b);
387  final_points_.push_back(c);
388  if(d.w_ == 1.){
389  final_points_.push_back(d);
390  }else{
391  InsertPoint(d);
392  }
393  }else{
394  //Transfer weight until B has weight exactly 1
395  Point c(((a->w_*summ1+rt)*a->x_ + (b->w_*summ1-rt)*b->x_)/(sumw*summ1),
396  ((a->w_*summ1+rt)*a->y_ + (b->w_*summ1-rt)*b->y_)/(sumw*summ1),
397  summ1);
398  Point d(((a->w_-rt)*a->x_ + (b->w_+rt)*b->x_)/sumw,
399  ((a->w_-rt)*a->y_ + (b->w_+rt)*b->y_)/sumw,
400  1.);
401  RemovePoint(a);
402  RemovePoint(b);
403  final_points_.push_back(d);
404  if(c.w_ == 1.){
405  final_points_.push_back(c);
406  }else{
407  InsertPoint(c);
408  }
409  }
410  }
411 }
412 
414  list<Node>::iterator old = nodes_.begin();
415  Point a, b;
416  if(final_points_.size() > 0){
417  float min_dist = -1.;
418  size_t best_index = 0;
419  for(size_t index = 0; index < final_points_.size(); ++index){
420  float dist = WeightedDistance(*old, final_points_.at(index));
421  if(dist < min_dist || min_dist < 0.){
422  min_dist = dist;
423  best_index = index;
424  }
425  }
426  Point &p = final_points_.at(best_index);
427  float dx = old->x_ - p.x_;
428  float dy = old->y_ - p.y_;
429  float scale = 0.25;
430  a = Point(old->x_ + scale*dy, old->y_ - scale*dx, 0.5*old->w_);
431  b = Point(old->x_ - scale*dy, old->y_ + scale*dx, 0.5*old->w_);
432  }else{
433  a = Point(old->x_+1., old->y_+1., 0.5*old->w_);
434  b = Point(old->x_-1., old->y_-1., 0.5*old->w_);
435  }
436  RemovePoint(old);
437  if(a.w_ != 1.){
438  InsertPoint(a);
439  }else{
440  final_points_.push_back(a);
441  }
442  if(b.w_ != 1.){
443  InsertPoint(b);
444  }else{
445  final_points_.push_back(b);
446  }
447 }
448 
449 ostream & operator<<(ostream &stream, const Clustering::Point &p){
450  stream << "Point(x=" << p.x_ << ",y=" << p.y_ << ",w=" << p.w_ << ")";
451  return stream;
452 }
453 
454 ostream & operator<<(ostream &stream, const Clustering::Node &n){
455  stream << "Point(x=" << n.x_ << ",y=" << n.y_ << ",w=" << n.w_ << ",d=" << n.dist_to_neighbor_ << ")";
456  return stream;
457 }
static void Link(std::list< Node >::iterator node, std::list< Node >::iterator neighbor, float dist)
void RemovePoint(std::list< Node >::iterator node) const
void Cluster(double luminosity) const
float WeightedDistance(const Point &a, const Point &b)
Definition: clusterizer.cpp:34
std::string FullTitle(const TH1 &h)
Definition: utilities.cpp:331
bool operator<(const Point &other) const
Definition: clusterizer.cpp:30
STL namespace.
std::vector< Point > orig_points_
Definition: clusterizer.hpp:55
Node(float x, float y, float z)
Definition: clusterizer.cpp:40
std::list< Node > nodes_
Definition: clusterizer.hpp:56
static std::mt19937_64 prng_
Definition: clusterizer.hpp:60
void SetupNodes(double luminosity) const
TH2D GetHistogram(double luminosity) const
#define ERROR(x)
Definition: utilities.hpp:17
TGraph GetGraph(double luminosity, bool keep_in_frame=true) const
void InsertPoint(float x, float y, float w) const
Clusterizer(const TH2D &hist_template, long max_points=-1)
Definition: clusterizer.cpp:61
std::vector< Point > final_points_
Definition: clusterizer.hpp:57
std::list< Node >::iterator neighbor_
Definition: clusterizer.hpp:34
void MergeNodes() const
void AddPoint(float x, float y, float w)
Definition: clusterizer.cpp:74
ostream & operator<<(ostream &stream, const Clustering::Point &p)
static std::uniform_real_distribution< float > urd_
Definition: clusterizer.hpp:61
void SetPoints(const std::vector< Point > &points)
Definition: clusterizer.cpp:87
std::list< Node >::iterator NearestNeighbors() const
std::vector< std::list< Node >::iterator > neighbor_of_
Definition: clusterizer.hpp:35
bool operator<(const Node &other) const
Definition: clusterizer.cpp:54