vocabtree  0.0.1
bench_all.cxx
Go to the documentation of this file.
1 #include "bench_config.hpp"
2 
3 #include <fstream>
4 
5 #include <config.hpp>
6 #include <utils/filesystem.hpp>
7 #include <utils/numerics.hpp>
8 #include <utils/dataset.hpp>
9 #include <utils/misc.hpp>
10 #include <utils/vision.hpp>
11 #include <utils/logger.hpp>
12 #include <utils/image.hpp>
13 #include <utils/cycletimer.hpp>
17 #include <vis/matches_page.hpp>
18 
19 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
20 #include <omp.h>
21 #endif
22 #if ENABLE_MULTITHREADING && ENABLE_MPI
23 #include <mpi.h>
24 #endif
25 
// Logging bootstrap: expands the logger's one-time initialization macro at file scope.
// NOTE(review): judging by its name this belongs to easylogging++, which would back the
// LINFO/LERROR macros used below -- confirm against <utils/logger.hpp>.
26 _INITIALIZE_EASYLOGGINGPP
27 
28 void compute_features(Dataset &dataset) {
29 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
30 #pragma omp parallel for schedule(dynamic)
31 #endif
32  for (int64_t i = 0; i < dataset.num_images(); i++) {
33 
34  std::shared_ptr<SimpleDataset::SimpleImage> image = std::static_pointer_cast<SimpleDataset::SimpleImage>(dataset.image(i));
35  if (image == nullptr) continue;
36 
37  const std::string &keypoints_location = dataset.location(image->feature_path("keypoints"));
38  const std::string &descriptors_location = dataset.location(image->feature_path("descriptors"));
39  if (filesystem::file_exists(keypoints_location) && filesystem::file_exists(descriptors_location)) continue;
40 
41  const std::string &image_location = dataset.location(image->location());
42 
43  if (!filesystem::file_exists(image_location)) continue;
44 
45  cv::Mat im = cv::imread(image_location, cv::IMREAD_GRAYSCALE);
46 
47  cv::Mat keypoints, descriptors;
48  if (!vision::compute_sparse_sift_feature(im, nullptr, keypoints, descriptors)) continue;
49 
50  filesystem::create_file_directory(keypoints_location);
51  filesystem::create_file_directory(descriptors_location);
52 
53  filesystem::write_cvmat(keypoints_location, keypoints);
54  filesystem::write_cvmat(descriptors_location, descriptors);
55  }
56 }
57 
58 void compute_bow_features(Dataset &dataset, std::shared_ptr<BagOfWords> bow, uint32_t num_clusters) {
59  const std::vector< std::shared_ptr<const Image> > &all_images = dataset.all_images();
60 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
61  uint32_t num_threads = omp_get_max_threads();
62  std::vector< cv::Ptr<cv::DescriptorMatcher> > matchers;
63  for (uint32_t i = 0; i < num_threads; i++) {
64  matchers.push_back(vision::construct_descriptor_matcher(bow->vocabulary()));
65  }
66 #pragma omp parallel for schedule(dynamic)
67 #else
68  const cv::Ptr<cv::DescriptorMatcher> &matcher = vision::construct_descriptor_matcher(bow->vocabulary());
69 #endif
70  for (int64_t i = 0; i < (int64_t)all_images.size(); i++) {
71 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
72  const cv::Ptr<cv::DescriptorMatcher> &matcher = matchers[omp_get_thread_num()];
73 #endif
74  const std::string &sift_descriptor_location = dataset.location(all_images[i]->feature_path("descriptors"));
75  const std::string &bow_descriptor_location = dataset.location(all_images[i]->feature_path("bow_descriptors"));
76 
77  cv::Mat descriptors, bow_descriptors, descriptorsf;
78  if (!filesystem::file_exists(sift_descriptor_location)) continue;
79  if (!filesystem::load_cvmat(sift_descriptor_location, descriptors)) continue;
80  descriptors.convertTo(descriptorsf, CV_32FC1);
81  filesystem::create_file_directory(bow_descriptor_location);
82 
83  if (!vision::compute_bow_feature(descriptorsf, matcher, bow_descriptors, nullptr)) continue;
84  const std::vector< std::pair<uint32_t, float> > &bow_descriptors_sparse = numerics::sparsify(bow_descriptors);
85  filesystem::write_sparse_vector(bow_descriptor_location, bow_descriptors_sparse);
86 
87  LINFO << "Wrote " << bow_descriptor_location;
88  }
89 }
90 
91 std::shared_ptr<VocabTree> train_tree(Dataset &dataset, uint32_t num_images, uint32_t split, uint32_t depth) {
92  VocabTree vt;
93  std::shared_ptr<VocabTree::TrainParams> train_params = std::make_shared<VocabTree::TrainParams>();
94  train_params->depth = depth;
95  train_params->split = split;
96  vt.train(dataset, train_params, dataset.random_images(num_images));
97 
98  std::stringstream vocab_output_file;
99  vocab_output_file << dataset.location() << "/tree/" << split << "." << depth << ".vocab";
100  vt.save(vocab_output_file.str());
101  return std::make_shared<VocabTree>(vt);
102 }
103 
104 std::shared_ptr<BagOfWords> train_bow(Dataset &dataset, uint32_t num_images, uint32_t num_clusters) {
105  BagOfWords bow;
106  std::shared_ptr<BagOfWords::TrainParams> train_params = std::make_shared<BagOfWords::TrainParams>();
107  train_params->numClusters = num_clusters;
108  const std::vector< std::shared_ptr<const Image> > &random_images = dataset.random_images(num_images);
109  bow.train(dataset, train_params, random_images);
110  std::stringstream vocab_output_file;
111  vocab_output_file << dataset.location() << "/vocabulary/" << train_params->numClusters << ".vocab";
112  bow.save(vocab_output_file.str());
113  return std::make_shared<BagOfWords>(bow);
114 }
115 
116 std::shared_ptr<InvertedIndex> train_index(Dataset &dataset, std::shared_ptr<BagOfWords> bow) {
117  InvertedIndex ii;
118  std::shared_ptr<InvertedIndex::TrainParams> train_params = std::make_shared<InvertedIndex::TrainParams>();
119  train_params->bag_of_words = bow;
120  ii.train(dataset, train_params, dataset.all_images());
121 
122  return std::make_shared<InvertedIndex>(ii);
123 }
124 
125 void benchmark_dataset(Dataset &dataset) {
126  compute_features(dataset); // compute sift features
127 
128  // parameters
129  uint32_t num_images = 128;
130 
131  uint32_t bow_clusters[] = { 256, 3125, 46656 };
132  std::pair<uint32_t, uint32_t> tree_branches[] = {
133  std::pair<uint32_t, uint32_t>(4, 4),
134  std::pair<uint32_t, uint32_t>(5, 5),
135  std::pair<uint32_t, uint32_t>(6, 6)};
136 
137  std::stringstream timings_file_name;
138  timings_file_name << dataset.location() + "/results/times.index.json";
139  filesystem::create_file_directory(timings_file_name.str());
140  std::ofstream ofs(timings_file_name.str(), std::ios::app);
141  // o letsdoit
142  for(size_t i=0; i<3; i++) {
143  LINFO << "Training bag of words";
144  double start_time_bow = CycleTimer::currentSeconds();
145  std::shared_ptr<BagOfWords> bow = train_bow(dataset, num_images, bow_clusters[i]);
146  double end_time_bow = CycleTimer::currentSeconds();
147  {
148  std::stringstream timing;
149  timing << "{ " <<
150  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
151  "\"operation\" : \"" << "bow_train" << "\", " <<
152  "\"bow_numclusters\" : " << bow->num_clusters() << ", " <<
153  "\"db_size\" : " << dataset.num_images() << ", " <<
154  "\"time\" : " << end_time_bow - start_time_bow << ", " <<
155  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
156  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
157  "\"mpi\" : " << ENABLE_MPI << ", " <<
158  "}" << std::endl;
159  ofs.write(timing.str().c_str(), timing.str().size());
160  ofs.flush();
161  }
162 
163  LINFO << "Computing bag of words features";
164  double start_time_bowfeatures = CycleTimer::currentSeconds();
165  compute_bow_features(dataset, bow, bow_clusters[i]);
166  double end_time_bowfeatures = CycleTimer::currentSeconds();
167  {
168  std::stringstream timing;
169  timing << "{ " <<
170  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
171  "\"operation\" : \"" << "bow_features" << "\", " <<
172  "\"bow_numclusters\" : " << bow->num_clusters() << ", " <<
173  "\"db_size\" : " << dataset.num_images() << ", " <<
174  "\"time\" : " << end_time_bowfeatures - start_time_bowfeatures << ", " <<
175  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
176  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
177  "\"mpi\" : " << ENABLE_MPI << ", " <<
178  "}" << std::endl;
179  ofs.write(timing.str().c_str(), timing.str().size());
180  ofs.flush();
181  }
182 
183  LINFO << "Computing index";
184  double start_time_index = CycleTimer::currentSeconds();
185  std::shared_ptr<InvertedIndex> ii = train_index(dataset, bow);
186  double end_time_index = CycleTimer::currentSeconds();
187  {
188  std::stringstream timing;
189  timing << "{ " <<
190  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
191  "\"operation\" : \"" << "index_train" << "\", " <<
192  "\"index_numclusters\" : " << ii->num_clusters() << ", " <<
193  "\"db_size\" : " << dataset.num_images() << ", " <<
194  "\"time\" : " << end_time_index - start_time_index << ", " <<
195  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
196  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
197  "\"mpi\" : " << ENABLE_MPI << ", " <<
198  "}" << std::endl;
199  ofs.write(timing.str().c_str(), timing.str().size());
200  ofs.flush();
201  }
202 
203  LINFO << "Training tree";
204  double start_time_tree = CycleTimer::currentSeconds();
205  std::shared_ptr<VocabTree> vt = train_tree(dataset, num_images, tree_branches[i].first, tree_branches[i].second);
206  double end_time_tree = CycleTimer::currentSeconds();
207  {
208  std::stringstream timing;
209  timing << "{ " <<
210  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
211  "\"operation\" : \"" << "tree_train" << "\", " <<
212  "\"tree_depth\" : " << vt->tree_depth() << ", " <<
213  "\"tree_split\" : " << vt->tree_splits() << ", " <<
214  "\"db_size\" : " << dataset.num_images() << ", " <<
215  "\"time\" : " << end_time_tree - start_time_tree << ", " <<
216  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
217  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
218  "\"mpi\" : " << ENABLE_MPI << ", " <<
219  "}" << std::endl;
220  ofs.write(timing.str().c_str(), timing.str().size());
221  ofs.flush();
222  }
223 
224  uint32_t num_validate = 10;
225  uint32_t total_iterations = MIN(dataset.num_images(), 128);
226 
227  LINFO << "Running index search";
228  // search index
229  {
230  MatchesPage html_output_index;
231  double total_time = 0.0;
232  uint32_t total_correct = 0, total_tested = 0;
233  for (uint32_t i = 0; i < total_iterations; i++) {
234  double start_time = CycleTimer::currentSeconds();
235 
236  std::shared_ptr<SimpleDataset::SimpleImage> query_image = std::static_pointer_cast<SimpleDataset::SimpleImage>(dataset.image(i));
237  std::shared_ptr<InvertedIndex::MatchResults> matches_index =
238  std::static_pointer_cast<InvertedIndex::MatchResults>(ii->search(dataset, nullptr, query_image));
239  if (matches_index == nullptr) {
240  LERROR << "Error while running search.";
241  continue;
242  }
243  double end_time = CycleTimer::currentSeconds();
244  total_time += (end_time - start_time);
245 
246  // validate matches
247  cv::Mat keypoints_0, descriptors_0;
248  const std::string &query_keypoints_location = dataset.location(query_image->feature_path("keypoints"));
249  const std::string &query_descriptors_location = dataset.location(query_image->feature_path("descriptors"));
250  filesystem::load_cvmat(query_keypoints_location, keypoints_0);
251  filesystem::load_cvmat(query_descriptors_location, descriptors_0);
252  std::vector<int> validated(MIN(num_validate, matches_index->matches.size()), 0);
253  total_tested += validated.size();
254  uint32_t total_correct_tmp = 0;
255 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
256 #pragma omp parallel for schedule(dynamic) reduction(+:total_correct_tmp)
257 #endif
258  for (int32_t j = 0; j < validated.size(); j++) {
259  cv::Mat keypoints_1, descriptors_1;
260  std::shared_ptr<SimpleDataset::SimpleImage> match_image = std::static_pointer_cast<SimpleDataset::SimpleImage>(dataset.image(matches_index->matches[j]));
261  const std::string &match_keypoints_location = dataset.location(match_image->feature_path("keypoints"));
262  const std::string &match_descriptors_location = dataset.location(match_image->feature_path("descriptors"));
263  filesystem::load_cvmat(match_keypoints_location, keypoints_1);
264  filesystem::load_cvmat(match_descriptors_location, descriptors_1);
265 
266  cv::detail::MatchesInfo match_info;
267  vision::geo_verify_f(descriptors_0, keypoints_0, descriptors_1, keypoints_1, match_info);
268 
269  validated[j] = vision::is_good_match(match_info) ? 1 : -1;
270  if (validated[j] > 0) total_correct_tmp++;
271  }
272  total_correct += total_correct_tmp;
273  html_output_index.add_match(i, matches_index->matches, dataset, std::make_shared< std::vector<int> >(validated));
274 
275  std::stringstream outfilestr;
276  outfilestr << dataset.location() << "/results/matches/index." << bow->num_clusters();
277  html_output_index.write(outfilestr.str());
278  }
279 
280  // Write out the timings
281  std::stringstream timing;
282  timing << "{ " <<
283  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
284  "\"operation\" : \"" << "index_search" << "\", " <<
285  "\"index_numclusters\" : " << ii->num_clusters() << ", " <<
286  "\"db_size\" : " << dataset.num_images() << ", " <<
287  "\"time\" : " << total_time << ", " <<
288  "\"iterations\" : " << total_iterations << ", " <<
289  "\"correct\" : " << total_correct << ", " <<
290  "\"tested\" : " << total_tested << ", " <<
291  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
292  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
293  "\"mpi\" : " << ENABLE_MPI << ", " <<
294  "}" << std::endl;
295  ofs.write(timing.str().c_str(), timing.str().size());
296  ofs.flush();
297  }
298 
299  LINFO << "Running tree search";
300  // search tree
301  {
302  MatchesPage html_output_tree;
303  double total_time = 0.0;
304  uint32_t total_correct = 0, total_tested = 0;
305  for (uint32_t i = 0; i < total_iterations; i++) {
306  double start_time = CycleTimer::currentSeconds();
307 
308  std::shared_ptr<SimpleDataset::SimpleImage> query_image = std::static_pointer_cast<SimpleDataset::SimpleImage>(dataset.image(i));
309  std::shared_ptr<InvertedIndex::MatchResults> matches_index =
310  std::static_pointer_cast<InvertedIndex::MatchResults>(vt->search(dataset, nullptr, query_image));
311  if (matches_index == nullptr) {
312  LERROR << "Error while running search.";
313  continue;
314  }
315  double end_time = CycleTimer::currentSeconds();
316  total_time += (end_time - start_time);
317 
318  // validate matches
319  cv::Mat keypoints_0, descriptors_0;
320  const std::string &query_keypoints_location = dataset.location(query_image->feature_path("keypoints"));
321  const std::string &query_descriptors_location = dataset.location(query_image->feature_path("descriptors"));
322  filesystem::load_cvmat(query_keypoints_location, keypoints_0);
323  filesystem::load_cvmat(query_descriptors_location, descriptors_0);
324  std::vector<int> validated(MIN(num_validate, matches_index->matches.size()), 0);
325  total_tested += validated.size();
326  uint32_t total_correct_tmp = 0;
327 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
328 #pragma omp parallel for schedule(dynamic) reduction(+:total_correct_tmp)
329 #endif
330  for (int32_t j = 0; j < validated.size(); j++) {
331  cv::Mat keypoints_1, descriptors_1;
332  std::shared_ptr<SimpleDataset::SimpleImage> match_image = std::static_pointer_cast<SimpleDataset::SimpleImage>(dataset.image(matches_index->matches[j]));
333  const std::string &match_keypoints_location = dataset.location(match_image->feature_path("keypoints"));
334  const std::string &match_descriptors_location = dataset.location(match_image->feature_path("descriptors"));
335  filesystem::load_cvmat(match_keypoints_location, keypoints_1);
336  filesystem::load_cvmat(match_descriptors_location, descriptors_1);
337 
338  cv::detail::MatchesInfo match_info;
339  vision::geo_verify_f(descriptors_0, keypoints_0, descriptors_1, keypoints_1, match_info);
340 
341  validated[j] = vision::is_good_match(match_info) ? 1 : -1;
342  if (validated[j] > 0) total_correct_tmp++;
343  }
344  total_correct += total_correct_tmp;
345  html_output_tree.add_match(i, matches_index->matches, dataset, std::make_shared< std::vector<int> >(validated));
346 
347  std::stringstream outfilestr;
348  outfilestr << dataset.location() << "/results/matches/tree." << vt->tree_depth() << "." << vt->tree_splits();
349  html_output_tree.write(outfilestr.str());
350  }
351 
352  // Write out the timings
353  std::stringstream timing;
354  timing << "{ " <<
355  "\"machine\" : \"" << misc::get_machine_name() << "\", " <<
356  "\"operation\" : \"" << "tree_search" << "\", " <<
357  "\"tree_depth\" : " << vt->tree_depth() << ", " <<
358  "\"tree_split\" : " << vt->tree_splits() << ", " <<
359  "\"db_size\" : " << dataset.num_images() << ", " <<
360  "\"time\" : " << total_time << ", " <<
361  "\"iterations\" : " << total_iterations << ", " <<
362  "\"correct\" : " << total_correct << ", " <<
363  "\"tested\" : " << total_tested << ", " <<
364  "\"multithreading\" : " << ENABLE_MULTITHREADING << ", " <<
365  "\"openmp\" : " << ENABLE_OPENMP << ", " <<
366  "\"mpi\" : " << ENABLE_MPI << ", " <<
367  "}" << std::endl;
368  ofs.write(timing.str().c_str(), timing.str().size());
369  ofs.flush();
370  }
371 
372  }
373  ofs.close();
374 }
375 
376 int main(int argc, char *argv[]) {
377 #if ENABLE_MULTITHREADING && ENABLE_MPI
378  MPI::Init(argc, argv);
379  int rank = MPI::COMM_WORLD.Get_rank();
380  if(rank == 0) {
381 #endif
382 
383  SimpleDataset datasets[] = {
384 
388  };
389 
390  for (size_t i = 0; i < 3; i++) {
391  LINFO << datasets[i];
392  benchmark_dataset(datasets[i]);
393  }
394 
395 
396 #if ENABLE_MULTITHREADING && ENABLE_MPI
397  }
398  MPI::Finalize();
399 #endif
400  return 0;
401 }