vocabtree  0.0.1
vocab_tree.cxx
Go to the documentation of this file.
1 #include "vocab_tree.hpp"
2 #include <utils/filesystem.hpp>
3 #include <utils/vision.hpp>
4 #include <iostream>
5 #include <fstream>
6 #include <memory>
7 #include <math.h> // for pow
8 #include <utility> // std::pair
9 
10 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
11 #include <omp.h>
12 #endif
13 #if ENABLE_MULTITHREADING && ENABLE_MPI
14 #include <mpi.h>
15 #endif
16 
18 
19 
20 }
21 
22 // struct used for writing and reading cv::mat's
23 struct cvmat_header {
24  uint64_t elem_size;
25  int32_t elem_type;
26  uint32_t rows, cols;
27 };
28 
29 bool VocabTree::load (const std::string &file_path) {
30  std::cout << "Reading vocab tree from " << file_path << "..." << std::endl;
31 
32  std::ifstream ifs(file_path, std::ios::binary);
33  ifs.read((char *)&split, sizeof(uint32_t));
34  ifs.read((char *)&maxLevel, sizeof(uint32_t));
35  ifs.read((char *)&numberOfNodes, sizeof(uint32_t));
36 
37  weights.resize(numberOfNodes);
38  ifs.read((char *)&weights[0], sizeof(float)*numberOfNodes);
39 
40  // load image data
41  uint32_t imageCount;
42  ifs.read((char *)&imageCount, sizeof(uint32_t));
43  for (uint32_t i = 0; i < imageCount; i++) {
44  uint64_t imageId;
45  std::vector<float> vec(numberOfNodes);
46  ifs.read((char *)&imageId, sizeof(uint64_t));
47  ifs.read((char *)&vec[0], sizeof(float)*numberOfNodes);
48  databaseVectors[imageId] = vec;
49  }
50 
51  // load inveted files
52  uint32_t invertedFileCount;
53  ifs.read((char *)&invertedFileCount, sizeof(uint32_t));
54  invertedFiles.resize(invertedFileCount);
55 
56  for (uint32_t i = 0; i < invertedFileCount; i++) {
57  uint32_t size;
58  ifs.read((char *)&size, sizeof(uint32_t));
59  for (uint32_t j = 0; j < size; j++) {
60  uint64_t imageId;
61  uint32_t imageCount;
62  ifs.read((char *)&imageId, sizeof(uint64_t));
63  ifs.read((char *)&imageCount, sizeof(uint32_t));
64  invertedFiles[i][imageId] = imageCount;
65  }
66  }
67 
68  // read in tree
69  tree.resize(numberOfNodes);
70  for (uint32_t i = 0; i < numberOfNodes; i++) {
71  ifs.read((char *)&tree[i].firstChildIndex, sizeof(uint32_t));
72  ifs.read((char *)&tree[i].index, sizeof(uint32_t));
73  ifs.read((char *)&tree[i].invertedFileLength, sizeof(uint32_t));
74  ifs.read((char *)&tree[i].level, sizeof(uint32_t));
75  ifs.read((char *)&tree[i].levelIndex, sizeof(uint32_t));
76 
77  // read cv::mat, copied from filesystem.cxx
78  cvmat_header h;
79  ifs.read((char *)&h, sizeof(cvmat_header));
80  tree[i].mean.create(h.rows, h.cols, h.elem_type);
81  if (h.rows == 0 || h.cols == 0) continue;
82  ifs.read((char *)tree[i].mean.ptr(), h.rows * h.cols * h.elem_size);
83  }
84 
85  std::cout << "Done reading vocab tree." << std::endl;
86 
87  return (ifs.rdstate() & std::ifstream::failbit) == 0;
88 }
89 
90 bool VocabTree::save (const std::string &file_path) const {
91  std::cout << "Writing vocab tree to " << file_path << "..." << std::endl;
92 
93  std::ofstream ofs(file_path, std::ios::binary | std::ios::trunc);
94 
95  //uint32_t num_clusters = inverted_index.size();
96  ofs.write((const char *)&split, sizeof(uint32_t));
97  ofs.write((const char *)&maxLevel, sizeof(uint32_t));
98  ofs.write((const char *)&numberOfNodes, sizeof(uint32_t));
99  ofs.write((const char *)&weights[0], sizeof(float)*numberOfNodes); // weights
100 
101  // write out databaseVectors
102  uint32_t imageCount = databaseVectors.size();
103  ofs.write((const char *)&imageCount, sizeof(uint32_t));
104  for (auto& pair : databaseVectors) {
105  ofs.write((const char *)&pair.first, sizeof(uint64_t));
106  ofs.write((const char *)&(pair.second)[0], sizeof(float)*numberOfNodes);
107  }
108 
109  // write out inverted files
110  uint32_t numInvertedFiles = invertedFiles.size();
111  ofs.write((const char *)&numInvertedFiles, sizeof(uint32_t));
112  for (std::unordered_map<uint64_t, uint32_t> invFile : invertedFiles) {
113  uint32_t size = invFile.size();
114  ofs.write((const char *)&size, sizeof(uint32_t));
115  for (std::pair<uint64_t, uint32_t> pair : invFile) {
116  ofs.write((const char *)&pair.first, sizeof(uint64_t));
117  ofs.write((const char *)&pair.second, sizeof(uint32_t));
118  }
119  }
120 
121  // write out tree
122  for (uint32_t i = 0; i < numberOfNodes; i++) {
123  TreeNode t = tree[i];
124  ofs.write((const char *)&t.firstChildIndex, sizeof(uint32_t));
125  ofs.write((const char *)&t.index, sizeof(uint32_t));
126  ofs.write((const char *)&t.invertedFileLength, sizeof(uint32_t));
127  ofs.write((const char *)&t.level, sizeof(uint32_t));
128  ofs.write((const char *)&t.levelIndex, sizeof(uint32_t));
129 
130  // write cv::mat, copied from filesystem.cxx
131  cvmat_header h;
132  h.elem_size = t.mean.elemSize();
133  h.elem_type = t.mean.type();
134  h.rows = t.mean.rows;
135  h.cols = t.mean.cols;
136  ofs.write((char *)&h, sizeof(cvmat_header));
137  ofs.write((char *)t.mean.ptr(), h.rows * h.cols * h.elem_size);
138  }
139 
140  std::cout << "Done writing vocab tree." << std::endl;
141 
142  return (ofs.rdstate() & std::ofstream::failbit) == 0;;
143 }
144 
145 bool VocabTree::train(Dataset &dataset, const std::shared_ptr<const TrainParamsBase> &params,
146  const std::vector< std::shared_ptr<const Image > > &examples) {
147 
148  const std::shared_ptr<const TrainParams> &vt_params = std::static_pointer_cast<const TrainParams>(params);
149  split = vt_params->split;
150  //uint32_t depth = vt_params->depth;
151  maxLevel = vt_params->depth;
152  numberOfNodes = (uint32_t)(pow(split, maxLevel) - 1) / (split - 1);
153  weights.resize(numberOfNodes);
154  tree.resize(numberOfNodes);
155  invertedFiles.resize((uint32_t)pow(split, maxLevel-1));
156 
157  // took the following from bag_of_words
158  std::vector<uint64_t> all_ids(examples.size());
159  for (uint32_t i = 0; i < examples.size(); i++) {
160  all_ids[i] = examples[i]->id;
161  }
162  std::random_shuffle(all_ids.begin(), all_ids.end());
163 
164  std::vector<cv::Mat> all_descriptors;
165  uint64_t num_features = 0;
166  for (size_t i = 0; i < all_ids.size(); i++) {
167  std::shared_ptr<Image> image = std::static_pointer_cast<Image>(dataset.image(all_ids[i]));
168  if (image == nullptr) continue;
169 
170  const std::string &descriptors_location = dataset.location(image->feature_path("descriptors"));
171  if (!filesystem::file_exists(descriptors_location)) continue;
172 
173  cv::Mat descriptors, descriptorsf;
174  if (filesystem::load_cvmat(descriptors_location, descriptors)) {
175  descriptors.convertTo(descriptorsf, CV_32FC1);
176  num_features += descriptors.rows;
177 
178  all_descriptors.push_back(descriptorsf);
179  }
180  }
181 
182  const cv::Mat merged_descriptor = vision::merge_descriptors(all_descriptors, true);
183  cv::Mat labels;
184  uint32_t attempts = 1;
185  cv::TermCriteria tc(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 18, 0.000001);
186  // end of stuff from bag of words
187 
188  tree[0].levelIndex = 0;
189  tree[0].index = 0;
190  buildTreeRecursive(0, merged_descriptor, tc, attempts, cv::KMEANS_PP_CENTERS, 0);
191 
192  databaseVectors.reserve(all_ids.size());
193 
194  // now generate data on the reference images - descriptors go down tree, add images to inverted lists at leaves,
195  // and generate di vector for image
196  // Also stores counts for how many images pass through each node to calculate weights
197  std::vector<uint32_t> counts(numberOfNodes);
198  for (size_t i = 0; i < numberOfNodes; i++)
199  counts[i] = 0;
200 
201 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
202 #pragma omp parallel for schedule(dynamic)
203 #endif
204  for (size_t i = 0; i < all_ids.size(); i++) {
205  std::shared_ptr<Image> image = std::static_pointer_cast<Image>(dataset.image(all_ids[i]));
206  if (image == nullptr) continue;
207 
208  const std::string &descriptors_location = dataset.location(image->feature_path("descriptors"));
209  if (!filesystem::file_exists(descriptors_location)) continue;
210 
211  cv::Mat descriptors, descriptorsf;
212  if (filesystem::load_cvmat(descriptors_location, descriptors)) {
213  descriptors.convertTo(descriptorsf, CV_32FC1);
214  std::vector<float> result = generateVector(descriptorsf, false, all_ids[i]);
215  // accumulate counts
216  for (size_t j = 0; j < numberOfNodes; j++)
217  if (result[j] > 0)
218  counts[j]++;
219 
220  //databaseVectors.insert(std::make_pair<uint64_t, std::vector<float>>(all_ids[i], result));
221 #pragma omp critical
222  {
223  databaseVectors[all_ids[i]] = result;
224  }
225  }
226  }
227 
228  // create weights according to equation 4: w_i = ln(N / N_i)
229  for (size_t i = 0; i < numberOfNodes; i++) {
230  if (counts[i] == 0)
231  weights[i] = 0;
232  else
233  weights[i] = log(((float)all_ids.size()) / ((float)counts[i]));
234  // printf("Node %d, count %d, total %d, size %d, weight %f \n", i, counts[i], all_ids.size(), tree[i].invertedFileLength, weights[i]);
235  }
236 
237  // now that we have the weights we iterate over all images and adjust the vector by weights,
238  // then normalizes the vector
239  typedef std::unordered_map<uint64_t, std::vector<float>>::iterator it_type;
240  for (it_type iterator = databaseVectors.begin(); iterator != databaseVectors.end(); iterator++) {
241  float length = 0; // hopefully shouldn't overflow from adding doubles
242  for (size_t i = 0; i < numberOfNodes; i++) {
243  (iterator->second)[i] *= weights[i];
244  length += (float)pow((iterator->second)[i], 2.0);
245  }
246  // normalizing
247  length = sqrt(length);
248  for (size_t i = 0; i < numberOfNodes; i++)
249  (iterator->second)[i] /= length;
250  }
251 
252  for (uint32_t i = 0; i < (uint32_t)pow(split, maxLevel - 1); i++) {
253  // printf("Size of inv file %d: %d\n", i, invertedFiles[i].size());
254  }
255  // printf("\n\n");
256  uint32_t l = 0, inL = 0;
257  for (uint32_t i = 0; i < numberOfNodes; i++) {
258  // printf("Node %d, num %d, weight %f || ", i, tree[i].invertedFileLength, weights[i]);
259  inL++;
260  if (inL >= (uint32_t)pow(split, l)) {
261  l++;
262  inL = 0;
263  // printf("\n");
264  }
265  }
266 
267  return true;
268 }
269 
270 void VocabTree::buildTreeRecursive(uint32_t t, const cv::Mat &descriptors, cv::TermCriteria &tc,
271  int attempts, int flags, int currLevel) {
272 
273  tree[t].invertedFileLength = descriptors.rows;
274  tree[t].level = currLevel;
275 
276  // handles the leaves
277  if (currLevel == maxLevel - 1) {
278  tree[t].firstChildIndex = 0;
279  return;
280  }
281 
282  cv::Mat labels;
283  cv::Mat centers;
284 
285  std::vector<cv::Mat> groups(split);
286  for (uint32_t i = 0; i < split; i++)
287  groups[i] = cv::Mat();
288 
289  // printf("t: %d rows: %d, counts: ", t, descriptors.rows);
290 
291  bool enoughToFill = true;
292  if (descriptors.rows >= split) {
293  cv::kmeans(descriptors, split, labels, tc, attempts, flags, centers);
294 
295  for (int i = 0; i < labels.rows; i++) {
296  int index = labels.at<int>(i);
297  groups[index].push_back(descriptors.row(i));
298  }
299  }
300  else {
301  // *** THIS SHOULDN'T BE THE CASE, why is kmeans splitting poorly? ****
302  enoughToFill = false;
303  for (int i = 0; i < descriptors.rows; i++)
304  groups[i].push_back(descriptors.row(i));
305  }
306 
307  // for (uint32_t i = 0; i < split; i++)
308  // printf("%d, ", groups[i].rows);
309  // printf("\n");
310 
311  uint32_t totalChildren = pow(split, currLevel);
312 
313 #if ENABLE_MULTITHREADING && ENABLE_OPENMP && totalChildren<maxThreads
314  uint32_t maxThreads = omp_get_num_threads();
315 #pragma omp parallel for schedule(dynamic)
316 #endif
317  for (uint32_t i = 0; i < split; i++) {
318  uint32_t childLevelIndex = tree[t].levelIndex*split + i;
319  uint32_t childIndex = (uint32_t)((pow(split, tree[t].level+1)-1) / (split - 1)) + childLevelIndex;
320  if (enoughToFill)
321  tree[childIndex].mean = centers.row(i);
322  tree[childIndex].levelIndex = childLevelIndex;
323  tree[childIndex].index = childIndex;
324  if (i == 0)
325  tree[t].firstChildIndex = childIndex;
326 
327  buildTreeRecursive(childIndex, groups[i], tc, attempts, flags, currLevel + 1);
328  }
329 }
330 
331 std::vector<float> VocabTree::generateVector(const cv::Mat &descriptors, bool shouldWeight, int64_t id) {
332  std::unordered_set<uint32_t> dummy;
333  return generateVector(descriptors, shouldWeight, dummy, id);
334 }
335 
336 std::vector<float> VocabTree::generateVector(const cv::Mat &descriptors, bool shouldWeight,
337  std::unordered_set<uint32_t> & possibleMatches, int64_t id) {
338 
339  std::vector<float> vec(numberOfNodes);
340  for (uint32_t i = 0; i < numberOfNodes; i++)
341  vec[i] = 0;
342 
343  for (int r = 0; r < descriptors.rows; r++) {
344  generateVectorHelper(0, descriptors.row(r), vec, possibleMatches, id);
345  }
346 
347  // if shouldWeight is true then weight all values in the vector and normalize
348  if (shouldWeight) {
349  float length = 0; // for normalizing
350  for (uint32_t i = 0; i < numberOfNodes; i++) {
351  vec[i] *= weights[i];
352  length += vec[i] * vec[i];
353  }
354  length = sqrt(length);
355  for (uint32_t i = 0; i < numberOfNodes; i++) {
356  if(length == 0)
357  vec[i] = 0;
358  else
359  vec[i] /= length;
360  }
361 
362  }
363 
364  return vec;
365 }
366 
367 void VocabTree::generateVectorHelper(uint32_t nodeIndex, const cv::Mat &descriptor, std::vector<float> & counts,
368  std::unordered_set<uint32_t> & possibleMatches, int64_t id) {
369 
370  counts[nodeIndex]++;
371  // if leaf
372  if (tree[nodeIndex].firstChildIndex <= 0) {
373  std::unordered_map<uint64_t, uint32_t> & invFile = invertedFiles[tree[nodeIndex].levelIndex];
374  //printf("|%d|=%d, ", tree[nodeIndex].levelIndex, invFile.size());
375  // inserting image id into the inverted file
376  if (id >= 0) {
377  //printf("Leaf %d\n", nodeIndex);
378 #pragma omp critical
379  {
380  if (invFile.find(id) == invFile.end())
381  invFile[id] = 1;
382  else
383  invFile[id]++;
384  }
385  }
386  // accumulating image id's into possibleMatches
387  else {
388  // i don't like doing this serial, should find a better method
389  //typedef std::unordered_map<uint64_t, uint32_t>::iterator it_type;
390  //for (it_type iterator = invFile.begin(); iterator != invFile.end(); iterator++)
391  possibleMatches.insert(tree[nodeIndex].levelIndex); //iterator->first);
392  }
393  }
394  // if inner node
395  else {
396  uint32_t maxChild = tree[nodeIndex].firstChildIndex;
397  double max = descriptor.dot(tree[maxChild].mean);
398 
399  for (uint32_t i = 1; i < split; i++) {
400  if (tree[nodeIndex].invertedFileLength == 0)
401  continue;
402  uint32_t childIndex = tree[nodeIndex].firstChildIndex + i;
403  double dot = descriptor.dot(tree[childIndex].mean);
404 
405  if (dot>max) {
406  max = dot;
407  maxChild = childIndex;
408  }
409  }
410  generateVectorHelper(maxChild, descriptor, counts, possibleMatches, id);
411  }
412 }
413 
414 
415 std::shared_ptr<MatchResultsBase> VocabTree::search(Dataset &dataset, const std::shared_ptr<const SearchParamsBase> &params,
416  const std::shared_ptr<const Image > &example) {
417 
418  std::cout << "Searching for matching images..." << std::endl;
419  const std::shared_ptr<const SearchParams> &ii_params = std::static_pointer_cast<const SearchParams>(params);
420 
421  std::shared_ptr<MatchResults> match_result = std::make_shared<MatchResults>();
422 
423  // get descriptors for example
424  if (example == nullptr) return nullptr;
425  const std::string &descriptors_location = dataset.location(example->feature_path("descriptors"));
426  if (!filesystem::file_exists(descriptors_location)) return nullptr;
427 
428  cv::Mat descriptors, descriptorsf;
429  if (!filesystem::load_cvmat(descriptors_location, descriptors)) return nullptr;
430 
431  std::unordered_set<uint32_t> possibleMatches;
432  descriptors.convertTo(descriptorsf, CV_32FC1);
433  std::vector<float> vec = generateVector(descriptorsf, true, possibleMatches);
434 
435  typedef std::pair<uint64_t, float> matchPair;
436  struct myComparer {
437  bool operator() (matchPair a, matchPair b) { return a.second < b.second; };
438  } comparer;
439 
440  std::unordered_set<uint64_t> possibleImages;
441  for (uint32_t elem : possibleMatches) {
442  std::unordered_map<uint64_t, uint32_t> & invFile = invertedFiles[elem];
443 
444  typedef std::unordered_map<uint64_t, uint32_t>::iterator it_type;
445  for (it_type iterator = invFile.begin(); iterator != invFile.end(); iterator++)
446  if (possibleImages.count(iterator->first) == 0)
447  possibleImages.insert(iterator->first);
448  }
449 
450  //std::set<matchPair, myComparer> values;
451  std::vector<matchPair> values;
452  for (uint64_t elem : possibleImages) {
453  // compute L1 norm (based on paper eq 5)
454  //float l1norm = 0;
455  float score = 0;
456  for (uint32_t i = 0; i < numberOfNodes; i++) {
457  float t = vec[i] - (databaseVectors[elem])[i];
458  score += t*t;
459  // std::cout << vec[i] << std::endl;
460  //l1norm += abs(vec[i] * (databaseVectors[elem])[i]);
461  }
462  //values[elem] = l1norm;
463  //values.insert(elem, l1norm));
464 
465  values.push_back(matchPair(elem, sqrt(score)));
466  }
467 
468 
469  std::sort(values.begin(), values.end(), comparer);
470 
471  // printf("%d matches\n", values.size());
472  // add all images in order or match
473  for (matchPair m : values){
474  match_result->matches.push_back(m.first);
475  match_result->tfidf_scores.push_back(m.second);
476  // std::cout << m.second << std::endl;
477  }
478 
479  // add in matches, just do 2 for now
480  //possibleMatches.size() / 10.0
481  /*for (int i = 0; i < 1; i++) {
482  std::set<matchPair>::iterator top = values.begin();
483  match_result->matches.push_back(top->first);
484  match_result->tfidf_scores.push_back(top->second);
485  }*/
486  //match_result->matches.push_back(0);
487 
488  return (std::shared_ptr<MatchResultsBase>)match_result;
489 }
490 
491 uint32_t VocabTree::tree_splits() const {
492  return split;
493 }
494 
495 uint32_t VocabTree::tree_depth() const {
496  return maxLevel;
497 }