vocabtree  0.0.1
vocab_tree.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 #include <unordered_map>
5 #include <unordered_set>
6 
7 class VocabTree : public SearchBase {
8 public:
9 
10  /// Subclass of train params base which specifies vocab tree training parameters.
11  struct TrainParams : public TrainParamsBase {
12  uint32_t depth; // tree depth
13  uint32_t split; // number of children per node
14  };
15 
16  /// Subclass of train params base which specifies Vocab Tree training parameters.
17  struct SearchParams : public SearchParamsBase {
18 
19  };
20 
21  /// Subclass of match results base which also returns scores
22  struct MatchResults : public MatchResultsBase {
23  std::vector<float> tfidf_scores;
24  };
25 
26  VocabTree();
27 
28  /// Given a set of training parameters, list of images, trains. Returns true if successful, false
29  /// if not successful.
30  bool train(Dataset &dataset, const std::shared_ptr<const TrainParamsBase> &params,
31  const std::vector< std::shared_ptr<const Image > > &examples);
32 
33  /// Loads a trained search structure from the input filepath
34  bool load (const std::string &file_path);
35 
36  /// Saves a trained search structure to the input filepath
37  bool save (const std::string &file_path) const;
38 
39  /// Given a set of search parameters, a query image, searches for matching images and returns the match
40  std::shared_ptr<MatchResultsBase> search(Dataset &dataset, const std::shared_ptr<const SearchParamsBase> &params, const std::shared_ptr<const Image > &example);
41 
42  /// returns the split size of each node
43  uint32_t tree_splits() const;
44 
45  /// returns the depth size of tree
46  uint32_t tree_depth() const;
47 protected:
48 
49  struct TreeNode {
51 
52  /// range from 0..maxLevel-1
53  uint32_t level;
54  /// index for this level, ranging from 0..split^level-1
55  /// For example: the first level children will simply have indexes from 0..split-1
56  /// for the second level the children of the first child will have 0..split-1
57  /// while the children of the second child we have split..2*split-1
58  /// This will be used to identify the node and used to index into the vectors for images
59  uint32_t levelIndex;
60 
61  /// index in a level order traversal of the tree
62  uint32_t index;
63  cv::Mat mean;
64  /// index into the array of nodes of the first child, all children are next to eachother
65  /// if this is = 0 then it is a leaf (because the root can never be a child)
66  uint32_t firstChildIndex;
67  };
68 
69  /// stores the amount of splits used to generate tree
70  uint32_t split;
71  /// Stores the max level of the tree
72  uint32_t maxLevel;// = 6;
73  /// number of nodes the tree will have, saved in variable so don't have to recompute
74  uint32_t numberOfNodes;
75 
76  std::vector<float> weights;
77 
78  std::vector<TreeNode> tree;
79  std::vector<std::unordered_map<uint64_t, uint32_t>> invertedFiles;
80 
81  /// Stores the database vectors for all images in the database - d_i in the paper
82  /// Indexes by the image id
83  std::unordered_map<uint64_t, std::vector<float>> databaseVectors;
84 
85  /// Recursively builds a tree, starting with 0 and ending with currLevel = maxLevel-1
86  void buildTreeRecursive(uint32_t t, const cv::Mat &descriptors, cv::TermCriteria &tc, int attempts, int flags, int currLevel);
87 
88  /// helper function, inserts a dummy possibleMatches
89  std::vector<float> generateVector(const cv::Mat &descriptors, bool shouldWeight, int64_t id = -1);
90 
91  /// To call with an id call without possibleMatches and it will go to the helper function
92  /// Takes descriptors for an image and for each descriptor finds the path down the tree generating a vector (describing the path)
93  /// Adds up all vectors (one from each descriptor) to return the vector of counts for each node
94  /// If shouldWeight is true will weight each by the weight of the node, should be true for general query and false for construction
95  /// If id is set then will insert that id into the invertedFile of each leaf visited, if negative or not set then won't do anything
96  /// When id is not set will use insert images into possibleMatches, possibleMatches will not be used if id is set
97  std::vector<float> generateVector(const cv::Mat &descriptors, bool shouldWeight, std::unordered_set<uint32_t> & possibleMatches, int64_t id = -1);
98 
99  /// Recursive function that recursively goes down the tree from t to find where the single descriptor belongs (stopping at leaf)
100  /// On each node increments cound in the counts vector
101  /// If id is set (>=0) then adds the image with that id to the leaf
102  /// Picks the child to traverse down based on the max dot product
103  void generateVectorHelper(uint32_t nodeIndex, const cv::Mat &descriptor, std::vector<float> & counts,
104  std::unordered_set<uint32_t> & possibleMatches, int64_t id = -1);
105 
106 };