10 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
13 #if ENABLE_MULTITHREADING && ENABLE_MPI
// Progress message naming the file the vocabulary tree is read from.
30 std::cout <<
"Reading vocab tree from " << file_path <<
"..." << std::endl;
// Binary-mode input stream; every field below is pulled as raw fixed-width bytes, in file order.
32 std::ifstream ifs(file_path, std::ios::binary);
// Header: split, then maxLevel, each consuming sizeof(uint32_t) bytes.
33 ifs.read((
char *)&
split,
sizeof(uint32_t));
34 ifs.read((
char *)&
maxLevel,
sizeof(uint32_t));
// Number of per-image records that follow.
42 ifs.read((
char *)&imageCount,
sizeof(uint32_t));
// Each record: an 8-byte image id followed by numberOfNodes floats.
43 for (uint32_t i = 0; i < imageCount; i++) {
// NOTE(review): &vec[0] below is only valid when numberOfNodes > 0 - confirm it cannot be zero here.
45 std::vector<float> vec(numberOfNodes);
46 ifs.read((
char *)&imageId,
sizeof(uint64_t));
47 ifs.read((
char *)&vec[0],
sizeof(
float)*numberOfNodes);
// Number of inverted files stored next.
52 uint32_t invertedFileCount;
53 ifs.read((
char *)&invertedFileCount,
sizeof(uint32_t));
// Each inverted file: a 32-bit entry count, then that many (8-byte id, 4-byte count) entries.
56 for (uint32_t i = 0; i < invertedFileCount; i++) {
58 ifs.read((
char *)&size,
sizeof(uint32_t));
59 for (uint32_t j = 0; j < size; j++) {
62 ifs.read((
char *)&imageId,
sizeof(uint64_t));
// NOTE(review): this read targets a variable named imageCount; whether it is a loop-local
// or the record count read earlier cannot be told from the visible lines - confirm.
63 ifs.read((
char *)&imageCount,
sizeof(uint32_t));
// Size the node array to numberOfNodes, then fill each node's fields in the order read below.
69 tree.resize(numberOfNodes);
71 ifs.read((
char *)&
tree[i].firstChildIndex,
sizeof(uint32_t));
72 ifs.read((
char *)&
tree[i].index,
sizeof(uint32_t));
73 ifs.read((
char *)&
tree[i].invertedFileLength,
sizeof(uint32_t));
74 ifs.read((
char *)&
tree[i].level,
sizeof(uint32_t));
75 ifs.read((
char *)&
tree[i].levelIndex,
sizeof(uint32_t));
// Skip the rest of the iteration when h has zero rows or columns (h is defined outside the visible lines).
81 if (h.
rows == 0 || h.
cols == 0)
continue;
85 std::cout <<
"Done reading vocab tree." << std::endl;
// Success is reported when failbit is clear after all reads.
// NOTE(review): badbit is not examined here; !ifs.fail() would cover both - confirm intent.
87 return (ifs.rdstate() & std::ifstream::failbit) == 0;
// Progress message naming the file the vocabulary tree is written to.
91 std::cout <<
"Writing vocab tree to " << file_path <<
"..." << std::endl;
// Binary-mode output stream; std::ios::trunc discards any existing contents of the file.
93 std::ofstream ofs(file_path, std::ios::binary | std::ios::trunc);
// Header: split, then maxLevel, each emitted as sizeof(uint32_t) raw bytes.
96 ofs.write((
const char *)&
split,
sizeof(uint32_t));
97 ofs.write((
const char *)&
maxLevel,
sizeof(uint32_t));
// Number of per-image records that follow.
103 ofs.write((
const char *)&imageCount,
sizeof(uint32_t));
// Per record: pair.first as 8 bytes, then numberOfNodes floats starting at pair.second[0].
105 ofs.write((
const char *)&pair.first,
sizeof(uint64_t));
106 ofs.write((
const char *)&(pair.second)[0],
sizeof(
float)*numberOfNodes);
// Number of inverted files that follow.
111 ofs.write((
const char *)&numInvertedFiles,
sizeof(uint32_t));
// NOTE(review): invFile is declared by value, so every iteration copies an entire map;
// binding it by const reference would avoid the copy without changing what is written.
112 for (std::unordered_map<uint64_t, uint32_t> invFile :
invertedFiles) {
// Entry count is stored as 32 bits; the map's size() is narrowed to uint32_t here.
113 uint32_t size = invFile.size();
114 ofs.write((
const char *)&size,
sizeof(uint32_t));
// Each entry is written as an 8-byte key followed by a 4-byte value; pair is also taken by value.
115 for (std::pair<uint64_t, uint32_t> pair : invFile) {
116 ofs.write((
const char *)&pair.first,
sizeof(uint64_t));
117 ofs.write((
const char *)&pair.second,
sizeof(uint32_t));
// Per-node fields of t; the visible writes cover index, level and levelIndex.
125 ofs.write((
const char *)&t.
index,
sizeof(uint32_t));
127 ofs.write((
const char *)&t.
level,
sizeof(uint32_t));
128 ofs.write((
const char *)&t.
levelIndex,
sizeof(uint32_t));
140 std::cout <<
"Done writing vocab tree." << std::endl;
// Success is reported when failbit is clear after all writes.
// NOTE(review): the trailing ';;' is a harmless empty statement; badbit is not examined here.
142 return (ofs.rdstate() & std::ofstream::failbit) == 0;;
146 const std::vector< std::shared_ptr<const Image > > &examples) {
// View params as TrainParams. static_pointer_cast performs no runtime type check.
148 const std::shared_ptr<const TrainParams> &vt_params = std::static_pointer_cast<
const TrainParams>(params);
// Take the branching factor from the training parameters.
149 split = vt_params->split;
// Collect the id of every example, then randomise the order.
158 std::vector<uint64_t> all_ids(examples.size());
159 for (uint32_t i = 0; i < examples.size(); i++) {
160 all_ids[i] = examples[i]->id;
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17;
// std::shuffle with an explicit random engine is the standard replacement.
162 std::random_shuffle(all_ids.begin(), all_ids.end());
// First pass over the ids: gather float-converted descriptor matrices and count their rows.
164 std::vector<cv::Mat> all_descriptors;
165 uint64_t num_features = 0;
166 for (
size_t i = 0; i < all_ids.size(); i++) {
167 std::shared_ptr<Image> image = std::static_pointer_cast<
Image>(dataset.
image(all_ids[i]));
// Ids for which the dataset returns no image are skipped.
168 if (image ==
nullptr)
continue;
170 const std::string &descriptors_location = dataset.
location(image->feature_path(
"descriptors"));
// descriptors is presumably filled by a line outside this view - confirm; it is then converted to CV_32FC1.
173 cv::Mat descriptors, descriptorsf;
175 descriptors.convertTo(descriptorsf, CV_32FC1);
176 num_features += descriptors.rows;
178 all_descriptors.push_back(descriptorsf);
// Clustering settings: one attempt; termination on iteration count or epsilon (18, 0.000001).
184 uint32_t attempts = 1;
185 cv::TermCriteria tc(cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 18, 0.000001);
// The root node is position 0 within its level.
188 tree[0].levelIndex = 0;
// Second pass over the ids, run as an OpenMP parallel loop when both macros are enabled.
// NOTE(review): generateVector is called here with the image id from several threads;
// confirm that any shared structures it updates are protected.
201 #if ENABLE_MULTITHREADING && ENABLE_OPENMP
202 #pragma omp parallel for schedule(dynamic)
204 for (
size_t i = 0; i < all_ids.size(); i++) {
205 std::shared_ptr<Image> image = std::static_pointer_cast<
Image>(dataset.
image(all_ids[i]));
206 if (image ==
nullptr)
continue;
208 const std::string &descriptors_location = dataset.
location(image->feature_path(
"descriptors"));
211 cv::Mat descriptors, descriptorsf;
213 descriptors.convertTo(descriptorsf, CV_32FC1);
214 std::vector<float> result =
generateVector(descriptorsf,
false, all_ids[i]);
// Per-entry weight: log of (number of ids / counts[i]).
// NOTE(review): a zero counts[i] would divide by zero unless guarded outside this view.
233 weights[i] = log(((
float)all_ids.size()) / ((
float)counts[i]));
239 typedef std::unordered_map<uint64_t, std::vector<float>>::iterator it_type;
// Scale each component by its weight and accumulate the squared magnitude.
243 (iterator->second)[i] *=
weights[i];
244 length += (float)pow((iterator->second)[i], 2.0);
// Euclidean length of the weighted vector; components are divided by it below.
// NOTE(review): a zero length would divide by zero unless guarded outside this view.
247 length = sqrt(length);
249 (iterator->second)[i] /= length;
// Loop bound is split^(maxLevel-1), computed in floating point and truncated to uint32_t.
252 for (uint32_t i = 0; i < (uint32_t)pow(split,
maxLevel - 1); i++) {
// l and inL are counters; inL is compared against split^l below.
256 uint32_t l = 0, inL = 0;
260 if (inL >= (uint32_t)pow(split, l)) {
271 int attempts,
int flags,
int currLevel) {
// Record how many descriptor rows reached node t and which level it sits on.
273 tree[t].invertedFileLength = descriptors.rows;
274 tree[t].level = currLevel;
// A firstChildIndex of 0 is what the descent code in this file treats as "no children".
278 tree[t].firstChildIndex = 0;
// One bucket of descriptor rows per child.
// NOTE(review): the vector constructor already default-constructs each cv::Mat,
// so the reset loop that follows is redundant.
285 std::vector<cv::Mat> groups(
split);
286 for (uint32_t i = 0; i <
split; i++)
287 groups[i] = cv::Mat();
291 bool enoughToFill =
true;
// With at least split rows, cluster the descriptors into split groups.
// NOTE(review): descriptors.rows is a signed int; if split is unsigned this is a
// mixed-sign comparison - confirm.
292 if (descriptors.rows >= split) {
293 cv::kmeans(descriptors, split, labels, tc, attempts, flags, centers);
// Distribute every row into the bucket named by its label.
295 for (
int i = 0; i < labels.rows; i++) {
296 int index = labels.at<
int>(i);
297 groups[index].push_back(descriptors.row(i));
// Too few rows to cluster: row i goes into bucket i on its own.
302 enoughToFill =
false;
303 for (
int i = 0; i < descriptors.rows; i++)
304 groups[i].push_back(descriptors.row(i));
// split^currLevel, implicitly converted from double to uint32_t.
311 uint32_t totalChildren = pow(split, currLevel);
// NOTE(review): totalChildren and maxThreads are runtime variables, not macros. Inside a
// preprocessor #if, identifiers that are not macros evaluate to 0, so "totalChildren<maxThreads"
// is 0<0 and this whole condition is always false: the OpenMP lines below are never compiled.
// maxThreads is also declared only after the condition that names it, and
// omp_get_num_threads() returns 1 when called outside a parallel region.
313 #if ENABLE_MULTITHREADING && ENABLE_OPENMP && totalChildren<maxThreads
314 uint32_t maxThreads = omp_get_num_threads();
315 #pragma omp parallel for schedule(dynamic)
317 for (uint32_t i = 0; i <
split; i++) {
// The children of a node occupy split consecutive positions within the next level.
318 uint32_t childLevelIndex =
tree[t].levelIndex*split + i;
// (split^(level+1) - 1) / (split - 1) is the number of nodes in levels 0..level of a full
// split-ary tree, i.e. the array offset at which the next level starts.
// NOTE(review): split == 1 makes the divisor zero.
319 uint32_t childIndex = (uint32_t)((pow(split,
tree[t].level+1)-1) / (split - 1)) + childLevelIndex;
// Store the cluster centre and the position data on the child node.
321 tree[childIndex].mean = centers.row(i);
322 tree[childIndex].levelIndex = childLevelIndex;
323 tree[childIndex].index = childIndex;
// Parent records childIndex as its firstChildIndex (any guarding condition is outside this view).
325 tree[t].firstChildIndex = childIndex;
332 std::unordered_set<uint32_t> dummy;
337 std::unordered_set<uint32_t> & possibleMatches, int64_t
id) {
// Visit every descriptor row.
343 for (
int r = 0; r < descriptors.rows; r++) {
// Sum of squared components of vec, then its square root: the Euclidean length of vec.
352 length += vec[i] * vec[i];
354 length = sqrt(length);
368 std::unordered_set<uint32_t> & possibleMatches, int64_t
id) {
// A firstChildIndex <= 0 identifies a node without children.
372 if (
tree[nodeIndex].firstChildIndex <= 0) {
// The inverted file for this node is selected by the node's levelIndex.
373 std::unordered_map<uint64_t, uint32_t> & invFile =
invertedFiles[
tree[nodeIndex].levelIndex];
// NOTE(review): id arrives as int64_t while the map's key type is uint64_t,
// so this lookup converts implicitly.
380 if (invFile.find(
id) == invFile.end())
// Record this node's levelIndex as a candidate.
391 possibleMatches.insert(tree[nodeIndex].levelIndex);
// Start with the first child as the best so far, scored by dot product with the descriptor.
396 uint32_t maxChild =
tree[nodeIndex].firstChildIndex;
397 double max = descriptor.dot(
tree[maxChild].mean);
// Score the remaining children (i = 1 .. split-1).
399 for (uint32_t i = 1; i <
split; i++) {
// NOTE(review): this tests the parent's invertedFileLength, which does not change across
// iterations; confirm whether the child's value was intended.
400 if (
tree[nodeIndex].invertedFileLength == 0)
402 uint32_t childIndex =
tree[nodeIndex].firstChildIndex + i;
403 double dot = descriptor.dot(
tree[childIndex].mean);
// Remember this child as the current best (the comparison guarding this is outside the view).
407 maxChild = childIndex;
416 const std::shared_ptr<const Image > &example) {
// Progress message for the start of a search.
418 std::cout <<
"Searching for matching images..." << std::endl;
// View params as SearchParams. static_pointer_cast performs no runtime type check.
419 const std::shared_ptr<const SearchParams> &ii_params = std::static_pointer_cast<
const SearchParams>(params);
// Result container that the matches and scores are appended to below.
421 std::shared_ptr<MatchResults> match_result = std::make_shared<MatchResults>();
// A null example yields a null result.
424 if (example ==
nullptr)
return nullptr;
425 const std::string &descriptors_location = dataset.
location(example->feature_path(
"descriptors"));
428 cv::Mat descriptors, descriptorsf;
// possibleMatches is handed to generateVector and iterated afterwards to pick inverted files.
431 std::unordered_set<uint32_t> possibleMatches;
432 descriptors.convertTo(descriptorsf, CV_32FC1);
433 std::vector<float> vec =
generateVector(descriptorsf,
true, possibleMatches);
// (image id, score) pair used for ranking.
435 typedef std::pair<uint64_t, float> matchPair;
// Orders pairs by ascending second element.
437 bool operator() (matchPair a, matchPair b) {
return a.second < b.second; };
// Union of the image ids found in every inverted file named by possibleMatches.
440 std::unordered_set<uint64_t> possibleImages;
441 for (uint32_t elem : possibleMatches) {
442 std::unordered_map<uint64_t, uint32_t> & invFile =
invertedFiles[elem];
444 typedef std::unordered_map<uint64_t, uint32_t>::iterator it_type;
// NOTE(review): the count() check is redundant; inserting an existing key into a set is a no-op.
445 for (it_type iterator = invFile.begin(); iterator != invFile.end(); iterator++)
446 if (possibleImages.count(iterator->first) == 0)
447 possibleImages.insert(iterator->first);
// One (id, score) entry per candidate image.
451 std::vector<matchPair> values;
452 for (uint64_t elem : possibleImages) {
// score is computed outside the visible lines; its square root is stored with the id.
465 values.push_back(matchPair(elem, sqrt(score)));
// Sort candidates with comparer.
469 std::sort(values.begin(), values.end(), comparer);
// Emit ids and scores in sorted order.
473 for (matchPair m : values){
474 match_result->matches.push_back(m.first);
475 match_result->tfidf_scores.push_back(m.second);
// Convert to shared_ptr<MatchResultsBase> via a C-style cast for the return.
488 return (std::shared_ptr<MatchResultsBase>)match_result;