vocabtree  0.0.1
dataset.cxx
Go to the documentation of this file.
#include "dataset.hpp"
#include "filesystem.hpp"
#include "vision.hpp"

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <limits>
#include <random>
6 
7 Dataset::Dataset(const std::string &base_location) {
8  data_directory = base_location;
9 }
10 
11 Dataset::Dataset(const std::string &base_location, const std::string &db_data_location) {
12  data_directory = base_location;
13 }
14 
16 
17 std::string Dataset::location() const {
18  return data_directory;
19 }
20 
21 std::string Dataset::location(const std::string &relative_path) const {
22  return data_directory + "/" + relative_path;
23 }
24 
25 
26 std::vector< std::shared_ptr<const Image> > Dataset::all_images() const {
27  std::vector< std::shared_ptr< const Image> > images(this->num_images());
28  for (uint64_t i = 0; i < this->num_images(); i++) {
29  images[i] = this->image(i);
30  }
31  return images;
32 }
33 
34 std::vector< std::shared_ptr<const Image> > Dataset::random_images(size_t count) const {
35  std::vector< std::shared_ptr< const Image> > all = this->all_images();
36  std::random_shuffle(all.begin(), all.end());
37  std::vector< std::shared_ptr< const Image> > images(all.begin(), all.begin() + count);
38  return images;
39 }
40 
41 std::ostream& operator<< (std::ostream &out, const Dataset &dataset) {
42  out << "Dataset location: " << dataset.location() << ", number of images: " << dataset.num_images();
43  return out;
44 }
45 
46 SimpleDataset::SimpleDataset(const std::string &base_location) : Dataset(base_location) {
47  this->construct_dataset();
48 }
49 
50 SimpleDataset::SimpleDataset(const std::string &base_location, const std::string &db_data_location)
51  : Dataset(base_location, db_data_location) {
52  if (filesystem::file_exists(db_data_location)) {
53  this->read(db_data_location);
54  }
55  else {
56  this->construct_dataset();
57  this->write(db_data_location);
58  }
59 }
60 
62 
63 std::shared_ptr<Image> SimpleDataset::image(uint64_t id) const {
64  const std::string &image_path = id_image_map.right.at(id);
65 
66  std::shared_ptr<Image> current_image = std::make_shared<SimpleImage>(image_path, id);
67  return current_image;
68 }
69 
71  const std::vector<std::string> &image_file_paths = filesystem::list_files(data_directory + "/images/", ".jpg");
72  for (size_t i = 0; i < image_file_paths.size(); i++) {
73  id_image_map.insert(boost::bimap<std::string, uint64_t>::value_type( image_file_paths[i].substr(data_directory.size(), image_file_paths[i].size() - data_directory.size()), i));
74  }
75 }
76 
77 bool SimpleDataset::read(const std::string &db_data_location) {
78  if (!filesystem::file_exists(db_data_location)) return false;
79  std::ifstream ifs(db_data_location, std::ios::binary);
80 
81  uint64_t num_images;
82  ifs.read((char *)&num_images, sizeof(uint64_t));
83 
84  for (uint64_t i = 0; i < num_images; i++) {
85 
86  uint64_t image_id;
87  uint16_t length;
88  ifs.read((char *)&image_id, sizeof(uint64_t));
89  ifs.read((char *)&length, sizeof(uint16_t));
90 
91  std::string image_location;
92  image_location.resize(length);
93 
94  ifs.read((char *)&image_location[0], sizeof(char)* length);
95  std::shared_ptr<const SimpleImage> simage = std::make_shared<const SimpleImage>(image_location, image_id);
96  this->add_image(simage);
97 
98  }
99  return (ifs.rdstate() & std::ifstream::failbit) == 0;
100 }
101 
102 bool SimpleDataset::write(const std::string &db_data_location) {
103  filesystem::create_file_directory(db_data_location);
104 
105  std::ofstream ofs(db_data_location, std::ios::binary | std::ios::trunc);
106  uint64_t num_images = this->num_images();
107  ofs.write((const char *)&num_images, sizeof(uint64_t));
108 
109  for (uint64_t i = 0; i < this->num_images(); i++) {
110  std::shared_ptr<SimpleDataset::SimpleImage> image = std::static_pointer_cast<SimpleDataset::SimpleImage>(this->image(i));
111  const std::string &image_location = image->location();
112  uint64_t image_id = image->id;
113  uint16_t length = image_location.size();
114  ofs.write((const char *)&image_id, sizeof(uint64_t));
115  ofs.write((const char *)&length, sizeof(uint16_t));
116  ofs.write((const char *)&image_location[0], sizeof(char)* length);
117  }
118 
119  return (ofs.rdstate() & std::ofstream::failbit) == 0;
120 }
121 
122 uint64_t SimpleDataset::num_images() const {
123  return id_image_map.size();
124 }
125 
126 SimpleDataset::SimpleImage::SimpleImage(const std::string &path, uint64_t imageid) : Image(imageid) {
127  image_path = path;
128 }
129 
130 std::string SimpleDataset::SimpleImage::feature_path(const std::string &feat_name) const {
131  uint32_t level0 = id >> 20;
132  uint32_t level1 = (id - (level0 << 20)) >> 10;
133 
134  std::stringstream ss;
135  ss << "/feats/" << feat_name << "/" <<
136  std::setw(4) << std::setfill('0') << level0 << "/" <<
137  std::setw(4) << std::setfill('0') << level1 << "/" <<
138  std::setw(9) << std::setfill('0') << id << "." << feat_name;
139 
140  return ss.str();
141 }
142 
144  return image_path;
145 }
146 
147 bool SimpleDataset::add_image(const std::shared_ptr<const Image> &image) {
148  if (id_image_map.right.find(image->id) != id_image_map.right.end()) return false;
149  const std::shared_ptr<const SimpleDataset::SimpleImage> simage = std::static_pointer_cast<const SimpleDataset::SimpleImage>(image);
150  id_image_map.insert(boost::bimap<std::string, uint64_t>::value_type(simage->location(), simage->id));
151  return true;
152 }