Moved the cluster_data template into its own header.

Cleaned up the code slightly, and added some doc comments. [SVN r45221]
2008-05-08 17:40:52 +00:00
parent 51751da0fd
commit c91fe00184
2 changed files with 88 additions and 55 deletions
--- a/include/boost/algorithm/cluster/cluster_data.hpp
+++ b/include/boost/algorithm/cluster/cluster_data.hpp
@@ -0,0 +1,63 @@
 #if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
 #define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
 #include <boost/shared_ptr.hpp>
 #include <vector>
 namespace boost
 {
 namespace algorithm
 {
 namespace cluster
 {
 /*! Collection of clusters, as returned by the clustering algorithms.
  *
  * A thin handle around a reference-counted std::vector<Cluster>. Copying or
  * assigning a cluster_data does NOT duplicate the clusters: afterwards both
  * objects refer to the same underlying vector, so a modification made through
  * one copy is visible through every other copy.
  *
  * All container-style members simply forward to the underlying std::vector,
  * so the usual std::vector preconditions apply (e.g. back() and pop_back()
  * require a non-empty collection).
  *
  * \tparam Cluster The type of a single cluster (the stored element type).
  */
 template<typename Cluster>
 struct cluster_data
 {
   typedef Cluster value_type;
   typedef std::vector<value_type> clusters;

   typedef typename clusters::iterator iterator;
   typedef typename clusters::const_iterator const_iterator;
   typedef typename clusters::reverse_iterator reverse_iterator;
   typedef typename clusters::const_reverse_iterator const_reverse_iterator;

   /*! Creates a handle to a new, empty collection of clusters. */
   cluster_data() : m_pClusters(new clusters) {}

   ~cluster_data() {}

   /*! Shallow copy: the new object shares the clusters of \a c. */
   cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}

   /*! Shallow assignment: this object now shares the clusters of \a rhs.
    * \return A reference to this object.
    */
   cluster_data & operator=(cluster_data const & rhs)
   {
     // Self-assignment is harmless: shared_ptr assignment handles it.
     m_pClusters = rhs.m_pClusters;
     return *this;
   }

   // Forward iteration over the clusters.
   iterator begin() { return m_pClusters->begin(); }
   iterator end() { return m_pClusters->end(); }
   const_iterator begin() const { return m_pClusters->begin(); }
   const_iterator end() const { return m_pClusters->end(); }

   // Reverse iteration over the clusters.
   reverse_iterator rbegin() { return m_pClusters->rbegin(); }
   reverse_iterator rend() { return m_pClusters->rend(); }
   const_reverse_iterator rbegin() const
   {
     // Go through a const reference so the const overload is selected.
     clusters const & c = *m_pClusters;
     return c.rbegin();
   }
   const_reverse_iterator rend() const
   {
     clusters const & c = *m_pClusters;
     return c.rend();
   }

   /*! Inserts a copy of \a val before \a loc.
    * \return An iterator referring to the newly inserted cluster.
    */
   iterator insert(iterator loc, value_type const & val)
   { return m_pClusters->insert(loc, val); }

   /*! Appends a copy of \a v to the end of the collection. */
   void push_back(value_type const & v) { m_pClusters->push_back(v); }

   /*! Removes the last cluster. */
   void pop_back() { m_pClusters->pop_back(); }

   /*! Access to the last cluster. */
   value_type & back() { return m_pClusters->back(); }
   value_type const & back() const { return m_pClusters->back(); }

 private:
   // Shared by every copy of this object (see the type's documentation).
   boost::shared_ptr<clusters> m_pClusters;
 };
 } // End of namespace cluster
 // TODO: Should we be exporting this?
 using namespace cluster;
 } // End of namespace algorithm
 } // End of namespace boost
 #endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
--- a/include/boost/algorithm/cluster/dbscan.hpp
+++ b/include/boost/algorithm/cluster/dbscan.hpp
@@ -1,11 +1,8 @@
 #if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
 #define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
-#include <boost/range/begin.hpp>
+#include <boost/algorithm/cluster/cluster_data.hpp>
 #include <boost/range/end.hpp>
 #include <boost/shared_ptr.hpp>
 #include <vector>
 #include <list>
 namespace boost
 {
@@ -18,13 +15,13 @@ namespace detail
 {
 // TODO: Replace this naive query function w/ R*-tree or fractional cascading.
-// It makes the runtime quadratic.
+// This query mechanism makes the runtime quadratic.
 template<typename NTupleIter, typename DistFun>
 static void query(
  NTupleIter const & query_pt,
  NTupleIter const & begin,
  NTupleIter const & end,
-  float eps,
+  typename NTupleIter::difference_type eps,
  DistFun const & d,
  std::vector<NTupleIter> & v)
 {
@@ -40,7 +37,7 @@ static void query(
  }
 }
-// TODO: Replace this so we don't have to store the cluster info for each tuple.
+// TODO: Replace this so we don't have to store the cluster info for each tuple?
 template<typename NTupleIter>
 struct node
 {
@@ -52,46 +49,14 @@ struct node
 } // End of namespace detail.
-// TODO: Document this type.
+/*! DBSCAN density-based clustering algorithm.
-template<typename Cluster>
+ * TODO: Document this function.
-struct cluster_data
+ * \param[in] begin
-{
+ * \param[in] end
-  typedef Cluster value_type;
+ * \param[in] eps
-  typedef std::vector<value_type> clusters;
+ * \param[in] min_points
-  cluster_data() : m_pClusters(new clusters) {}
+ * \param[in] d
-  ~cluster_data() {}
+ * \return The cluster data (partitioning of the tuples).
  cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
  cluster_data const & cluster_data::operator=(cluster_data const & rhs)
  { m_pClusters = rhs.m_pClusters; }
  typedef typename clusters::iterator iterator;
  typedef typename clusters::const_iterator const_iterator;
  typedef typename clusters::reverse_iterator reverse_iterator;
  iterator begin() { return m_pClusters->begin(); }
  iterator end() { return m_pClusters->end(); }
  const_iterator begin() const { return m_pClusters->begin(); }
  const_iterator end() const { return m_pClusters->end(); }
  iterator rbegin() { return m_pClusters->rbegin(); }
  iterator rend() { return m_pClusters->rend(); }
  iterator insert(iterator loc, value_type const & val)
  { return m_pClusters->insert(loc, val); }
  void push_back(value_type const & v) { m_pClusters->push_back(v); }
  void pop_back() { m_pClusters->pop_back(); }
  value_type & back() { return m_pClusters->back(); }
  value_type const & back() const { return m_pClusters->back(); }
 private:
  boost::shared_ptr<clusters> m_pClusters;
 };
 /**
 */
 template<typename Cluster, typename NTupleIter, typename DistFun>
 cluster_data<Cluster>
@@ -101,7 +66,10 @@ dbscan(NTupleIter const & begin,
       size_t min_points,
       DistFun const & d)
 {
-  // TODO: Rework the algorithm to NOT make this extra collection.
+  int const UNCLASSIFIED = -1;
  int const NOISE = 0;
  // TODO: Rework the algorithm to NOT make this extra collection?
  typedef detail::node<NTupleIter> node;
  typedef std::vector<node> ntuple_nodes;
  ntuple_nodes tuples;
@@ -111,24 +79,25 @@ dbscan(NTupleIter const & begin,
  for(NTupleIter it = begin; it != end; ++it)
  {
    //++num_elems;
    //it->cluster = UNCLASSIFIED;
    tuples.push_back(node(it));
  }
  typedef cluster_data<std::vector<NTupleIter> > cluster_data;
  cluster_data p;
-  // Do it...
+  // TODO: We should try to make cluster_num go away.
  int cluster_num = 0;
  for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
  {
-    if (it->cluster != UNCLASSIFIED) // Been classified.
+    // Skip this tuple if it's already been classified as a cluster or noise.
    if (it->cluster != UNCLASSIFIED)
      continue;
    // Expand cluster.
    std::vector<ntuple_nodes::iterator> seeds;
    detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
    // If the neighborhood of this tuple is too small, then mark it as noise.
    if (seeds.size() < min_points)
    {
      it->cluster = NOISE;
@@ -137,20 +106,20 @@ dbscan(NTupleIter const & begin,
    // Start the next cluster.
    ++cluster_num;
-    p.push_back(Cluster());
+    p.push_back(Cluster()); // TODO: This is goofy.
    Cluster & cur_cluster = p.back();
-    // Mark entire neighborhood as part of current cluster.
+    // Mark entire neighborhood as part of the current cluster.
    it->cluster = cluster_num;
    cur_cluster.push_back(it->tuple);
    // TODO: Remove it from noise.
    for (size_t n = 0; n < seeds.size(); ++n)
    {
      seeds[n]->cluster = cluster_num;
      cur_cluster.push_back(seeds[n]->tuple);
      // TODO: Remove it from noise.
    }
    // Keep adding seeds and processing them until we find all points that
    // are Density Reachable.
    while (! seeds.empty())
    {
      ntuple_nodes::iterator cur = seeds.back();
@@ -181,6 +150,7 @@ dbscan(NTupleIter const & begin,
 } // End of namespace cluster
 // TODO: Should we be exporting this?
 using namespace cluster;
 } // End of namespace algorithm