diff --git a/include/boost/algorithm/cluster/cluster_data.hpp b/include/boost/algorithm/cluster/cluster_data.hpp new file mode 100644 index 0000000..7ba773b --- /dev/null +++ b/include/boost/algorithm/cluster/cluster_data.hpp @@ -0,0 +1,63 @@ +#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP +#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP + +#include +#include + +namespace boost +{ +namespace algorithm +{ +namespace cluster +{ + +/*! TODO: Document this type. + */ +template +struct cluster_data +{ + typedef Cluster value_type; + typedef std::vector clusters; + cluster_data() : m_pClusters(new clusters) {} + ~cluster_data() {} + + cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {} + cluster_data const & cluster_data::operator=(cluster_data const & rhs) + { m_pClusters = rhs.m_pClusters; } + + typedef typename clusters::iterator iterator; + typedef typename clusters::const_iterator const_iterator; + typedef typename clusters::reverse_iterator reverse_iterator; + + iterator begin() { return m_pClusters->begin(); } + iterator end() { return m_pClusters->end(); } + + const_iterator begin() const { return m_pClusters->begin(); } + const_iterator end() const { return m_pClusters->end(); } + + iterator rbegin() { return m_pClusters->rbegin(); } + iterator rend() { return m_pClusters->rend(); } + + iterator insert(iterator loc, value_type const & val) + { return m_pClusters->insert(loc, val); } + + void push_back(value_type const & v) { m_pClusters->push_back(v); } + void pop_back() { m_pClusters->pop_back(); } + + value_type & back() { return m_pClusters->back(); } + value_type const & back() const { return m_pClusters->back(); } + +private: + boost::shared_ptr m_pClusters; +}; + +} // End of namespace cluster + +// TODO: Should we be exporting this? +using namespace cluster; + +} // End of namespace algorithm + +} // End of namespace boost + +#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP diff --git a/include/boost/algorithm/cluster/dbscan.hpp b/include/boost/algorithm/cluster/dbscan.hpp index 808dc84..58befd6 100644 --- a/include/boost/algorithm/cluster/dbscan.hpp +++ b/include/boost/algorithm/cluster/dbscan.hpp @@ -1,11 +1,8 @@ #if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP #define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP -#include -#include -#include +#include #include -#include namespace boost { @@ -18,13 +15,13 @@ namespace detail { // TODO: Replace this naive query function w/ R*-tree or fractional cascading. -// It makes the runtime quadratic. +// This query mechanism makes the runtime quadratic. template static void query( NTupleIter const & query_pt, NTupleIter const & begin, NTupleIter const & end, - float eps, + typename NTupleIter::difference_type eps, DistFun const & d, std::vector & v) { @@ -40,7 +37,7 @@ static void query( } } -// TODO: Replace this so we don't have to store the cluster info for each tuple. +// TODO: Replace this so we don't have to store the cluster info for each tuple? template struct node { @@ -52,46 +49,14 @@ struct node } // End of namespace detail. -// TODO: Document this type. -template -struct cluster_data -{ - typedef Cluster value_type; - typedef std::vector clusters; - cluster_data() : m_pClusters(new clusters) {} - ~cluster_data() {} - - cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {} - cluster_data const & cluster_data::operator=(cluster_data const & rhs) - { m_pClusters = rhs.m_pClusters; } - - typedef typename clusters::iterator iterator; - typedef typename clusters::const_iterator const_iterator; - typedef typename clusters::reverse_iterator reverse_iterator; - - iterator begin() { return m_pClusters->begin(); } - iterator end() { return m_pClusters->end(); } - - const_iterator begin() const { return m_pClusters->begin(); } - const_iterator end() const { return m_pClusters->end(); } - - iterator rbegin() { return m_pClusters->rbegin(); } - iterator rend() { return m_pClusters->rend(); } - - iterator insert(iterator loc, value_type const & val) - { return m_pClusters->insert(loc, val); } - - void push_back(value_type const & v) { m_pClusters->push_back(v); } - void pop_back() { m_pClusters->pop_back(); } - - value_type & back() { return m_pClusters->back(); } - value_type const & back() const { return m_pClusters->back(); } - -private: - boost::shared_ptr m_pClusters; -}; - -/** +/*! DBSCAN density-based clustering algorithm. + * TODO: Document this function. + * \param[in] begin + * \param[in] end + * \param[in] eps + * \param[in] min_points + * \param[in] d + * \return The cluster data (partitioning of the tuples). */ template cluster_data @@ -101,7 +66,10 @@ dbscan(NTupleIter const & begin, size_t min_points, DistFun const & d) { - // TODO: Rework the algorithm to NOT make this extra collection. + int const UNCLASSIFIED = -1; + int const NOISE = 0; + + // TODO: Rework the algorithm to NOT make this extra collection? typedef detail::node node; typedef std::vector ntuple_nodes; ntuple_nodes tuples; @@ -111,24 +79,25 @@ dbscan(NTupleIter const & begin, for(NTupleIter it = begin; it != end; ++it) { //++num_elems; - //it->cluster = UNCLASSIFIED; tuples.push_back(node(it)); } typedef cluster_data > cluster_data; cluster_data p; - // Do it... + // TODO: We should try to make cluster_num go away. int cluster_num = 0; for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it) { - if (it->cluster != UNCLASSIFIED) // Been classified. + // Skip this tuple if its already been classified as a cluster or noise. + if (it->cluster != UNCLASSIFIED) continue; // Expand cluster. std::vector seeds; detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds); + // If the neighborhood of this tuple is too small, then mark it as noise. if (seeds.size() < min_points) { it->cluster = NOISE; @@ -137,20 +106,20 @@ dbscan(NTupleIter const & begin, // Start the next cluster. ++cluster_num; - p.push_back(Cluster()); + p.push_back(Cluster()); // TODO: This is goofy. Cluster & cur_cluster = p.back(); - // Mark entire neighborhood as part of current cluster. + // Mark entire neighborhood as part of the current cluster. it->cluster = cluster_num; cur_cluster.push_back(it->tuple); - // TODO: Remove it from noise. for (size_t n = 0; n < seeds.size(); ++n) { seeds[n]->cluster = cluster_num; cur_cluster.push_back(seeds[n]->tuple); - // TODO: Remove it from noise. } + // Keep adding seeds and processing them until we find all points that + // are Density Reachable. while (! seeds.empty()) { ntuple_nodes::iterator cur = seeds.back(); @@ -181,6 +150,7 @@ dbscan(NTupleIter const & begin, } // End of namespace cluster +// TODO: Should we be exporting this? using namespace cluster; } // End of namespace algorithm