Moved the cluster_data template into its own header.

Cleaned up the code slightly, and added some doc comments.



[SVN r45221]
This commit is contained in:
Jonathan Franklin
2008-05-08 17:40:52 +00:00
parent 51751da0fd
commit c91fe00184
2 changed files with 88 additions and 55 deletions

View File

@ -0,0 +1,63 @@
#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
#include <boost/shared_ptr.hpp>
#include <vector>
namespace boost
{
namespace algorithm
{
namespace cluster
{
/*! Handle to a shared collection of clusters.
 *
 * The clusters are stored in a std::vector that is owned through a
 * boost::shared_ptr. Copy construction and assignment copy only the pointer,
 * so every copy of a cluster_data refers to the SAME underlying vector: a
 * change made through one copy is visible through all of the others.
 *
 * The member functions forward to the std::vector member of the same name,
 * so the usual std::vector preconditions apply (e.g. back() and pop_back()
 * require a non-empty collection).
 *
 * \tparam Cluster The type used to represent a single cluster.
 */
template<typename Cluster>
struct cluster_data
{
    typedef Cluster value_type;
    typedef std::vector<value_type> clusters;

    typedef typename clusters::iterator iterator;
    typedef typename clusters::const_iterator const_iterator;
    typedef typename clusters::reverse_iterator reverse_iterator;

    /*! Creates a handle to a new, empty collection of clusters. */
    cluster_data() : m_pClusters(new clusters) {}

    ~cluster_data() {}

    /*! Creates another handle to the collection referenced by \p c. */
    cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}

    /*! Rebinds this handle to the collection referenced by \p rhs.
     * \return A reference to this object.
     */
    cluster_data const & operator=(cluster_data const & rhs)
    {
        // shared_ptr assignment is safe for self-assignment.
        m_pClusters = rhs.m_pClusters;
        return *this;
    }

    iterator begin() { return m_pClusters->begin(); }
    iterator end() { return m_pClusters->end(); }

    const_iterator begin() const { return m_pClusters->begin(); }
    const_iterator end() const { return m_pClusters->end(); }

    // Reverse traversal must use reverse_iterator; std::vector's
    // reverse_iterator is not convertible to its plain iterator.
    reverse_iterator rbegin() { return m_pClusters->rbegin(); }
    reverse_iterator rend() { return m_pClusters->rend(); }

    /*! Inserts \p val before \p loc.
     * \return An iterator to the newly inserted cluster.
     */
    iterator insert(iterator loc, value_type const & val)
    { return m_pClusters->insert(loc, val); }

    void push_back(value_type const & v) { m_pClusters->push_back(v); }
    void pop_back() { m_pClusters->pop_back(); }

    value_type & back() { return m_pClusters->back(); }
    value_type const & back() const { return m_pClusters->back(); }

private:
    // Shared ownership of the cluster vector; never null after construction.
    boost::shared_ptr<clusters> m_pClusters;
};
} // End of namespace cluster
// TODO: Should we be exporting this?
using namespace cluster;
} // End of namespace algorithm
} // End of namespace boost
#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP

View File

@ -1,11 +1,8 @@
#if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP #if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
#define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP #define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
#include <boost/range/begin.hpp> #include <boost/algorithm/cluster/cluster_data.hpp>
#include <boost/range/end.hpp>
#include <boost/shared_ptr.hpp>
#include <vector> #include <vector>
#include <list>
namespace boost namespace boost
{ {
@ -18,13 +15,13 @@ namespace detail
{ {
// TODO: Replace this naive query function w/ R*-tree or fractional cascading. // TODO: Replace this naive query function w/ R*-tree or fractional cascading.
// It makes the runtime quadratic. // This query mechanism makes the runtime quadratic.
template<typename NTupleIter, typename DistFun> template<typename NTupleIter, typename DistFun>
static void query( static void query(
NTupleIter const & query_pt, NTupleIter const & query_pt,
NTupleIter const & begin, NTupleIter const & begin,
NTupleIter const & end, NTupleIter const & end,
float eps, typename NTupleIter::difference_type eps,
DistFun const & d, DistFun const & d,
std::vector<NTupleIter> & v) std::vector<NTupleIter> & v)
{ {
@ -40,7 +37,7 @@ static void query(
} }
} }
// TODO: Replace this so we don't have to store the cluster info for each tuple. // TODO: Replace this so we don't have to store the cluster info for each tuple?
template<typename NTupleIter> template<typename NTupleIter>
struct node struct node
{ {
@ -52,46 +49,14 @@ struct node
} // End of namespace detail. } // End of namespace detail.
// TODO: Document this type. /*! DBSCAN density-based clustering algorithm.
template<typename Cluster> * TODO: Document this function.
struct cluster_data * \param[in] begin
{ * \param[in] end
typedef Cluster value_type; * \param[in] eps
typedef std::vector<value_type> clusters; * \param[in] min_points
cluster_data() : m_pClusters(new clusters) {} * \param[in] d
~cluster_data() {} * \return The cluster data (partitioning of the tuples).
cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
cluster_data const & cluster_data::operator=(cluster_data const & rhs)
{ m_pClusters = rhs.m_pClusters; }
typedef typename clusters::iterator iterator;
typedef typename clusters::const_iterator const_iterator;
typedef typename clusters::reverse_iterator reverse_iterator;
iterator begin() { return m_pClusters->begin(); }
iterator end() { return m_pClusters->end(); }
const_iterator begin() const { return m_pClusters->begin(); }
const_iterator end() const { return m_pClusters->end(); }
iterator rbegin() { return m_pClusters->rbegin(); }
iterator rend() { return m_pClusters->rend(); }
iterator insert(iterator loc, value_type const & val)
{ return m_pClusters->insert(loc, val); }
void push_back(value_type const & v) { m_pClusters->push_back(v); }
void pop_back() { m_pClusters->pop_back(); }
value_type & back() { return m_pClusters->back(); }
value_type const & back() const { return m_pClusters->back(); }
private:
boost::shared_ptr<clusters> m_pClusters;
};
/**
*/ */
template<typename Cluster, typename NTupleIter, typename DistFun> template<typename Cluster, typename NTupleIter, typename DistFun>
cluster_data<Cluster> cluster_data<Cluster>
@ -101,7 +66,10 @@ dbscan(NTupleIter const & begin,
size_t min_points, size_t min_points,
DistFun const & d) DistFun const & d)
{ {
// TODO: Rework the algorithm to NOT make this extra collection. int const UNCLASSIFIED = -1;
int const NOISE = 0;
// TODO: Rework the algorithm to NOT make this extra collection?
typedef detail::node<NTupleIter> node; typedef detail::node<NTupleIter> node;
typedef std::vector<node> ntuple_nodes; typedef std::vector<node> ntuple_nodes;
ntuple_nodes tuples; ntuple_nodes tuples;
@ -111,24 +79,25 @@ dbscan(NTupleIter const & begin,
for(NTupleIter it = begin; it != end; ++it) for(NTupleIter it = begin; it != end; ++it)
{ {
//++num_elems; //++num_elems;
//it->cluster = UNCLASSIFIED;
tuples.push_back(node(it)); tuples.push_back(node(it));
} }
typedef cluster_data<std::vector<NTupleIter> > cluster_data; typedef cluster_data<std::vector<NTupleIter> > cluster_data;
cluster_data p; cluster_data p;
// Do it... // TODO: We should try to make cluster_num go away.
int cluster_num = 0; int cluster_num = 0;
for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it) for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
{ {
if (it->cluster != UNCLASSIFIED) // Been classified. // Skip this tuple if it's already been classified as a cluster or noise.
if (it->cluster != UNCLASSIFIED)
continue; continue;
// Expand cluster. // Expand cluster.
std::vector<ntuple_nodes::iterator> seeds; std::vector<ntuple_nodes::iterator> seeds;
detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds); detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
// If the neighborhood of this tuple is too small, then mark it as noise.
if (seeds.size() < min_points) if (seeds.size() < min_points)
{ {
it->cluster = NOISE; it->cluster = NOISE;
@ -137,20 +106,20 @@ dbscan(NTupleIter const & begin,
// Start the next cluster. // Start the next cluster.
++cluster_num; ++cluster_num;
p.push_back(Cluster()); p.push_back(Cluster()); // TODO: This is goofy.
Cluster & cur_cluster = p.back(); Cluster & cur_cluster = p.back();
// Mark entire neighborhood as part of current cluster. // Mark entire neighborhood as part of the current cluster.
it->cluster = cluster_num; it->cluster = cluster_num;
cur_cluster.push_back(it->tuple); cur_cluster.push_back(it->tuple);
// TODO: Remove it from noise.
for (size_t n = 0; n < seeds.size(); ++n) for (size_t n = 0; n < seeds.size(); ++n)
{ {
seeds[n]->cluster = cluster_num; seeds[n]->cluster = cluster_num;
cur_cluster.push_back(seeds[n]->tuple); cur_cluster.push_back(seeds[n]->tuple);
// TODO: Remove it from noise.
} }
// Keep adding seeds and processing them until we find all points that
// are Density Reachable.
while (! seeds.empty()) while (! seeds.empty())
{ {
ntuple_nodes::iterator cur = seeds.back(); ntuple_nodes::iterator cur = seeds.back();
@ -181,6 +150,7 @@ dbscan(NTupleIter const & begin,
} // End of namespace cluster } // End of namespace cluster
// TODO: Should we be exporting this?
using namespace cluster; using namespace cluster;
} // End of namespace algorithm } // End of namespace algorithm