forked from boostorg/algorithm
Moved the cluster_data template into its own header.
Cleaned up the code slightly, and added some doc comments. [SVN r45221]
This commit is contained in:
63
include/boost/algorithm/cluster/cluster_data.hpp
Normal file
63
include/boost/algorithm/cluster/cluster_data.hpp
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
|
||||||
|
#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
|
||||||
|
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace boost
|
||||||
|
{
|
||||||
|
namespace algorithm
|
||||||
|
{
|
||||||
|
namespace cluster
|
||||||
|
{
|
||||||
|
|
||||||
|
/*! TODO: Document this type.
|
||||||
|
*/
|
||||||
|
template<typename Cluster>
|
||||||
|
struct cluster_data
|
||||||
|
{
|
||||||
|
typedef Cluster value_type;
|
||||||
|
typedef std::vector<value_type> clusters;
|
||||||
|
cluster_data() : m_pClusters(new clusters) {}
|
||||||
|
~cluster_data() {}
|
||||||
|
|
||||||
|
cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
|
||||||
|
cluster_data const & cluster_data::operator=(cluster_data const & rhs)
|
||||||
|
{ m_pClusters = rhs.m_pClusters; }
|
||||||
|
|
||||||
|
typedef typename clusters::iterator iterator;
|
||||||
|
typedef typename clusters::const_iterator const_iterator;
|
||||||
|
typedef typename clusters::reverse_iterator reverse_iterator;
|
||||||
|
|
||||||
|
iterator begin() { return m_pClusters->begin(); }
|
||||||
|
iterator end() { return m_pClusters->end(); }
|
||||||
|
|
||||||
|
const_iterator begin() const { return m_pClusters->begin(); }
|
||||||
|
const_iterator end() const { return m_pClusters->end(); }
|
||||||
|
|
||||||
|
iterator rbegin() { return m_pClusters->rbegin(); }
|
||||||
|
iterator rend() { return m_pClusters->rend(); }
|
||||||
|
|
||||||
|
iterator insert(iterator loc, value_type const & val)
|
||||||
|
{ return m_pClusters->insert(loc, val); }
|
||||||
|
|
||||||
|
void push_back(value_type const & v) { m_pClusters->push_back(v); }
|
||||||
|
void pop_back() { m_pClusters->pop_back(); }
|
||||||
|
|
||||||
|
value_type & back() { return m_pClusters->back(); }
|
||||||
|
value_type const & back() const { return m_pClusters->back(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
boost::shared_ptr<clusters> m_pClusters;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End of namespace cluster
|
||||||
|
|
||||||
|
// TODO: Should we be exporting this?
|
||||||
|
using namespace cluster;
|
||||||
|
|
||||||
|
} // End of namespace algorithm
|
||||||
|
|
||||||
|
} // End of namespace boost
|
||||||
|
|
||||||
|
#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
|
@ -1,11 +1,8 @@
|
|||||||
#if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
|
#if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
|
||||||
#define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
|
#define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
|
||||||
|
|
||||||
#include <boost/range/begin.hpp>
|
#include <boost/algorithm/cluster/cluster_data.hpp>
|
||||||
#include <boost/range/end.hpp>
|
|
||||||
#include <boost/shared_ptr.hpp>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <list>
|
|
||||||
|
|
||||||
namespace boost
|
namespace boost
|
||||||
{
|
{
|
||||||
@ -18,13 +15,13 @@ namespace detail
|
|||||||
{
|
{
|
||||||
|
|
||||||
// TODO: Replace this naive query function w/ R*-tree or fractional cascading.
|
// TODO: Replace this naive query function w/ R*-tree or fractional cascading.
|
||||||
// It makes the runtime quadratic.
|
// This query mechanism makes the runtime quadratic.
|
||||||
template<typename NTupleIter, typename DistFun>
|
template<typename NTupleIter, typename DistFun>
|
||||||
static void query(
|
static void query(
|
||||||
NTupleIter const & query_pt,
|
NTupleIter const & query_pt,
|
||||||
NTupleIter const & begin,
|
NTupleIter const & begin,
|
||||||
NTupleIter const & end,
|
NTupleIter const & end,
|
||||||
float eps,
|
typename NTupleIter::difference_type eps,
|
||||||
DistFun const & d,
|
DistFun const & d,
|
||||||
std::vector<NTupleIter> & v)
|
std::vector<NTupleIter> & v)
|
||||||
{
|
{
|
||||||
@ -40,7 +37,7 @@ static void query(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Replace this so we don't have to store the cluster info for each tuple.
|
// TODO: Replace this so we don't have to store the cluster info for each tuple?
|
||||||
template<typename NTupleIter>
|
template<typename NTupleIter>
|
||||||
struct node
|
struct node
|
||||||
{
|
{
|
||||||
@ -52,46 +49,14 @@ struct node
|
|||||||
|
|
||||||
} // End of namespace detail.
|
} // End of namespace detail.
|
||||||
|
|
||||||
// TODO: Document this type.
|
/*! DBSCAN density-based clustering algorithm.
|
||||||
template<typename Cluster>
|
* TODO: Document this function.
|
||||||
struct cluster_data
|
* \param[in] begin
|
||||||
{
|
* \param[in] end
|
||||||
typedef Cluster value_type;
|
* \param[in] eps
|
||||||
typedef std::vector<value_type> clusters;
|
* \param[in] min_points
|
||||||
cluster_data() : m_pClusters(new clusters) {}
|
* \param[in] d
|
||||||
~cluster_data() {}
|
* \return The cluster data (partitioning of the tuples).
|
||||||
|
|
||||||
cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
|
|
||||||
cluster_data const & cluster_data::operator=(cluster_data const & rhs)
|
|
||||||
{ m_pClusters = rhs.m_pClusters; }
|
|
||||||
|
|
||||||
typedef typename clusters::iterator iterator;
|
|
||||||
typedef typename clusters::const_iterator const_iterator;
|
|
||||||
typedef typename clusters::reverse_iterator reverse_iterator;
|
|
||||||
|
|
||||||
iterator begin() { return m_pClusters->begin(); }
|
|
||||||
iterator end() { return m_pClusters->end(); }
|
|
||||||
|
|
||||||
const_iterator begin() const { return m_pClusters->begin(); }
|
|
||||||
const_iterator end() const { return m_pClusters->end(); }
|
|
||||||
|
|
||||||
iterator rbegin() { return m_pClusters->rbegin(); }
|
|
||||||
iterator rend() { return m_pClusters->rend(); }
|
|
||||||
|
|
||||||
iterator insert(iterator loc, value_type const & val)
|
|
||||||
{ return m_pClusters->insert(loc, val); }
|
|
||||||
|
|
||||||
void push_back(value_type const & v) { m_pClusters->push_back(v); }
|
|
||||||
void pop_back() { m_pClusters->pop_back(); }
|
|
||||||
|
|
||||||
value_type & back() { return m_pClusters->back(); }
|
|
||||||
value_type const & back() const { return m_pClusters->back(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
boost::shared_ptr<clusters> m_pClusters;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
*/
|
*/
|
||||||
template<typename Cluster, typename NTupleIter, typename DistFun>
|
template<typename Cluster, typename NTupleIter, typename DistFun>
|
||||||
cluster_data<Cluster>
|
cluster_data<Cluster>
|
||||||
@ -101,7 +66,10 @@ dbscan(NTupleIter const & begin,
|
|||||||
size_t min_points,
|
size_t min_points,
|
||||||
DistFun const & d)
|
DistFun const & d)
|
||||||
{
|
{
|
||||||
// TODO: Rework the algorithm to NOT make this extra collection.
|
int const UNCLASSIFIED = -1;
|
||||||
|
int const NOISE = 0;
|
||||||
|
|
||||||
|
// TODO: Rework the algorithm to NOT make this extra collection?
|
||||||
typedef detail::node<NTupleIter> node;
|
typedef detail::node<NTupleIter> node;
|
||||||
typedef std::vector<node> ntuple_nodes;
|
typedef std::vector<node> ntuple_nodes;
|
||||||
ntuple_nodes tuples;
|
ntuple_nodes tuples;
|
||||||
@ -111,24 +79,25 @@ dbscan(NTupleIter const & begin,
|
|||||||
for(NTupleIter it = begin; it != end; ++it)
|
for(NTupleIter it = begin; it != end; ++it)
|
||||||
{
|
{
|
||||||
//++num_elems;
|
//++num_elems;
|
||||||
//it->cluster = UNCLASSIFIED;
|
|
||||||
tuples.push_back(node(it));
|
tuples.push_back(node(it));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef cluster_data<std::vector<NTupleIter> > cluster_data;
|
typedef cluster_data<std::vector<NTupleIter> > cluster_data;
|
||||||
cluster_data p;
|
cluster_data p;
|
||||||
|
|
||||||
// Do it...
|
// TODO: We should try to make cluster_num go away.
|
||||||
int cluster_num = 0;
|
int cluster_num = 0;
|
||||||
for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
|
for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
|
||||||
{
|
{
|
||||||
if (it->cluster != UNCLASSIFIED) // Been classified.
|
// Skip this tuple if its already been classified as a cluster or noise.
|
||||||
|
if (it->cluster != UNCLASSIFIED)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Expand cluster.
|
// Expand cluster.
|
||||||
|
|
||||||
std::vector<ntuple_nodes::iterator> seeds;
|
std::vector<ntuple_nodes::iterator> seeds;
|
||||||
detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
|
detail::query(it, tuples.begin(), tuples.end(), eps, d, seeds);
|
||||||
|
// If the neighborhood of this tuple is too small, then mark it as noise.
|
||||||
if (seeds.size() < min_points)
|
if (seeds.size() < min_points)
|
||||||
{
|
{
|
||||||
it->cluster = NOISE;
|
it->cluster = NOISE;
|
||||||
@ -137,20 +106,20 @@ dbscan(NTupleIter const & begin,
|
|||||||
|
|
||||||
// Start the next cluster.
|
// Start the next cluster.
|
||||||
++cluster_num;
|
++cluster_num;
|
||||||
p.push_back(Cluster());
|
p.push_back(Cluster()); // TODO: This is goofy.
|
||||||
Cluster & cur_cluster = p.back();
|
Cluster & cur_cluster = p.back();
|
||||||
|
|
||||||
// Mark entire neighborhood as part of current cluster.
|
// Mark entire neighborhood as part of the current cluster.
|
||||||
it->cluster = cluster_num;
|
it->cluster = cluster_num;
|
||||||
cur_cluster.push_back(it->tuple);
|
cur_cluster.push_back(it->tuple);
|
||||||
// TODO: Remove it from noise.
|
|
||||||
for (size_t n = 0; n < seeds.size(); ++n)
|
for (size_t n = 0; n < seeds.size(); ++n)
|
||||||
{
|
{
|
||||||
seeds[n]->cluster = cluster_num;
|
seeds[n]->cluster = cluster_num;
|
||||||
cur_cluster.push_back(seeds[n]->tuple);
|
cur_cluster.push_back(seeds[n]->tuple);
|
||||||
// TODO: Remove it from noise.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep adding seeds and processing them until we find all points that
|
||||||
|
// are Density Reachable.
|
||||||
while (! seeds.empty())
|
while (! seeds.empty())
|
||||||
{
|
{
|
||||||
ntuple_nodes::iterator cur = seeds.back();
|
ntuple_nodes::iterator cur = seeds.back();
|
||||||
@ -181,6 +150,7 @@ dbscan(NTupleIter const & begin,
|
|||||||
|
|
||||||
} // End of namespace cluster
|
} // End of namespace cluster
|
||||||
|
|
||||||
|
// TODO: Should we be exporting this?
|
||||||
using namespace cluster;
|
using namespace cluster;
|
||||||
|
|
||||||
} // End of namespace algorithm
|
} // End of namespace algorithm
|
||||||
|
Reference in New Issue
Block a user