Moving cluster dir to project-specific location in repo.

[SVN r45238]
2008-05-09 01:43:15 +00:00
parent fe73f86604
commit 7850e71c9e
5 changed files with 0 additions and 505 deletions
--- a/include/boost/algorithm/cluster/cluster_data.hpp
+++ b/include/boost/algorithm/cluster/cluster_data.hpp
@ -1,69 +0,0 @@
-//  (C) Copyright Jonathan Franklin 2008.
-//  Use, modification and distribution are subject to the
-//  Boost Software License, Version 1.0. (See accompanying file
-//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
-#if ! defined BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
-#define BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
-
-#include <boost/shared_ptr.hpp>
-#include <vector>
-
-namespace boost
-{
-namespace algorithm
-{
-namespace cluster
-{
-
-/*! TODO: Document this type.
- */
-template<typename ClusterT>
-struct cluster_data
-{
-  typedef ClusterT value_type;
-  typedef std::vector<value_type> clusters;
-  cluster_data() : m_pClusters(new clusters) {}
-  ~cluster_data() {}
-
-  cluster_data(cluster_data const & c) : m_pClusters(c.m_pClusters) {}
-  cluster_data const & cluster_data::operator=(cluster_data const & rhs)
-  { m_pClusters = rhs.m_pClusters; }
-
-  typedef typename clusters::iterator iterator;
-  typedef typename clusters::const_iterator const_iterator;
-  typedef typename clusters::reverse_iterator reverse_iterator;
-
-  iterator begin() { return m_pClusters->begin(); }
-  iterator end() { return m_pClusters->end(); }
-
-  const_iterator begin() const { return m_pClusters->begin(); }
-  const_iterator end() const { return m_pClusters->end(); }
-
-  iterator rbegin() { return m_pClusters->rbegin(); }
-  iterator rend() { return m_pClusters->rend(); }
-
-  iterator insert(iterator loc, value_type const & val)
-  { return m_pClusters->insert(loc, val); }
-
-  void push_back(value_type const & v) { m_pClusters->push_back(v); }
-  void pop_back() { m_pClusters->pop_back(); }
-
-  value_type & back() { return m_pClusters->back(); }
-  value_type const & back() const { return m_pClusters->back(); }
-
-  size_t size() const { return m_pClusters->size(); }
-private:
-  boost::shared_ptr<clusters> m_pClusters;
-};
-
-} // End of namespace cluster
-
-// TODO: Should we be exporting this?
-using namespace cluster;
-
-} // End of namespace algorithm
-
-} // End of namespace boost
-
-#endif // BOOST_ALGORITHM_CLUSTER_CLUSTER_DATA_HPP
--- a/include/boost/algorithm/cluster/concept.hpp
+++ b/include/boost/algorithm/cluster/concept.hpp
@ -1,38 +0,0 @@
-//  (C) Copyright Jonathan Franklin 2008.
-//  Use, modification and distribution are subject to the
-//  Boost Software License, Version 1.0. (See accompanying file
-//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
-#if ! defined BOOST_ALGORITHM_CLUSTER_CONCEPT_HPP
-#define BOOST_ALGORITHM_CLUSTER_CONCEPT_HPP
-
-#include <boost/concept_check.hpp>
-
-namespace boost
-{
-namespace algorithm
-{
-namespace cluster
-{
-
-  // TODO: Document the purpose of this concept.
-  template<typename T, typename DistanceFunT>
-  struct DistanceComparableConcept
-  {
-    void constraints()
-    {
-      // Operation
-      d(t, t);
-    }
-  private:
-    T t;
-    DistanceFunT d;
-  };
-
-  // TODO: Add concepts here, then delete this comment.
-
-} // End of namespace cluster;
-} // End of namespace algorithm;
-} // End of namespace boost;
-
-#endif // BOOST_ALGORITHM_CLUSTER_CONCEPT_HPP
--- a/include/boost/algorithm/cluster/dbscan.hpp
+++ b/include/boost/algorithm/cluster/dbscan.hpp
@ -1,153 +0,0 @@
-//  (C) Copyright Jonathan Franklin 2008.
-//  Use, modification and distribution are subject to the
-//  Boost Software License, Version 1.0. (See accompanying file
-//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
-#if ! defined BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
-#define BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
-
-#include <boost/algorithm/cluster/cluster_data.hpp>
-#include <boost/algorithm/cluster/concept.hpp>
-#include <boost/algorithm/cluster/detail/naive_query.hpp>
-#include <boost/utility/result_of.hpp>
-#include <vector>
-
-namespace boost
-{
-namespace algorithm
-{
-namespace cluster
-{
-
-namespace detail
-{
-// TODO: Where should we put these?
-int const UNCLASSIFIED = -1;
-int const NOISE = 0;
-
-// TODO: Replace this so we don't have to store the cluster info for each tuple?
-template<typename NTupleIterT>
-struct node
-{
-  node(NTupleIterT const & t) : tuple(t), cluster(UNCLASSIFIED) {}
-
-  NTupleIterT tuple;
-  int cluster;
-};
-
-} // End of namespace detail.
-
-/*! DBSCAN density-based clustering algorithm.
- * TODO: Document this function.
- * \param[in] begin
- * \param[in] end
- * \param[in] eps
- * \param[in] min_points
- * \param[in] d
- * \return The cluster data (partitioning of the tuples).
- */
-template<typename ClusterT, typename NTupleIterT,
-         typename DistanceT, typename DistFunT>
-cluster_data<ClusterT>
-dbscan(NTupleIterT const & begin,
-       NTupleIterT const & end, 
-       DistanceT const & eps,
-       size_t min_points,
-       DistFunT const & d)
-{
-  // Concept check.
-  function_requires<
-    DistanceComparableConcept<typename NTupleIterT::value_type, DistFunT> >();
-    //DistanceComparableConcept<int, DistFunT> >();
-  function_requires<
-    DistanceComparableConcept<DistanceT, DistFunT> >();
-
-  // TODO: Rework the algorithm to NOT make this extra collection?
-  typedef detail::node<NTupleIterT> node;
-  typedef std::vector<node> ntuple_nodes;
-  ntuple_nodes tuples;
-
-  // Initialize algorithm.
-  //size_t num_elems = 0;
-  for(NTupleIterT it = begin; it != end; ++it)
-  {
-    //++num_elems;
-    tuples.push_back(node(it));
-  }
-
-  typedef cluster_data<std::vector<NTupleIterT> > cluster_data;
-  cluster_data p;
-
-  // TODO: We should try to make cluster_num go away.
-  int cluster_num = 0;
-  for(ntuple_nodes::iterator it = tuples.begin(); it != tuples.end(); ++it)
-  {
-    // Skip this tuple if its already been classified as a cluster or noise.
-    if (it->cluster != detail::UNCLASSIFIED)
-      continue;
-
-    // Expand cluster.
-
-    std::vector<ntuple_nodes::iterator> seeds;
-    detail::naive_query(it, tuples.begin(), tuples.end(), eps, d, seeds);
-    // If the neighborhood of this tuple is too small, then mark it as noise.
-    if (seeds.size() < min_points)
-    {
-      it->cluster = detail::NOISE;
-      continue;
-    }
-
-    // Start the next cluster.
-    ++cluster_num;
-    p.push_back(ClusterT()); // TODO: This is goofy.
-    ClusterT & cur_cluster = p.back();
-
-    // Mark entire neighborhood as part of the current cluster.
-    it->cluster = cluster_num;
-    cur_cluster.push_back(it->tuple);
-    for (size_t n = 0; n < seeds.size(); ++n)
-    {
-      seeds[n]->cluster = cluster_num;
-      cur_cluster.push_back(seeds[n]->tuple);
-    }
-
-    // Keep adding seeds and processing them until we find all points that
-    // are Density Reachable.
-    while (! seeds.empty())
-    {
-      ntuple_nodes::iterator cur = seeds.back();
-      seeds.pop_back();
-
-      std::vector<ntuple_nodes::iterator> results;
-      detail::naive_query(cur, tuples.begin(), tuples.end(), eps, d, results);
-
-      if (results.size() >= min_points)
-      {
-        for (size_t n = 0; n < results.size(); ++n)
-        {
-          if (results[n]->cluster < 1) // Not assigned to cluster yet.
-          {
-            if (detail::UNCLASSIFIED == results[n]->cluster)
-              seeds.push_back(results[n]);
-            results[n]->cluster = cluster_num;
-            cur_cluster.push_back(results[n]->tuple);
-          }
-        }
-      }
-    }
-
-  } // Outer loop for all tuples.
-
-  return p;
-}
-
-} // End of namespace cluster
-
-// TODO: Should we be exporting this?
-using namespace cluster;
-
-} // End of namespace algorithm
-
-} // End of namespace boost
-
-#endif // BOOST_ALGORITHM_CLUSTER_DBSCAN_HPP
--- a/include/boost/algorithm/cluster/detail/naive_query.hpp
+++ b/include/boost/algorithm/cluster/detail/naive_query.hpp
@ -1,50 +0,0 @@
-//  (C) Copyright Jonathan Franklin 2008.
-//  Use, modification and distribution are subject to the
-//  Boost Software License, Version 1.0. (See accompanying file
-//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-
-#if ! defined BOOST_ALGORITHM_CLUSTER_DETAIL_NAIVE_QUERY_HPP
-#define BOOST_ALGORITHM_CLUSTER_DETAIL_NAIVE_QUERY_HPP
-
-#include <boost/algorithm/cluster/cluster_data.hpp>
-#include <boost/algorithm/cluster/concept.hpp>
-#include <vector>
-
-namespace boost
-{
-namespace algorithm
-{
-namespace cluster
-{
-namespace detail
-{
-
-// TODO: Replace this naive query function w/ R*-tree or fractional cascading.
-// This query mechanism makes the runtime quadratic.
-template<typename NTupleIterT, typename DistanceT, typename DistFunT>
-static void naive_query(
-  NTupleIterT const & query_pt,
-  NTupleIterT const & begin,
-  NTupleIterT const & end,
-  DistanceT const & eps,
-  DistFunT const & d,
-  std::vector<NTupleIterT> & v)
-{
-  for(NTupleIterT cur_pt = begin; cur_pt != end; ++cur_pt)
-  {
-    if (query_pt == cur_pt)
-      continue;
-
-    if (d(*query_pt->tuple, *cur_pt->tuple) > eps)
-      continue;
-
-    v.push_back(cur_pt);
-  }
-}
-
-} // End of namespace detail.
-} // End of namespace cluster
-} // End of namespace algorithm
-} // End of namespace boost
-
-#endif // BOOST_ALGORITHM_CLUSTER_DETAIL_NAIVE_QUERY_HPP
--- a/include/boost/algorithm/cluster/k_means.hpp
+++ b/include/boost/algorithm/cluster/k_means.hpp
@ -1,195 +0,0 @@
-/*****
-** References
-** - J. MacQueen, "Some methods for classification and analysis
-**   of multivariate observations", Fifth Berkeley Symposium on
-**   Math Statistics and Probability, 281-297, 1967.
-** - I.S. Dhillon and D.S. Modha, "A data-clustering algorithm
-**   on distributed memory multiprocessors",
-**   Large-Scale Parallel Data Mining, 245-260, 1999.
-** Yuanming Chen, 2008-05-08
-*/
-
-#ifndef BOOST_ALGORITHM_CLUSTER_K_MEANS_HPP
-#define BOOST_ALGORITHM_CLUSTER_K_MEANS_HPP
-
-#include <cmath>
-#include <float.h>
-//#include "common.hpp"
-#include <vector>
-#include <list>
-#include <cassert>
-
-namespace boost {
-  namespace algorithm {
-      namespace cluster {
-          namespace detail {  
-                template<typename AttributeType, typename differenceType>
-                //The original C function
-                int *k_means(AttributeType **data, int n, int m, int k, differenceType eps, AttributeType **centroids)
-                {
-                   /* output cluster label for each data point */
-                   int *labels = (int*)calloc(n, sizeof(int));
-
-                   int h, i, j; /* loop counters, of course :) */
-                   int *counts = (int*)calloc(k, sizeof(int)); /* size of each cluster */
-                   AttributeType old_error, error = FLT_MAX; /* sum of squared euclidean distance */
-                   AttributeType **c = centroids ? centroids : (AttributeType**)calloc(k, sizeof(AttributeType*));
-                   AttributeType **c1 = (AttributeType**)calloc(k, sizeof(AttributeType*)); /* temp centroids */
-
-                   //assert(data && k > 0 && k <= n && m > 0 && t >= 0); /* for debugging */
-
-                   /****
-                   ** initialization */
-
-                   for (h = i = 0; i < k; h += n / k, i++) {
-                      c1[i] = (AttributeType*)calloc(m, sizeof(AttributeType));
-                      if (!centroids) {
-                         c[i] = (AttributeType*)calloc(m, sizeof(AttributeType));
-                      }
-                      /* pick k points as initial centroids */
-                      for (j = m; j-- > 0; c[i][j] = data[h][j]);
-                   }
-
-                   /****
-                   ** main loop */
-
-                   do {
-                      /* save error from last step */
-                      old_error = error, error = 0;
-
-                      /* clear old counts and temp centroids */
-                      for (i = 0; i < k; counts[i++] = 0) {
-                         for (j = 0; j < m; c1[i][j++] = 0);
-                      }
-
-                      for (h = 0; h < n; h++) {
-                         /* identify the closest cluster */
-                         AttributeType min_distance = FLT_MAX;
-                         for (i = 0; i < k; i++) {
-                            AttributeType distance = 0;
-                            for (j = m; j-- > 0; distance += pow(data[h][j] - c[i][j], 2));
-                            if (distance < min_distance) {
-                               labels[h] = i;
-                               min_distance = distance;
-                            }
-                         }
-                         /* update size and temp centroid of the destination cluster */
-                         for (j = m; j-- > 0; c1[labels[h]][j] += data[h][j]);
-                         counts[labels[h]]++;
-                         /* update standard error */
-                         error += min_distance;
-                      }
-
-                      for (i = 0; i < k; i++) { /* update all centroids */
-                         for (j = 0; j < m; j++) {
-                            c[i][j] = counts[i] ? c1[i][j] / counts[i] : c1[i][j];
-                         }
-                      }
-
-                   } while (fabs(error - old_error) > eps);
-
-                   /****
-                   ** housekeeping */
-
-                   for (i = 0; i < k; i++) {
-                      if (!centroids) {
-                         free(c[i]);
-                      }
-                      free(c1[i]);
-                   }
-
-                   if (!centroids) {
-                      free(c);
-                   }
-                   free(c1);
-
-                   free(counts);
-
-                   return labels;
-                }
-          } //End of details namespace
-
-            template<typename PointType>
-            struct KMeansCluster {
-                PointType centroid;
-                std::vector<int> points; //The indice of points are stored here 
-            };
-
-            template <typename KMeansCluster> 
-            struct KMeansClustering { 
-                typedef std::vector< KMeansCluster > type; 
-                type clusters;
-            };
-
-            /** 
-            * @param first: the first data point's iterator
-            * @param last: the last data point's iterator
-            * @param k: the k value for the k-mean algorithm
-            * @return collections of clusters
-            */
-            template <typename NTupleIter>
-            typename KMeansClustering< typename KMeansCluster<typename NTupleIter::value_type> >
-            k_means(NTupleIter first, NTupleIter last, unsigned k, 
-                   typename NTupleIter::difference_type const & eps)
-            {
-                typedef NTupleIter::difference_type DistanceType;
-                typedef NTupleIter::value_type PointType;
-                typedef PointType::value_type AttributeType; //For the c funtion test, it will be a double type
-                const DistanceType knumOfPoints = last - first; //The n variable in the C function
-                const size_t knDimension = PointType::size(); //The m variable in the C function
-
-                AttributeType** ppData = new AttributeType* [knumOfPoints];
-                AttributeType** centroids = new AttributeType* [k]; 
-                //Pre-allocate the result array
-                for(size_t nCentroid = 0; nCentroid < k; nCentroid++)
-                {
-                    centroids[nCentroid] = new AttributeType[knDimension];
-                }
-
-                int nIndex = 0;
-                for(NTupleIter iter = first; iter != last; iter++, nIndex++)
-                {
-                    PointType& pt= *iter; //A point
-                    ppData[nIndex] = new AttributeType[knDimension];
-                    for(unsigned int nAttribute = 0; nAttribute < knDimension; nAttribute++)
-                    {
-                        ppData[nIndex][nAttribute] = pt[nAttribute];
-                    }
-                }
-
-                int* labels = detail::k_means(ppData, (int) knumOfPoints, (int) knDimension, k, eps, centroids);
-
-                typedef KMeansCluster<PointType> KMeansClusterType;
-                KMeansClustering< KMeansClusterType > clustering;
-                for(size_t nCentroid = 0; nCentroid < k; nCentroid++)
-                {
-                    
-                    KMeansClusterType cluster;
-                    PointType centroid;
-                    for(unsigned int nAttribute = 0; nAttribute < knDimension; nAttribute++)
-                    {
-                        centroid[nAttribute] = centroids[nCentroid][nAttribute];
-                    }
-                    cluster.centroid = centroid;
-                    clustering.clusters.push_back(cluster);
-                    delete[] centroids[nCentroid];
-                }
-
-                for(int nPoint = 0; nPoint < knumOfPoints; nPoint++)
-                {
-                    int nCentroidIndex = labels[nPoint];
-                    clustering.clusters[nCentroidIndex].points.push_back(nPoint);
-                    delete[] ppData[nPoint];
-                }
-
-                delete[] centroids;
-                delete[] ppData;
-                delete[] labels;
-
-                return clustering;
-            }
-        } //End of cluster namespace
-    } //End of algorithm namespace
-} //End of boost namespace
-
-#endif // BOOST_ALGORITHM_CLUSTER_K_MEANS_HPP