mirror of
				https://github.com/boostorg/unordered.git
				synced 2025-11-03 17:21:48 +01:00 
			
		
		
		
	
		
			
	
	
		
			189 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
		
		
			
		
	
	
			189 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
| 
								 | 
							
								# Refactoring Roadmap
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								[Proof of concept](https://github.com/joaquintides/fca_unordered) for a fast closed-addressing implementation.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								## Plan of Refactoring
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								* remove `ptr_node` and `ptr_bucket`
							 | 
						||
| 
								 | 
							
								* see if the code can survive a lack of the `extra_node` or maybe we hard-code it in
							 | 
						||
| 
								 | 
							
								* implement bucket groups as they are in `fca` but don't use them directly yet, add alongside the `buckets_` data member in `struct table`
							 | 
						||
| 
								 | 
							
								* try to remove `bucket_info_` from the node structure (breaks all call-sites that use `get_bucket()` and dependents)
							 | 
						||
| 
								 | 
							
								* make sure `fca` can successfully handle multi-variants at this stage and support mutable iterators for `map`/`multimap`
							 | 
						||
| 
								 | 
							
								* do a hard-break:
							 | 
						||
| 
								 | 
							
								    * update code to no longer use one single linked list across all buckets (each bucket contains its own unique list)
							 | 
						||
| 
								 | 
							
								    * integrate the `bucket_group<Node>` structure into the `table` (update iterator call-sites to include `bucket_iterator`s)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Blockers:
							 | 
						||
| 
								 | 
							
								* how to handle `multi` variants with new `fca` prototype
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								## Implementation Differences
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								### Unordered
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								### Node Type
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Bullet Points:
							 | 
						||
| 
								 | 
							
								* reify node type into a single one
							 | 
						||
| 
								 | 
							
								* come up with implementation for multi- variants
							 | 
						||
| 
								 | 
							
								* code that touches `get_bucket()` and `*_in_group()` member functions may need updating
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								There are two node types in Unordered, `struct node` and `struct ptr_node`, and the node type is selected conditionally based on the Allocator's pointer type:
							 | 
						||
| 
								 | 
							
								```c++
							 | 
						||
| 
								 | 
							
								template <typename A, typename T, typename NodePtr, typename BucketPtr>
							 | 
						||
| 
								 | 
							
								struct pick_node2
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::node<A, T> node;
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <typename A, typename T>
							 | 
						||
| 
								 | 
							
								struct pick_node2<A, T, boost::unordered::detail::ptr_node<T>*,
							 | 
						||
| 
								 | 
							
								  boost::unordered::detail::ptr_bucket*>
							 | 
						||
| 
								 | 
							
								{ 
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::ptr_node<T> node;
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								template <typename A, typename T> struct pick_node
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								  typedef typename boost::remove_const<T>::type nonconst;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::allocator_traits<
							 | 
						||
| 
								 | 
							
								    typename boost::unordered::detail::rebind_wrap<A,
							 | 
						||
| 
								 | 
							
								      boost::unordered::detail::ptr_node<nonconst> >::type>
							 | 
						||
| 
								 | 
							
								    tentative_node_traits;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::allocator_traits<
							 | 
						||
| 
								 | 
							
								    typename boost::unordered::detail::rebind_wrap<A,
							 | 
						||
| 
								 | 
							
								      boost::unordered::detail::ptr_bucket>::type>
							 | 
						||
| 
								 | 
							
								    tentative_bucket_traits;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  typedef pick_node2<A, nonconst, typename tentative_node_traits::pointer,
							 | 
						||
| 
								 | 
							
								    typename tentative_bucket_traits::pointer>
							 | 
						||
| 
								 | 
							
								    pick;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  typedef typename pick::node node;
							 | 
						||
| 
								 | 
							
								  typedef typename pick::bucket bucket;
							 | 
						||
| 
								 | 
							
								  typedef typename pick::link_pointer link_pointer;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The node types are identical in terms of interface and the only difference is that `node` is chosen when the Allocator uses fancy pointers and `ptr_node` is chosen when the Allocator's pointer type is `T*`.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Nodes in Unordered store `bucket_info_`:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								template <typename A, typename T>
							 | 
						||
| 
								 | 
							
								struct node : boost::unordered::detail::value_base<T>
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								  link_pointer next_;
							 | 
						||
| 
								 | 
							
								  std::size_t bucket_info_;
							 | 
						||
| 
								 | 
							
								  node() : next_(), bucket_info_(0) {}
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								`bucket_info_` maps each node back to its corresponding bucket via the member function:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								std::size_t get_bucket() const
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								  return bucket_info_ & ((std::size_t)-1 >> 1);
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								`bucket_info_` is also used to demarcate the start of equivalent nodes in the containers via:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								// Note that nodes start out as the first in their group, as `bucket_info_` defaults to 0.
							 | 
						||
| 
								 | 
							
								std::size_t is_first_in_group() const
							 | 
						||
| 
								 | 
							
								{ return !(bucket_info_ & ~((std::size_t)-1 >> 1)); }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								void set_first_in_group()
							 | 
						||
| 
								 | 
							
								{ bucket_info_ = bucket_info_ & ((std::size_t)-1 >> 1); }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								void reset_first_in_group()
							 | 
						||
| 
								 | 
							
								{ bucket_info_ = bucket_info_ | ~((std::size_t)-1 >> 1); }
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								A goal of refactoring is to simply have one node type:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								template<class T>
							 | 
						||
| 
								 | 
							
								struct node {
							 | 
						||
| 
								 | 
							
								  node *next;
							 | 
						||
| 
								 | 
							
								  T    value;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								that is used unconditionally. This also requires updating the code that touches `bucket_info_` along with the code that touches the `*_in_group()` member functions.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								### Bucket Type
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Bullet points:
							 | 
						||
| 
								 | 
							
								* reify bucket structure into a single one
							 | 
						||
| 
								 | 
							
								* figure out how to add `bucket_group`s to the table struct
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Buckets are similar to nodes in that there are two variations: `template<class NodePointer> struct bucket` and `struct ptr_bucket`.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The buckets exist to contain a pointer to a node; however, they also contain an `enum { extra_node = true };` or `enum { extra_node = false };` to determine whether or not the code should explicitly allocate a default-constructed node whose address is assigned as the dummy node at the end of the bucket array.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								`extra_node` is used in the creation and deletion of the bucket array but it is not inherently clear what its intended purpose is.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								### Iterators
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Iterators are currently templated on the type of Node they store. Because `fca` constructs iterators with two arguments, all the call-sites that instantiate iterators will need to be updated, but this is a straightforward mechanical change.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Iterators are selected, as of now, via the `detail::map` and `detail::set` class templates.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								For example, for `unordered_map`, `iterator` is defined as:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								typedef boost::unordered::detail::map<A, K, T, H, P> types;
							 | 
						||
| 
								 | 
							
								typedef typename types::table table;
							 | 
						||
| 
								 | 
							
								typedef typename table::iterator iterator;
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The iterator is a member typedef of the `table` which is `types::table`. Examining `types` (aka `detail::map<...>`), we see:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								template <typename A, typename K, typename M, typename H, typename P>
							 | 
						||
| 
								 | 
							
								struct map {
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::table<types> table;
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Examining the `detail::table<types>` struct, we see:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								template <typename Types>
							 | 
						||
| 
								 | 
							
								struct table {
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								  typedef typename Types::iterator iterator;
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Collapsing all of this, we see that our iterator types are defined here:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								template <typename A, typename K, typename M, typename H, typename P>
							 | 
						||
| 
								 | 
							
								struct map
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::detail::pick_node<A, value_type> pick;
							 | 
						||
| 
								 | 
							
								  typedef typename pick::node node;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::iterator_detail::iterator<node> iterator;
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::iterator_detail::c_iterator<node> c_iterator;
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::iterator_detail::l_iterator<node> l_iterator;
							 | 
						||
| 
								 | 
							
								  typedef boost::unordered::iterator_detail::cl_iterator<node>
							 | 
						||
| 
								 | 
							
								    cl_iterator;
							 | 
						||
| 
								 | 
							
								  // ...
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								This is similarly designed for `detail::set`:
							 | 
						||
| 
								 | 
							
								```cpp
							 | 
						||
| 
								 | 
							
								typedef boost::unordered::iterator_detail::c_iterator<node> iterator;
							 | 
						||
| 
								 | 
							
								typedef boost::unordered::iterator_detail::c_iterator<node> c_iterator;
							 | 
						||
| 
								 | 
							
								typedef boost::unordered::iterator_detail::cl_iterator<node> l_iterator;
							 | 
						||
| 
								 | 
							
								typedef boost::unordered::iterator_detail::cl_iterator<node>
							 | 
						||
| 
								 | 
							
								  cl_iterator;
							 | 
						||
| 
								 | 
							
								```
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								The only difference here is that `set::iterator` is always a `c_iterator`, a `const_iterator` type.
							 |