// src/bd_tree.cpp
// author Dmitriy Morozov <dmitriy@mrzv.org>
// Tue, 14 Jul 2009 09:23:09 -0700
// changeset 0 e2bb6f169431
// permissions -rw-r--r--
// Initial commit: ANN 1.1.1

//----------------------------------------------------------------------
// File:			bd_tree.cpp
// Programmer:		David Mount
// Description:		Basic methods for bd-trees.
// Last modified:	01/04/05 (Version 1.0)
//----------------------------------------------------------------------
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
// David Mount.  All Rights Reserved.
// 
// This software and related documentation is part of the Approximate
// Nearest Neighbor Library (ANN).  This software is provided under
// the provisions of the Lesser GNU Public License (LGPL).  See the
// file ../ReadMe.txt for further information.
// 
// The University of Maryland (U.M.) and the authors make no
// representations about the suitability or fitness of this software for
// any purpose.  It is provided "as is" without express or implied
// warranty.
//----------------------------------------------------------------------
// History:
//	Revision 0.1  03/04/98
//		Initial release
//	Revision 1.0  04/01/05
//		Fixed centroid shrink threshold condition to depend on the
//			dimension.
//		Moved dump routine to kd_dump.cpp.
//----------------------------------------------------------------------

#include "bd_tree.h"					// bd-tree declarations
#include "kd_util.h"					// kd-tree utilities
#include "kd_split.h"					// kd-tree splitting rules

#include <ANN/ANNperf.h>				// performance evaluation

//----------------------------------------------------------------------
//	Printing a bd-tree 
//		These routines print a bd-tree.   See the analogous procedure
//		in kd_tree.cpp for more information.
//----------------------------------------------------------------------

void ANNbd_shrink::print(				// print shrinking node
		int level,						// depth of node in tree
		ostream &out)					// output stream
{
	child[ANN_OUT]->print(level+1, out);		// print out-child

	out << "    ";
	for (int i = 0; i < level; i++)				// print indentation
		out << "..";
	out << "Shrink";
	for (int j = 0; j < n_bnds; j++) {			// print sides, 2 per line
		if (j % 2 == 0) {
			out << "\n";						// newline and indentation
			for (int i = 0; i < level+2; i++) out << "  ";
		}
		out << "  ([" << bnds[j].cd << "]"
			 << (bnds[j].sd > 0 ? ">=" : "< ")
			 << bnds[j].cv << ")";
	}
	out << "\n";

	child[ANN_IN]->print(level+1, out);			// print in-child
}

//----------------------------------------------------------------------
//	bd-tree statistics utility (for performance evaluation)
//		This routine computes various statistics information for
//		shrinking nodes.  See file kd_tree.cpp for more information.
//----------------------------------------------------------------------

void ANNbd_shrink::getStats(					// get subtree statistics
	int					dim,					// dimension of space
	ANNkdStats			&st,					// stats (modified)
	ANNorthRect			&bnd_box)				// bounding box
{
	ANNkdStats ch_stats;						// stats for children
	ANNorthRect inner_box(dim);					// inner box of shrink

	annBnds2Box(bnd_box,						// enclosing box
				dim,							// dimension
				n_bnds,							// number of bounds
				bnds,							// bounds array
				inner_box);						// inner box (modified)
												// get stats for inner child
	ch_stats.reset();							// reset
	child[ANN_IN]->getStats(dim, ch_stats, inner_box);
	st.merge(ch_stats);							// merge them
												// get stats for outer child
	ch_stats.reset();							// reset
	child[ANN_OUT]->getStats(dim, ch_stats, bnd_box);
	st.merge(ch_stats);							// merge them

	st.depth++;									// increment depth
	st.n_shr++;									// increment number of shrinks
}

//----------------------------------------------------------------------
// bd-tree constructor
//		This is the main constructor for bd-trees given a set of points.
//		It first builds a skeleton kd-tree as a basis, then computes the
//		bounding box of the data points, and then invokes rbd_tree() to
//		actually build the tree, passing it the appropriate splitting
//		and shrinking information.
//----------------------------------------------------------------------

// Forward declaration of the recursive builder.  The bd-tree constructor
// below calls it, while its definition appears further down in this file.
// It returns the root of the subtree built for the n indices in pidx.
ANNkd_ptr rbd_tree(						// recursive construction of bd-tree
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	int					bsp,			// bucket space
	ANNorthRect			&bnd_box,		// bounding box for current node
	ANNkd_splitter		splitter,		// splitting routine
	ANNshrinkRule		shrink);		// shrinking rule

ANNbd_tree::ANNbd_tree(					// construct from point array
	ANNpointArray		pa,				// point array (with at least n pts)
	int					n,				// number of points
	int					dd,				// dimension
	int					bs,				// bucket size
	ANNsplitRule		split,			// splitting rule
	ANNshrinkRule		shrink)			// shrinking rule
	: ANNkd_tree(n, dd, bs)				// build skeleton base tree
{
	pts = pa;							// remember where the points live
	if (n == 0)							// nothing to build for an empty set
		return;

	// Enclose all points in a rectangle and keep a copy of its corners
	// in the tree itself.
	ANNorthRect bnd_box(dd);
	annEnclRect(pa, pidx, n, dd, bnd_box);
	bnd_box_lo = annCopyPt(dd, bnd_box.lo);
	bnd_box_hi = annCopyPt(dd, bnd_box.hi);

	// Translate the splitting rule into the routine that implements it.
	ANNkd_splitter splitter;
	switch (split) {
	case ANN_KD_STD:					// standard kd-splitting rule
		splitter = kd_split;
		break;
	case ANN_KD_MIDPT:					// midpoint split
		splitter = midpt_split;
		break;
	case ANN_KD_SUGGEST:				// best (in our opinion)
	case ANN_KD_SL_MIDPT:				// sliding midpoint split
		splitter = sl_midpt_split;
		break;
	case ANN_KD_FAIR:					// fair split
		splitter = fair_split;
		break;
	case ANN_KD_SL_FAIR:				// sliding fair split
		splitter = sl_fair_split;
		break;
	default:							// unknown rule: report, build nothing
		annError("Illegal splitting method", ANNabort);
		return;
	}

	// Build the tree over all n points with the chosen rules.
	root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, splitter, shrink);
}

//----------------------------------------------------------------------
//	Shrinking rules
//----------------------------------------------------------------------

// Result of choosing how to decompose a node: SPLIT means the node is cut by
// the splitting routine, SHRINK means a shrinking node is created around an
// inner box.
enum ANNdecomp {SPLIT, SHRINK};			// decomposition methods

//----------------------------------------------------------------------
//	trySimpleShrink - Attempt a simple shrink
//
//		We compute the tight bounding box of the points, and compute
//		the 2*dim ``gaps'' between the sides of the tight box and the
//		bounding box.  If any of the gaps is large enough relative to
//		the longest side of the tight bounding box, then we shrink
//		all sides whose gaps are large enough.  (The reason for
//		comparing against the tight bounding box, is that after
//		shrinking the longest box size will decrease, and if we use
//		the standard bounding box, we may decide to shrink twice in
//		a row.  Since the tight box is fixed, we cannot shrink twice
//		consecutively.)
//----------------------------------------------------------------------
// Tuning constants for the simple shrink rule (used by trySimpleShrink):
//   BD_GAP_THRESH - a side can only be shrunk when the gap between the tight
//                   box and the bounding box on that side is at least this
//                   fraction of the longest side of the tight box.
//   BD_CT_THRESH  - a shrink is chosen only when at least this many sides
//                   qualify.
const float BD_GAP_THRESH = 0.5;		// gap threshold (must be < 1)
const int   BD_CT_THRESH  = 2;			// min number of shrink sides

// Decide whether the node should be shrunk to (part of) the tight bounding
// box of its points.  On return inner_box holds, for every side, either the
// tight-box coordinate (side is shrunk) or the bnd_box coordinate (side is
// left alone).  Returns SHRINK when at least BD_CT_THRESH sides are shrunk,
// SPLIT otherwise; inner_box is only meaningful to the caller on SHRINK.
ANNdecomp trySimpleShrink(				// try a simple shrink
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	int i;
												// compute tight bounding box
	annEnclRect(pa, pidx, n, dim, inner_box);

	ANNcoord max_length = 0;					// find longest box side
	for (i = 0; i < dim; i++) {
		ANNcoord length = inner_box.hi[i] - inner_box.lo[i];
		if (length > max_length) {
			max_length = length;
		}
	}
												// smallest gap worth shrinking
	const ANNcoord min_gap = max_length*BD_GAP_THRESH;

	// A side is shrunk only if its gap reaches the threshold AND is strictly
	// positive.  The second test matters when all points coincide: then
	// max_length (and so min_gap) is 0, and without it every zero-width gap
	// would count as "large enough".  The resulting shrink would remove
	// nothing, and the caller would recurse on the same points and box
	// forever.  When max_length > 0 the extra test never changes the result.
	int shrink_ct = 0;							// number of sides we shrunk
	for (i = 0; i < dim; i++) {					// select which sides to shrink
												// gap on the high side
		ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i];
												// big enough gap to shrink?
		if (gap_hi < min_gap || gap_hi <= 0)
			inner_box.hi[i] = bnd_box.hi[i];	// no - expand
		else shrink_ct++;						// yes - shrink this side

												// repeat for low side
		ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i];
		if (gap_lo < min_gap || gap_lo <= 0)
			inner_box.lo[i] = bnd_box.lo[i];	// no - expand
		else shrink_ct++;						// yes - shrink this side
	}

	if (shrink_ct >= BD_CT_THRESH)				// did we shrink enough sides?
		 return SHRINK;
	else return SPLIT;
}

//----------------------------------------------------------------------
//	tryCentroidShrink - Attempt a centroid shrink
//
//	We repeatedly apply the splitting rule, always to the larger subset
//	of points, until the number of points drops to at most the constant
//	fraction BD_FRACTION of the original count.  If this takes more than
//	dim*BD_MAX_SPLIT_FAC splits, then we shrink to the final inner box.
//	Otherwise we split.
//----------------------------------------------------------------------

// Tuning constants for the centroid shrink rule (used by tryCentroidShrink):
//   BD_MAX_SPLIT_FAC - scaled by the dimension to give the split budget; a
//                      shrink is chosen only if more than
//                      dim*BD_MAX_SPLIT_FAC splits were needed.
//   BD_FRACTION      - splitting stops once the point count is at most this
//                      fraction of the starting count.
const float	BD_MAX_SPLIT_FAC = 0.5;		// maximum number of splits allowed
const float	BD_FRACTION = 0.5;			// ...to reduce points by this fraction
										// ...This must be < 1.

ANNdecomp tryCentroidShrink(			// try a centroid shrink
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNkd_splitter		splitter,		// splitting procedure
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	// Point count at which we stop splitting.
	const int target = (int) (n*BD_FRACTION);

	// Start from the full bounding box and narrow it split by split.
	annAssignRect(dim, inner_box, bnd_box);

	int split_ct = 0;					// splits performed so far
	for (int remaining = n; remaining > target; ++split_ct) {
		int			cut_dim;			// filled in by the splitter
		ANNcoord	cut_val;			// filled in by the splitter
		int			low_ct;				// points on the low side of the cut

		(*splitter)(pa, pidx, inner_box, remaining, dim,
					cut_dim, cut_val, low_ct);

		if (low_ct < remaining/2) {
			// Continue with the high side: raise the box floor and skip
			// past the low-side indices.
			inner_box.lo[cut_dim] = cut_val;
			pidx += low_ct;
			remaining -= low_ct;
		}
		else {
			// Continue with the low side: lower the box ceiling.
			inner_box.hi[cut_dim] = cut_val;
			remaining = low_ct;
		}
	}

	// Shrink only if more splits were needed than the budget allows.
	return (split_ct > dim*BD_MAX_SPLIT_FAC) ? SHRINK : SPLIT;
}

//----------------------------------------------------------------------
//	selectDecomp - select which decomposition to use
//----------------------------------------------------------------------

ANNdecomp selectDecomp(			// select decomposition method
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	const ANNorthRect	&bnd_box,		// current bounding box
	ANNkd_splitter		splitter,		// splitting procedure
	ANNshrinkRule		shrink,			// shrinking rule
	ANNorthRect			&inner_box)		// inner box if shrinking (returned)
{
	// Dispatch on the shrinking rule; each recognised rule returns directly.
	switch (shrink) {
	case ANN_BD_NONE:					// shrinking disabled: always split
		return SPLIT;

	case ANN_BD_SUGGEST:				// author's suggestion ...
	case ANN_BD_SIMPLE:					// ... is the simple shrink
		return trySimpleShrink(pa, pidx, n, dim, bnd_box, inner_box);

	case ANN_BD_CENTROID:				// centroid shrink needs the splitter
		return tryCentroidShrink(pa, pidx, n, dim, bnd_box,
								 splitter, inner_box);

	default:							// unknown rule handled below
		break;
	}

	// Unrecognised rule: report it and, should the error routine return,
	// fall back to splitting.
	annError("Illegal shrinking rule", ANNabort);
	return SPLIT;
}

//----------------------------------------------------------------------
//	rbd_tree - recursive procedure to build a bd-tree
//
//		This is analogous to rkd_tree, but for bd-trees.  See the
//		procedure rkd_tree() in kd_split.cpp for more information.
//
//		If the number of points falls below the bucket size, then a
//		leaf node is created for the points.  Otherwise we invoke the
//		procedure selectDecomp() which determines whether we are to
//		split or shrink.  If splitting is chosen, then we essentially
//		do exactly as rkd_tree() would, and invoke the specified
//		splitting procedure to the points.  Otherwise, the selection
//		procedure returns a bounding box, from which we extract the
//		appropriate shrinking bounds, and create a shrinking node.
//		Finally the points are subdivided, and the procedure is
//		invoked recursively on the two subsets to form the children.
//----------------------------------------------------------------------

// Build the bd-tree for the n points indexed by pidx[0..n-1] and return its
// root.  Up to bsp points become a leaf (the shared KD_TRIVIAL leaf when
// n == 0).  Otherwise selectDecomp() chooses between a splitting node and a
// shrinking node, and the two children are built recursively.  bnd_box is
// modified while the children of a split are built, but is restored before
// returning.
ANNkd_ptr rbd_tree(				// recursive construction of bd-tree
	ANNpointArray		pa,				// point array
	ANNidxArray			pidx,			// point indices to store in subtree
	int					n,				// number of points
	int					dim,			// dimension of space
	int					bsp,			// bucket space
	ANNorthRect			&bnd_box,		// bounding box for current node
	ANNkd_splitter		splitter,		// splitting routine
	ANNshrinkRule		shrink)			// shrinking rule
{
	if (n <= bsp) {						// n small, make a leaf node
		if (n == 0)						// empty leaf node
			return KD_TRIVIAL;			// return (canonical) empty leaf
		else							// construct the node and return
			return new ANNkd_leaf(n, pidx);
	}

	// The inner box is only used by internal nodes, so it is created after
	// the leaf test; leaf-producing calls never construct it.
	ANNorthRect inner_box(dim);			// inner box (if shrinking)

	ANNdecomp decomp = selectDecomp(	// select decomposition method
				pa, pidx,				// points and indices
				n, dim,					// number of points and dimension
				bnd_box,				// current bounding box
				splitter, shrink,		// splitting/shrinking methods
				inner_box);				// inner box if shrinking (returned)

	if (decomp == SPLIT) {				// split selected
		int cd;							// cutting dimension
		ANNcoord cv;					// cutting value
		int n_lo;						// number on low side of cut
										// invoke splitting procedure
		(*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo);

		ANNcoord lv = bnd_box.lo[cd];	// save bounds for cutting dimension
		ANNcoord hv = bnd_box.hi[cd];

		bnd_box.hi[cd] = cv;			// modify bounds for left subtree
		ANNkd_ptr lo = rbd_tree(		// build left subtree
				pa, pidx, n_lo,			// ...from pidx[0..n_lo-1]
				dim, bsp, bnd_box, splitter, shrink);
		bnd_box.hi[cd] = hv;			// restore bounds

		bnd_box.lo[cd] = cv;			// modify bounds for right subtree
		ANNkd_ptr hi = rbd_tree(		// build right subtree
				pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1]
				dim, bsp, bnd_box, splitter, shrink);
		bnd_box.lo[cd] = lv;			// restore bounds
										// create the splitting node
		return new ANNkd_split(cd, cv, lv, hv, lo, hi);
	}
	else {								// shrink selected
		int n_in;						// number of points in box
		int n_bnds;						// number of bounding sides

		annBoxSplit(					// split points around inner box
				pa,						// points to split
				pidx,					// point indices
				n,						// number of points
				dim,					// dimension
				inner_box,				// inner box
				n_in);					// number of points inside (returned)

		ANNkd_ptr in = rbd_tree(		// build inner subtree pidx[0..n_in-1]
				pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink);
		ANNkd_ptr out = rbd_tree(		// build outer subtree pidx[n_in..n-1]
				pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink);

		ANNorthHSArray bnds = NULL;		// bounds (alloc in Box2Bnds and
										// ...freed in bd_shrink destroyer)

		annBox2Bnds(					// convert inner box to bounds
				inner_box,				// inner box
				bnd_box,				// enclosing box
				dim,					// dimension
				n_bnds,					// number of bounds (returned)
				bnds);					// bounds array (modified)

										// return shrinking node
		return new ANNbd_shrink(n_bnds, bnds, in, out);
	}
}