/* The code is adopted from VLFeat with heavily rewrite.
 * The original code is licenced under 2-clause BSD license,
 * should be compatible with New BSD Licence used by ccv.
 * The original Copyright:
 *
 * Copyright (C) 2007-12, Andrea Vedaldi and Brian Fulkerson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "ccv.h"
#include "ccv_internal.h"

const ccv_sift_param_t ccv_sift_default_params = {
	.noctaves = 3,
	.nlevels = 6,
	.up2x = 1,
	.edge_threshold = 10,
	.norm_threshold = 0,
	.peak_threshold = 0,
};

inline static double _ccv_keypoint_interpolate(float N9[3][9], int ix, int iy, int is, ccv_keypoint_t* kp)
{
	double Dxx = N9[1][3] - 2 * N9[1][4] + N9[1][5]; 
	double Dyy = N9[1][1] - 2 * N9[1][4] + N9[1][7];
	double Dxy = (N9[1][8] - N9[1][6] - N9[1][2] + N9[1][0]) * 0.25;
	double score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy);
	double Dx = (N9[1][5] - N9[1][3]) * 0.5;
	double Dy = (N9[1][7] - N9[1][1]) * 0.5;
	double Ds = (N9[2][4] - N9[0][4]) * 0.5;
	double Dxs = (N9[2][5] + N9[0][3] - N9[2][3] - N9[0][5]) * 0.25;
	double Dys = (N9[2][7] + N9[0][1] - N9[2][1] - N9[0][7]) * 0.25;
	double Dss = N9[0][4] - 2 * N9[1][4] + N9[2][4];
	double A[3][3] = { { Dxx, Dxy, Dxs },
					   { Dxy, Dyy, Dys },
					   { Dxs, Dys, Dss } };
	double b[3] = { -Dx, -Dy, -Ds };
	/* Gauss elimination */
	int i, j, ii, jj;
	for(j = 0; j < 3; j++)
	{
		double maxa = 0;
		double maxabsa = 0;
		int maxi = j;
		double tmp;

		/* look for the maximally stable pivot */
		for (i = j; i < 3; i++)
		{
			double a = A[i][j];
			double absa = fabs(a);
			if (absa > maxabsa)
			{
				maxa = a;
				maxabsa = absa;
				maxi = i;
			}
		}

		/* if singular give up */
		if (maxabsa < 1e-10f)
		{
			b[0] = b[1] = b[2] = 0;
			break;
		}

		i = maxi;

		/* swap j-th row with i-th row and normalize j-th row */
		for(jj = j; jj < 3; jj++)
		{
			tmp = A[i][jj];
			A[i][jj] = A[j][jj];
			A[j][jj] = tmp;
			A[j][jj] /= maxa;
		}
		tmp = b[j];
		b[j] = b[i];
		b[i] = tmp;
		b[j] /= maxa;

		/* elimination */
		for (ii = j + 1; ii < 3; ii++)
		{
			double x = A[ii][j];
			for (jj = j; jj < 3; jj++)
				A[ii][jj] -= x * A[j][jj];
			b[ii] -= x * b[j];
		}
	}

	/* backward substitution */
	for (i = 2; i > 0; i--)
	{
		double x = b[i];
		for (ii = i - 1; ii >= 0; ii--)
		  b[ii] -= x * A[ii][i];
	}
	kp->x = ix + ccv_min(ccv_max(b[0], -1), 1);
	kp->y = iy + ccv_min(ccv_max(b[1], -1), 1);
	kp->regular.scale = is + b[2];
	return score;
}

static float _ccv_mod_2pi(float x)
{
	while (x > 2 * CCV_PI)
		x -= 2 * CCV_PI;
	while (x < 0)
		x += 2 * CCV_PI;
	return x;
}

static int _ccv_floor(float x)
{
	int xi = (int)x;
	if (x >= 0 || (float)xi == x)
		return xi;
	return xi - 1;
}

#define EXPN_SZ  256 /* fast_expn table size */
#define EXPN_MAX 25.0 /* fast_expn table max */
static double _ccv_expn_tab[EXPN_SZ + 1]; /* fast_expn table */
static int _ccv_expn_init = 0;

static inline double _ccv_expn(double x)
{
	double a, b, r;
	int i;
	assert(0 <= x && x <= EXPN_MAX);
	if (x > EXPN_MAX)
		return 0.0;
	x *= EXPN_SZ / EXPN_MAX;
	i = (int)x;
	r = x - i;
	a = _ccv_expn_tab[i];
	b = _ccv_expn_tab[i + 1];
	return a + r * (b - a);
}

static void _ccv_precomputed_expn()
{
	int i;
	for(i = 0; i < EXPN_SZ + 1; i++)
		_ccv_expn_tab[i] = exp(-(double)i * (EXPN_MAX / EXPN_SZ));
	_ccv_expn_init = 1;
}

void ccv_sift(ccv_dense_matrix_t* a, ccv_array_t** _keypoints, ccv_dense_matrix_t** _desc, int type, ccv_sift_param_t params)
{
	assert(CCV_GET_CHANNEL(a->type) == CCV_C1);
	ccv_dense_matrix_t** g = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels + 1) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	memset(g, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels + 1) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	ccv_dense_matrix_t** dog = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 1) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	memset(dog, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 1) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	ccv_dense_matrix_t** th = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	memset(th, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	ccv_dense_matrix_t** md = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	memset(md, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves));
	if (params.up2x)
	{
		g += params.nlevels + 1;
		dog += params.nlevels - 1;
		th += params.nlevels - 3;
		md += params.nlevels - 3;
	}
	ccv_array_t* keypoints = *_keypoints;
	int custom_keypoints = 0;
	if (keypoints == 0)
		keypoints = *_keypoints = ccv_array_new(sizeof(ccv_keypoint_t), 10, 0);
	else
		custom_keypoints = 1;
	int i, j, k, x, y;
	double sigma0 = 1.6;
	double sigmak = pow(2.0, 1.0 / (params.nlevels - 3));
	double dsigma0 = sigma0 * sigmak * sqrt(1.0 - 1.0 / (sigmak * sigmak));
	if (params.up2x)
	{
		ccv_sample_up(a, &g[-(params.nlevels + 1)], 0, 0, 0);
		/* since there is a gaussian filter in sample_up function already,
		 * the default sigma for upsampled image is sqrt(2) */
		double sd = sqrt(sigma0 * sigma0 - 2.0);
		ccv_blur(g[-(params.nlevels + 1)], &g[-(params.nlevels + 1) + 1], CCV_32F | CCV_C1, sd);
		ccv_matrix_free(g[-(params.nlevels + 1)]);
		for (j = 1; j < params.nlevels; j++)
		{
			sd = dsigma0 * pow(sigmak, j - 1);
			ccv_blur(g[-(params.nlevels + 1) + j], &g[-(params.nlevels + 1) + j + 1], 0, sd);
			ccv_subtract(g[-(params.nlevels + 1) + j + 1], g[-(params.nlevels + 1) + j], (ccv_matrix_t**)&dog[-(params.nlevels - 1) + j - 1], 0);
			if (j > 1 && j < params.nlevels - 1)
				ccv_gradient(g[-(params.nlevels + 1) + j], &th[-(params.nlevels - 3) + j - 2], 0, &md[-(params.nlevels - 3) + j - 2], 0, 1, 1);
			ccv_matrix_free(g[-(params.nlevels + 1) + j]);
		}
		ccv_matrix_free(g[-1]);
	}
	double sd = sqrt(sigma0 * sigma0 - 0.25);
	g[0] = a;
	/* generate gaussian pyramid (g, dog) & gradient pyramid (th, md) */
	ccv_blur(g[0], &g[1], CCV_32F | CCV_C1, sd);
	for (j = 1; j < params.nlevels; j++)
	{
		sd = dsigma0 * pow(sigmak, j - 1);
		ccv_blur(g[j], &g[j + 1], 0, sd);
		ccv_subtract(g[j + 1], g[j], (ccv_matrix_t**)&dog[j - 1], 0);
		if (j > 1 && j < params.nlevels - 1)
			ccv_gradient(g[j], &th[j - 2], 0, &md[j - 2], 0, 1, 1);
		ccv_matrix_free(g[j]);
	}
	ccv_matrix_free(g[params.nlevels]);
	for (i = 1; i < params.noctaves; i++)
	{
		ccv_sample_down(g[(i - 1) * (params.nlevels + 1)], &g[i * (params.nlevels + 1)], 0, 0, 0);
		if (i - 1 > 0)
			ccv_matrix_free(g[(i - 1) * (params.nlevels + 1)]);
		sd = sqrt(sigma0 * sigma0 - 0.25);
		ccv_blur(g[i * (params.nlevels + 1)], &g[i * (params.nlevels + 1) + 1], CCV_32F | CCV_C1, sd);
		for (j = 1; j < params.nlevels; j++)
		{
			sd = dsigma0 * pow(sigmak, j - 1);
			ccv_blur(g[i * (params.nlevels + 1) + j], &g[i * (params.nlevels + 1) + j + 1], 0, sd);
			ccv_subtract(g[i * (params.nlevels + 1) + j + 1], g[i * (params.nlevels + 1) + j], (ccv_matrix_t**)&dog[i * (params.nlevels - 1) + j - 1], 0);
			if (j > 1 && j < params.nlevels - 1)
				ccv_gradient(g[i * (params.nlevels + 1) + j], &th[i * (params.nlevels - 3) + j - 2], 0, &md[i * (params.nlevels - 3) + j - 2], 0, 1, 1);
			ccv_matrix_free(g[i * (params.nlevels + 1) + j]);
		}
		ccv_matrix_free(g[i * (params.nlevels + 1) + params.nlevels]);
	}
	ccv_matrix_free(g[(params.noctaves - 1) * (params.nlevels + 1)]);
	if (!custom_keypoints)
	{
		/* detect keypoint */
		for (i = (params.up2x ? -1 : 0); i < params.noctaves; i++)
		{
			double s = pow(2.0, i);
			int rows = dog[i * (params.nlevels - 1)]->rows;
			int cols = dog[i * (params.nlevels - 1)]->cols;
			for (j = 1; j < params.nlevels - 2; j++)
			{
				float* bf = dog[i * (params.nlevels - 1) + j - 1]->data.f32 + cols;
				float* cf = dog[i * (params.nlevels - 1) + j]->data.f32 + cols;
				float* uf = dog[i * (params.nlevels - 1) + j + 1]->data.f32 + cols;
				for (y = 1; y < rows - 1; y++)
				{
					for (x = 1; x < cols - 1; x++)
					{
						float v = cf[x];
#define locality_if(CMP, SGN) \
	(v CMP ## = SGN params.peak_threshold && v CMP cf[x - 1] && v CMP cf[x + 1] && \
	 v CMP cf[x - cols - 1] && v CMP cf[x - cols] && v CMP cf[x - cols + 1] && \
	 v CMP cf[x + cols - 1] && v CMP cf[x + cols] && v CMP cf[x + cols + 1] && \
	 v CMP bf[x - 1] && v CMP bf[x] && v CMP bf[x + 1] && \
	 v CMP bf[x - cols - 1] && v CMP bf[x - cols] && v CMP bf[x - cols + 1] && \
	 v CMP bf[x + cols - 1] && v CMP bf[x + cols] && v CMP bf[x + cols + 1] && \
	 v CMP uf[x - 1] && v CMP uf[x] && v CMP uf[x + 1] && \
	 v CMP uf[x - cols - 1] && v CMP uf[x - cols] && v CMP uf[x - cols + 1] && \
	 v CMP uf[x + cols - 1] && v CMP uf[x + cols] && v CMP uf[x + cols + 1])
						if (locality_if(<, -) || locality_if(>, +))
						{
							ccv_keypoint_t kp;
							int ix = x, iy = y;
							double score = -1;
							int cvg = 0;
							int offset = ix + (iy - y) * cols;
							/* iteratively converge to meet subpixel accuracy */
							for (k = 0; k < 5; k++)
							{
								offset = ix + (iy - y) * cols;
								float N9[3][9] = { { bf[offset - cols - 1], bf[offset - cols], bf[offset - cols + 1],
													 bf[offset - 1], bf[offset], bf[offset + 1],
													 bf[offset + cols - 1], bf[offset + cols], bf[offset + cols + 1] },
												   { cf[offset - cols - 1], cf[offset - cols], cf[offset - cols + 1],
													 cf[offset - 1], cf[offset], cf[offset + 1],
													 cf[offset + cols - 1], cf[offset + cols], cf[offset + cols + 1] },
												   { uf[offset - cols - 1], uf[offset - cols], uf[offset - cols + 1],
													 uf[offset - 1], uf[offset], uf[offset + 1],
													 uf[offset + cols - 1], uf[offset + cols], uf[offset + cols + 1] } };
								score = _ccv_keypoint_interpolate(N9, ix, iy, j, &kp);
								if (kp.x >= 1 && kp.x <= cols - 2 && kp.y >= 1 && kp.y <= rows - 2)
								{
									int nx = (int)(kp.x + 0.5);
									int ny = (int)(kp.y + 0.5);
									if (ix == nx && iy == ny)
										break;
									ix = nx;
									iy = ny;
								} else {
									cvg = -1;
									break;
								}
							}
							if (cvg == 0 && fabs(cf[offset]) > params.peak_threshold && score >= 0 && score < (params.edge_threshold + 1) * (params.edge_threshold + 1) / params.edge_threshold && kp.regular.scale > 0 && kp.regular.scale < params.nlevels - 1)
							{
								kp.x *= s;
								kp.y *= s;
								kp.octave = i;
								kp.level = j;
								kp.regular.scale = sigma0 * sigmak * pow(2.0, kp.regular.scale / (double)(params.nlevels - 3));
								ccv_array_push(keypoints, &kp);
							}
						}
#undef locality_if
					}
					bf += cols;
					cf += cols;
					uf += cols;
				}
			}
		}
	}
	/* repeatable orientation/angle (p.s. it will push more keypoints (with different angles) to array) */
	float const winf = 1.5;
	double bins[36];
	int kpnum = keypoints->rnum;
	if (!_ccv_expn_init)
		_ccv_precomputed_expn();
	for (i = 0; i < kpnum; i++)
	{
		ccv_keypoint_t* kp = (ccv_keypoint_t*)ccv_array_get(keypoints, i);
		float ds = pow(2.0, kp->octave);
		float dx = kp->x / ds;
		float dy = kp->y / ds;
		int ix = (int)(dx + 0.5);
		int iy = (int)(dy + 0.5);
		float const sigmaw = winf * kp->regular.scale;
		int wz = ccv_max((int)(3.0 * sigmaw + 0.5), 1);
		ccv_dense_matrix_t* tho = th[kp->octave * (params.nlevels - 3) + kp->level - 1];
		ccv_dense_matrix_t* mdo = md[kp->octave * (params.nlevels - 3) + kp->level - 1];
		assert(tho->rows == mdo->rows && tho->cols == mdo->cols);
		if (ix >= 0 && ix < tho->cols && iy >=0 && iy < tho->rows)
		{
			float* theta = tho->data.f32 + ccv_max(iy - wz, 0) * tho->cols;
			float* magnitude = mdo->data.f32 + ccv_max(iy - wz, 0) * mdo->cols;
			memset(bins, 0, 36 * sizeof(double));
			/* oriented histogram with bilinear interpolation */
			for (y = ccv_max(iy - wz, 0); y <= ccv_min(iy + wz, tho->rows - 1); y++)
			{
				for (x = ccv_max(ix - wz, 0); x <= ccv_min(ix + wz, tho->cols - 1); x++)
				{
					float r2 = (x - dx) * (x - dx) + (y - dy) * (y - dy);
					if (r2 > wz * wz + 0.6)
						continue;
					float weight = _ccv_expn(r2 / (2.0 * sigmaw * sigmaw));
					float fbin = theta[x] * 0.1;
					int ibin = _ccv_floor(fbin - 0.5);
					float rbin = fbin - ibin - 0.5;
					/* bilinear interpolation */
					bins[(ibin + 36) % 36] += (1 - rbin) * magnitude[x] * weight;
					bins[(ibin + 1) % 36] += rbin * magnitude[x] * weight;
				}
				theta += tho->cols;
				magnitude += mdo->cols;
			}
			/* smoothing histogram */
			for (j = 0; j < 6; j++)
			{
				double first = bins[0];
				double prev = bins[35];
				for (k = 0; k < 35; k++)
				{
					double nb = (prev + bins[k] + bins[k + 1]) / 3.0;
					prev = bins[k];
					bins[k] = nb;
				}
				bins[35] = (prev + bins[35] + first) / 3.0;
			}
			int maxib = 0;
			for (j = 1; j < 36; j++)
				if (bins[j] > bins[maxib])
					maxib = j;
			double maxb = bins[maxib];
			double bm = bins[(maxib + 35) % 36];
			double bp = bins[(maxib + 1) % 36];
			double di = -0.5 * (bp - bm) / (bp + bm - 2 * maxb);
			kp->regular.angle = 2 * CCV_PI * (maxib + di + 0.5) / 36.0;
			maxb *= 0.8;
			for (j = 0; j < 36; j++)
				if (j != maxib)
				{
					bm = bins[(j + 35) % 36];
					bp = bins[(j + 1) % 36];
					if (bins[j] > maxb && bins[j] > bm && bins[j] > bp)
					{
						di = -0.5 * (bp - bm) / (bp + bm - 2 * bins[j]);
						ccv_keypoint_t nkp = *kp;
						nkp.regular.angle = 2 * CCV_PI * (j + di + 0.5) / 36.0;
						ccv_array_push(keypoints, &nkp);
					}
				}
		}
	}
	/* calculate descriptor */
	if (_desc != 0)
	{
		ccv_dense_matrix_t* desc = *_desc = ccv_dense_matrix_new(keypoints->rnum, 128, CCV_32F | CCV_C1, 0, 0);
		float* fdesc = desc->data.f32;
		memset(fdesc, 0, sizeof(float) * keypoints->rnum * 128);
		for (i = 0; i < keypoints->rnum; i++)
		{
			ccv_keypoint_t* kp = (ccv_keypoint_t*)ccv_array_get(keypoints, i);
			float ds = pow(2.0, kp->octave);
			float dx = kp->x / ds;
			float dy = kp->y / ds;
			int ix = (int)(dx + 0.5);
			int iy = (int)(dy + 0.5);
			double SBP = 3.0 * kp->regular.scale;
			int wz = ccv_max((int)(SBP * sqrt(2.0) * 2.5 + 0.5), 1);
			ccv_dense_matrix_t* tho = th[kp->octave * (params.nlevels - 3) + kp->level - 1];
			ccv_dense_matrix_t* mdo = md[kp->octave * (params.nlevels - 3) + kp->level - 1];
			assert(tho->rows == mdo->rows && tho->cols == mdo->cols);
			assert(ix >= 0 && ix < tho->cols && iy >=0 && iy < tho->rows);
			float* theta = tho->data.f32 + ccv_max(iy - wz, 0) * tho->cols;
			float* magnitude = mdo->data.f32 + ccv_max(iy - wz, 0) * mdo->cols;
			float ca = cos(kp->regular.angle);
			float sa = sin(kp->regular.angle);
			float sigmaw = 2.0;
			/* sidenote: NBP = 4, NBO = 8 */
			for (y = ccv_max(iy - wz, 0); y <= ccv_min(iy + wz, tho->rows - 1); y++)
			{
				for (x = ccv_max(ix - wz, 0); x <= ccv_min(ix + wz, tho->cols - 1); x++)
				{
					float nx = (ca * (x - dx) + sa * (y - dy)) / SBP;
					float ny = (-sa * (x - dx) + ca * (y - dy)) / SBP;
					float nt = 8.0 * _ccv_mod_2pi(theta[x] * CCV_PI / 180.0 - kp->regular.angle) / (2.0 * CCV_PI);
					float weight = _ccv_expn((nx * nx + ny * ny) / (2.0 * sigmaw * sigmaw));
					int binx = _ccv_floor(nx - 0.5);
					int biny = _ccv_floor(ny - 0.5);
					int bint = _ccv_floor(nt);
					float rbinx = nx - (binx + 0.5);
					float rbiny = ny - (biny + 0.5);
					float rbint = nt - bint;
					int dbinx, dbiny, dbint;
					/* Distribute the current sample into the 8 adjacent bins*/
					for(dbinx = 0; dbinx < 2; dbinx++)
						for(dbiny = 0; dbiny < 2; dbiny++)
							for(dbint = 0; dbint < 2; dbint++)
								if (binx + dbinx >= -2 && binx + dbinx < 2 && biny + dbiny >= -2 && biny + dbiny < 2)
									fdesc[(2 + biny + dbiny) * 32 + (2 + binx + dbinx) * 8 + (bint + dbint) % 8] += weight * magnitude[x] * fabs(1 - dbinx - rbinx) * fabs(1 - dbiny - rbiny) * fabs(1 - dbint - rbint);
				}
				theta += tho->cols;
				magnitude += mdo->cols;
			}
			ccv_dense_matrix_t tm = ccv_dense_matrix(1, 128, CCV_32F | CCV_C1, fdesc, 0);
			ccv_dense_matrix_t* tmp = &tm;
 			double norm = ccv_normalize(&tm, (ccv_matrix_t**)&tmp, 0, CCV_L2_NORM);
			int num = (ccv_min(iy + wz, tho->rows - 1) - ccv_max(iy - wz, 0) + 1) * (ccv_min(ix + wz, tho->cols - 1) - ccv_max(ix - wz, 0) + 1);
			if (params.norm_threshold && norm < params.norm_threshold * num)
			{
				for (j = 0; j < 128; j++)
					fdesc[j] = 0;
			} else {
				for (j = 0; j < 128; j++)
					if (fdesc[j] > 0.2)
						fdesc[j] = 0.2;
				ccv_normalize(&tm, (ccv_matrix_t**)&tmp, 0, CCV_L2_NORM);
			}
			fdesc += 128;
		}
	}
	for (i = (params.up2x ? -(params.nlevels - 1) : 0); i < (params.nlevels - 1) * params.noctaves; i++)
		ccv_matrix_free(dog[i]);
	for (i = (params.up2x ? -(params.nlevels - 3) : 0); i < (params.nlevels - 3) * params.noctaves; i++)
	{
		ccv_matrix_free(th[i]);
		ccv_matrix_free(md[i]);
	}
}