#ifndef H_KINO_SIMILARITY
#define H_KINO_SIMILARITY 1

#include "KinoSearch/Util/Obj.r"

/* Public typedefs for the Similarity object and its vtable struct.
 */
typedef struct kino_Similarity kino_Similarity;
typedef struct KINO_SIMILARITY_VTABLE KINO_SIMILARITY_VTABLE;

/* Forward declarations.  This header only refers to these types through
 * pointers (see kino_Sim_deserialize and kino_Sim_serialize), so their full
 * definitions are not required here.
 */
struct kino_ByteBuf;
struct kino_ViewByteBuf;

/* Class declaration marker.  NOTE(review): presumably consumed by the
 * project's class machinery; the arguments look like the full class name,
 * the short prefix used in function names (kino_Sim_*), and the parent
 * class -- confirm against the definition of KINO_CLASS.
 */
KINO_CLASS("KinoSearch::Search::Similarity", "Sim", "KinoSearch::Util::Obj");

/* Instance layout for Similarity objects.
 */
struct kino_Similarity {
    /* Pointer to the vtable struct for this class. */
    KINO_SIMILARITY_VTABLE *_;
    /* Macro not defined in this header; presumably expands to the member
     * variables shared with the parent Obj class -- TODO confirm. */
    KINO_OBJ_MEMBER_VARS;
    /* NOTE(review): presumably a lookup table backing kino_Sim_decode_norm
     * -- confirm in the implementation. */
    float         *norm_decoder;
    /* NOTE(review): presumably a lookup table backing kino_Sim_prox_boost
     * -- confirm in the implementation. */
    float         *prox_decoder;
};

/* Constructor.
 *
 * class_name: name of the class for the new object.  NOTE(review):
 * presumably this lets subclasses reuse the constructor -- confirm in the
 * implementation.
 *
 * Returns a pointer to the new Similarity.
 */
kino_Similarity* 
kino_Sim_new(const char *class_name);

/* Deserializer.
 *
 * serialized: buffer holding the serialized form.  NOTE(review): presumably
 * the data written by kino_Sim_serialize -- confirm in the implementation.
 *
 * Returns a pointer to a Similarity.
 */
kino_Similarity*
kino_Sim_deserialize(struct kino_ViewByteBuf *serialized);

/* Return a score factor based on the frequency of a term in a given document.
 * The default implementation is sqrt(freq).  Other implementations typically
 * produce ascending scores with ascending freqs, since the more times a doc
 * matches, the more relevant it is likely to be.
 *
 * freq: the frequency of the term in the document.
 */
float  
kino_Sim_tf(kino_Similarity *self, float freq);
KINO_METHOD("Kino_Sim_TF");

/* Calculate a score factor based on the number of terms which match.
 *
 * NOTE(review): overlap is presumably the number of terms which matched and
 * max_overlap the number which could have matched -- confirm against the
 * implementation.
 */
float
kino_Sim_coord(kino_Similarity *self, chy_u32_t overlap, 
               chy_u32_t max_overlap);
KINO_METHOD("Kino_Sim_Coord");

/* encode_norm and decode_norm encode and decode between 32-bit IEEE floating
 * point numbers and a 5-bit exponent, 3-bit mantissa float.  The range
 * covered by the single-byte encoding is 7x10^9 to 2x10^-9.  The accuracy is
 * about one significant decimal digit.
 *
 * f: the float to encode.
 *
 * Returns the encoded value.  Note that the declared return type is
 * chy_u32_t even though the encoding is described as a single byte.
 */
chy_u32_t 
kino_Sim_encode_norm(kino_Similarity *self, float f);
KINO_METHOD("Kino_Sim_Encode_Norm");

/* Inverse of kino_Sim_encode_norm; see that function for a description of
 * the encoding.
 *
 * input: the encoded value.
 *
 * Returns the decoded float.
 */
float
kino_Sim_decode_norm(kino_Similarity *self, chy_u32_t input);
KINO_METHOD("Kino_Sim_Decode_Norm");

/* Normalize a Query's weight so that it is comparable to other Queries.
 *
 * sum_of_squared_weights: the input to the normalization, as the parameter
 * name describes.  NOTE(review): the exact formula is not visible in this
 * header -- see the implementation.
 */
float
kino_Sim_query_norm(kino_Similarity *self, float sum_of_squared_weights);
KINO_METHOD("Kino_Sim_Query_Norm");

/* Return a boost which rewards smaller distances between tokens in a search
 * match.
 *
 * distance: the distance between tokens.
 */
float
kino_Sim_prox_boost(kino_Similarity *self, chy_u32_t distance);
KINO_METHOD("Kino_Sim_Prox_Boost");



/* Assess an array of positions and return a scoring multiplier based on how
 * clustered they are.  The assumption is that documents whose matches are
 * right next to each other deserve higher rank than documents whose matches
 * are spread out and presumably unrelated.
 *
 * prox: the array of positions.
 * num_prox: NOTE(review): presumably the number of elements in prox --
 * confirm against the implementation.
 */
float
kino_Sim_prox_coord(kino_Similarity *self, chy_u32_t *prox, 
                    chy_u32_t num_prox);
KINO_METHOD("Kino_Sim_Prox_Coord");

/* Destroy method for Similarity objects.
 *
 * NOTE(review): presumably releases self along with the norm_decoder and
 * prox_decoder arrays -- confirm in the implementation.
 */
void
kino_Sim_destroy(kino_Similarity *self);
KINO_METHOD("Kino_Sim_Destroy");

/* Serialize method for Similarity objects.
 *
 * target: ByteBuf passed alongside self.  NOTE(review): presumably receives
 * the serialized form later read by kino_Sim_deserialize -- confirm in the
 * implementation.
 */
void
kino_Sim_serialize(kino_Similarity *self, struct kino_ByteBuf *target);
KINO_METHOD("Kino_Sim_Serialize");

/* Closes the class declaration opened by KINO_CLASS above. */
KINO_END_CLASS

#endif /* H_KINO_SIMILARITY */

/* Copyright 2006-2007 Marvin Humphrey
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the same terms as Perl itself.
 */