#ifdef MODULE
#if LINUX_VERSION_CODE >= 0x20400
#include <linux/module.h>
#endif
#include <linux/modversions.h>
#endif

#include <bw_mgmt.h>

/*
 * Bandwidth management hash list manipulation.
 *
 * Copyright (C) 1999-2001, Sun Microsystems, Inc.
 * All rights reserved.
 *
 * These routines manipulate the lookup hashes for the various
 * limit types.  There is code for IP, user id and group id,
 * but only IP is used for now.  The routines also support
 * input and output limits, but only output is regulated now.
 *
 * There are two hash lookups per interface.  One is used to create
 * or update a record - this looks for an exact match, and
 * returns the address of the pointer to the record.  The other
 * routine is just a lookup, and returns the record, or NULL
 * if the target cannot be found.  For now, an exact match is
 * only meaningful for IP, where a normal lookup tries to find
 * a mask pattern, but an insert/update lookup requires the
 * masks to be equal.
 *
 * There is also a "bw_unlimitq" per type that keeps track of
 * every socket that has no limit imposed.  These are scanned
 * when a new limit is created in case existing connections
 * need to be limited.
 *
 * If a limit is updated (as opposed to created), the new limit
 * will be used for the next bandwidth calculation anyway.
 *
 * XXX These routines are not multithreaded - the 'limit' lock
 *  in bw_mgmt.c should be held before calling anything that
 *  mucks with the hash lists.
 */
/*
 * Per-type head of the circular list of bw_info entries that have no
 * limit attached.  Maintained by bw_unlimit_enq() / bw_unlimit_deq();
 * a NULL head means the list for that type is empty.
 */
struct bw_info *bw_unlimitq[N_BW_TYPES];

/*
 * Debug trace.  Expands to "if (0) printk", so the call is compiled
 * but never executed.  Because the expansion is a bare "if", a DPRINTF
 * statement must not be directly followed by an "else".
 */
#define DPRINTF if (0) printk

/*
 * Add an entry to the unlimit q for the particular type
 */
void
bw_unlimit_enq(int type, struct bw_info *bws)
{
    struct bw_info *nbws;

    BW_ENTER_FUNC();
    ASSERT(type < N_BW_TYPES);
    nbws = bw_unlimitq[type];

    if (nbws) {
	bws->bw_unlimit_forw[type] = nbws;
	bws->bw_unlimit_back[type] = nbws->bw_unlimit_back[type];
	nbws->bw_unlimit_back[type] = bws;
	bws->bw_unlimit_back[type]->bw_unlimit_forw[type] = bws;
    }
    else bws->bw_unlimit_back[type] = bws->bw_unlimit_forw[type] = bws;

    bw_unlimitq[type] = bws;
    bws->bw_flags |= BW_UNLIMIT << type;
    BW_EXIT_FUNC();
}

/*
 * bw_unlimit_deq : take bws off the "no limit" ring for the given
 * type.
 *
 * If bws was the only member its links are zeroed; otherwise its
 * neighbours are joined and bws keeps its (now stale) links.  When
 * bws was the ring head, the head moves to whatever bws's forward
 * link holds afterwards.  The BW_UNLIMIT bit for this type is
 * cleared in bws->bw_flags.
 */
void
bw_unlimit_deq(int type, struct bw_info *bws)
{
    struct bw_info *next, *prev;

    BW_ENTER_FUNC();
    ASSERT(type < N_BW_TYPES);

    next = bws->bw_unlimit_forw[type];
    if (next == bws) {
        /* Sole member of the ring. */
        bws->bw_unlimit_forw[type] = 0;
        bws->bw_unlimit_back[type] = 0;
    } else {
        prev = bws->bw_unlimit_back[type];
        prev->bw_unlimit_forw[type] = next;
        next->bw_unlimit_back[type] = prev;
    }

    if (bw_unlimitq[type] == bws)
        bw_unlimitq[type] = bws->bw_unlimit_forw[type];

    bws->bw_flags &= ~(BW_UNLIMIT << type);
    BW_EXIT_FUNC();
}

/*
 * IP address management.  This is somewhat of a pain because we
 * are trying to regulate based on X.Y.X.A/mask style addresses,
 * so a given address can have several limits.  We probe for the 
 * most specific match first, then probe again for the next most
 * specific, etc.
 *
 * bw_ipmask_mask tells us what masks have a limit attached, so we
 * don't need to check every limit every time.  The bw_ipmask_cnt
 * array lets us know when we can clear a bit.
 */

unsigned int bw_ipmask_mask;  /* 32 bits : bit n == 1 means that mask exists */
int bw_ipmask_cnt[32];   /* reference count for above bits */

/*
 * BW_NEW_MASK : note that one more limit with prefix length `mask`
 * exists.  Sets bit (mask - 1) in bw_ipmask_mask and bumps the
 * matching reference count.
 *
 * `mask` must be in the range 1..32 and is evaluated twice.  The body
 * is wrapped in do { } while (0) so the macro acts as one statement,
 * and the shift is done on an unsigned constant so that a mask of 32
 * (bit 31) does not overflow a signed int.
 */
#define BW_NEW_MASK(mask)					\
	do {							\
		bw_ipmask_mask |= 1U << ((mask) - 1);		\
		bw_ipmask_cnt[(mask) - 1]++;			\
	} while (0)


/*
 * BWIP_HASHCNT : the size of the IP hash list (number of buckets).
 *
 * BWIP_HASHMASK is ANDed with the key in BWIP_HASH() to pick a
 * bucket, which only reaches every bucket when BWIP_HASHCNT is a
 * power of two.
 */
#define BWIP_HASHCNT 64
#define BWIP_HASHMASK (BWIP_HASHCNT - 1)

/* Bucket heads; entries in a bucket are chained through bwh_hashnext. */
struct bw_limit_hash *bw_iphash_tbl[BWIP_HASHCNT];

/*
 * The IP hash routine.
 *
 * Masks off the address bits that are ignored by a prefix of `mask`
 * bits, adds the prefix length itself, then XORs the four bytes of
 * the result together and reduces it to a bucket index in
 * 0 .. BWIP_HASHCNT - 1.
 * This function has been created on the fly.  It may be ok.
 *
 * The network mask is built from an unsigned constant: shifting -1
 * left, or shifting by the full width of the type (which a prefix
 * length of 0 would do), is undefined behaviour in C.  A prefix length
 * of 0 keeps no address bits; anything of 32 or more keeps them all.
 */
inline unsigned int BWIP_HASH(unsigned int addr, unsigned int mask)
{
    unsigned int netmask;
    unsigned int key;

    if (mask == 0)
        netmask = 0;
    else if (mask >= 32)
        netmask = ~0U;
    else
        netmask = ~0U << (32 - mask);

    key = (addr & netmask) + mask;
    key = (key >> 24) ^ (key >> 16) ^ (key >> 8) ^ key;

    return key & BWIP_HASHMASK;
}

/*
 * BW_RW_CHECK macro checks to see if a hash entry is the same
 * input/output orientation as the lookup key.  This is a little
 * complex because a limit can be marked "BOTH", and we need to
 * have different compares depending on whether this lookup is
 * exact (i.e. create/update) vs a search (i.e. mask).
 *
 *   tbl_rw  : pointer to the limit id stored in the hash entry
 *   look_rw : the orientation being looked up
 *   both_rw : the orientation allowed to act as a wildcard; the
 *             check succeeds whenever this equals BW_RDWRIO
 *
 * The search routines in this file pass the entry's own bwid_rw as
 * both_rw, so a BW_RDWRIO entry satisfies any lookup.  The "_new"
 * routines pass the lookup orientation instead, so a BW_RDWRIO
 * lookup is satisfied by an entry of any orientation.
 */
#define BW_RW_CHECK(tbl_rw, look_rw, both_rw)	\
	((tbl_rw)->bwid_rw == (look_rw) || (both_rw) == BW_RDWRIO)

/*
 * BWIP_NETMASK : the 32 bit network mask for a prefix length of
 * 0..32 bits.  Built from an unsigned constant, with prefix length 0
 * handled separately, because shifting a negative value left or
 * shifting by the full width of the type is undefined behaviour.
 */
#define BWIP_NETMASK(mask)						     \
	((mask) ? (~0U << (32 - (mask))) : 0U)

/*
 * BWIP_EQUAL check to see if the various fields in a limit
 * match the id we are trying to find.
 *
 *   ref_blid : pointer to the limit id stored in the hash entry
 *   tst_blid : pointer to the id being looked up
 *   mask     : prefix length to compare under (evaluated more than once)
 *
 * True when the stored limit has exactly this prefix length, its
 * port and protocol are either zero (not checked) or equal to the
 * tested ones, and both addresses agree on the bits that the prefix
 * keeps.
 */
#define BWIP_EQUAL(ref_blid, tst_blid, mask)				     \
	((ref_blid)->bwid_mask == (mask)				     \
	&& ( ! (ref_blid)->bwid_port ||					     \
		(ref_blid)->bwid_port == (tst_blid)->bwid_port)		     \
	&& ( ! (ref_blid)->bwid_protocol ||				     \
		(ref_blid)->bwid_protocol == (tst_blid)->bwid_protocol)      \
	&& (((ref_blid)->bwid_addr & BWIP_NETMASK(mask))		     \
		    == ((tst_blid)->bwid_addr & BWIP_NETMASK(mask))))

/*
 * FLS - find last set.  Need an inline asm.
 *
 * Forward declaration; the C implementation is at the end of this
 * file.  It returns the 1-based position of the highest set bit, or
 * 0 when no bit is set.
 */
int fls(unsigned int);

/*
 * bw_iphash : look for a ip limit.  Check all the various mask
 * levels until we match.  Return NULL on failure.
 *
 * bwid : the id (address, port, protocol) to find a limit for
 * rw   : the input/output orientation being looked up
 *
 * Only the prefix lengths that have a bit set in bw_ipmask_mask are
 * probed, longest prefix first.  The first entry that passes both
 * BW_RW_CHECK and BWIP_EQUAL is returned.
 */
struct bw_limit_hash *
bw_iphash(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    unsigned int curmask;
    unsigned int mask_mask;
    unsigned int bucket;
    struct bw_limit_hash *bwidp;

    BW_ENTER_FUNC();
    mask_mask = bw_ipmask_mask;

    while (mask_mask) {

        /*
         * Take the longest remaining prefix and clear its bit.
         * The bit is built from an unsigned 1 because curmask can
         * be 32, and 1 << 31 overflows a signed int.
         */
        curmask = fls(mask_mask);
        mask_mask ^= 1U << (curmask - 1);

        bucket = BWIP_HASH(bwid->bwid_addr, curmask);
        bwidp = bw_iphash_tbl[bucket];
        DPRINTF("bw_iphash: looking for 0x%x, mask %d, rw %d in bucket %d\n",
            bwid->bwid_addr, curmask, rw, bucket);

        for ( ; bwidp; bwidp = bwidp->bwh_hashnext) {

            DPRINTF("bw_iphash: hash @ 0x%x, addr 0x%x, mask %d, rw %d\n",
                    (int) bwidp, (int) bwidp->bwh_lim_id.bwid_addr,
                                bwidp->bwh_lim_id.bwid_mask,
                                bwidp->bwh_lim_id.bwid_rw);

            if (BW_RW_CHECK(&bwidp->bwh_lim_id, rw, bwidp->bwh_lim_id.bwid_rw)
                    && BWIP_EQUAL(&bwidp->bwh_lim_id, bwid, curmask)) {
                DPRINTF("bw_iphash found 0x%x\n", (int) bwidp);
                BW_EXIT_FUNC();
                return bwidp;
            }
        }
    }
    DPRINTF("bw_iphash cannot find 0x%x. bw_ipmask_mask == 0x%x\n",
            bwid->bwid_addr, bw_ipmask_mask);
    BW_EXIT_FUNC();
    return NULL;
}

/*
 * A version of the above that looks for the exact matching
 * hash entry.  This doesn't need to check all possible masks.
 *
 * Since this is used to create a new entry, we return the
 * pointer to where the new entry should go.  If no entry is
 * found, the return value is the address of the link where
 * a new entry should be created.
 */
struct bw_limit_hash **
bw_iphash_new(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    int mask;
    struct bw_limit_hash **bwidpp;

    BW_ENTER_FUNC();

    mask = bwid->bwid_mask;

    bwidpp = &bw_iphash_tbl[BWIP_HASH(bwid->bwid_addr, mask)];
    DPRINTF("bw_iphash_new: initial bucket for 0x%x, mask %d at %d\n",
	    bwid->bwid_addr, mask, BWIP_HASH(bwid->bwid_addr, mask));

    while (*bwidpp) {
	DPRINTF("bw_iphash_new: comparing new %d to hash %d, ans %d\n",
		rw, (*bwidpp)->bwh_lim_id.bwid_rw,
		BW_RW_CHECK(&(*bwidpp)->bwh_lim_id, rw, rw));
	if (BW_RW_CHECK(&(*bwidpp)->bwh_lim_id, rw, rw)
		    && BWIP_EQUAL(&(*bwidpp)->bwh_lim_id, bwid, mask)) {
	    DPRINTF("bw_iphash_new: changing limit for 0x%x limit at 0x%x\n",
		    (int) bwid->bwid_addr, (int) *bwidpp);
	    BW_EXIT_FUNC();
	    return bwidpp;
	}

	bwidpp = &(*bwidpp)->bwh_hashnext;
    }

    DPRINTF("bw_iphash_new: new limit for 0x%x at 0x%x ( == & NULL)\n",
	    bwid->bwid_addr, (int) bwidpp);
    BW_EXIT_FUNC();
    return bwidpp;
}

/*
 * User id hash: bucket count, bucket heads, and the hash function.
 * The hash XORs the id with itself shifted right by 8 bits and keeps
 * the low bits; the AND with (count - 1) only reaches every bucket
 * when the count is a power of two.
 */
#define BWUID_HASHCNT 64
struct bw_limit_hash *bw_uidhash_tbl[BWUID_HASHCNT];
#define BWUID_HASH(x)	(((x) ^ ((x) >> 8)) & (BWUID_HASHCNT - 1))

/* Group id hash: same layout and hash function as the user id hash. */
#define BWGID_HASHCNT 64
struct bw_limit_hash *bw_gidhash_tbl[BWGID_HASHCNT];

#define BWGID_HASH(x)	(((x) ^ ((x) >> 8)) & (BWGID_HASHCNT - 1))


/*
 * UID and GID hashes.  Much simpler than IP stuff.
 */
struct bw_limit_hash *
bw_uidhash(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    struct bw_limit_hash *bwidp;
    
    BW_ENTER_FUNC();

    bwidp = bw_uidhash_tbl[BWUID_HASH(bwid->bwid_uid)];

    while (bwidp) {
	if (BW_RW_CHECK(&bwidp->bwh_lim_id, rw, bwidp->bwh_lim_id.bwid_rw)
		&& bwidp->bwh_lim_id.bwid_uid == bwid->bwid_uid)
	    BW_EXIT_FUNC();
	    return bwidp;

	bwidp = bwidp->bwh_hashnext;
    }

    BW_EXIT_FUNC();
    return bwidp;
}

struct bw_limit_hash **
bw_uidhash_new(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    struct bw_limit_hash **bwidpp;

    BW_ENTER_FUNC();
    bwidpp = &bw_uidhash_tbl[BWUID_HASH(bwid->bwid_uid)];

    while (*bwidpp) {
	if (BW_RW_CHECK(&(*bwidpp)->bwh_lim_id, rw, rw)
		&& (*bwidpp)->bwh_lim_id.bwid_uid == bwid->bwid_uid) {
	    /*
	     * XXX Need to delete WR or RD twin if new limit is both
	     */
	    BW_EXIT_FUNC();
	    return bwidpp;
	}

	bwidpp = &(*bwidpp)->bwh_hashnext;
    }
    BW_EXIT_FUNC();
    return bwidpp;
}

struct bw_limit_hash *
bw_gidhash(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    struct bw_limit_hash *bwidp;

    BW_ENTER_FUNC();
    bwidp = bw_gidhash_tbl[BWGID_HASH(bwid->bwid_gid)];

    while (bwidp) {
	if (BW_RW_CHECK(&bwidp->bwh_lim_id, rw, bwidp->bwh_lim_id.bwid_rw)
		&& bwidp->bwh_lim_id.bwid_gid == bwid->bwid_gid)
	    BW_EXIT_FUNC();
	    return bwidp;

	bwidp = bwidp->bwh_hashnext;
    }
    BW_EXIT_FUNC();
    return bwidp;
}

struct bw_limit_hash **
bw_gidhash_new(struct bw_limit_id *bwid, enum bw_iotype rw)
{
    struct bw_limit_hash **bwidpp;

    BW_ENTER_FUNC();
    bwidpp = &bw_gidhash_tbl[BWGID_HASH(bwid->bwid_gid)];

    while (*bwidpp) {
	if (BW_RW_CHECK(&(*bwidpp)->bwh_lim_id, rw, rw)
		&& (*bwidpp)->bwh_lim_id.bwid_gid == bwid->bwid_gid) {
	    /*
	     * XXX Need to delete WR or RD twin if new limit is both
	     */
	    BW_EXIT_FUNC();
	    return bwidpp;
	}

	bwidpp = &(*bwidpp)->bwh_hashnext;
    }
    BW_EXIT_FUNC();
    return bwidpp;
}

/*
 * Per-type dispatch tables for the two kinds of lookup.  The entries
 * are listed in the order ip, uid, gid.
 * NOTE(review): this assumes enum bw_types numbers the types in that
 * same order - confirm against bw_mgmt.h.
 */

/* Exact-match (create/update) lookups; each returns a chain link. */
struct bw_limit_hash **(*bw_hash_new[N_BW_TYPES])(struct bw_limit_id *, enum bw_iotype rw) =
{
	bw_iphash_new,
	bw_uidhash_new,
	bw_gidhash_new,
};

/* Search lookups; each returns the matching entry or NULL. */
struct bw_limit_hash *(*bw_hash_lookup[N_BW_TYPES])(struct bw_limit_id *, enum bw_iotype rw) =
{
	bw_iphash,
	bw_uidhash,
	bw_gidhash,
};

/*
 * bw_dolookup_new picks the table slot from the id's own bwid_type;
 * bw_dolookup takes the type explicitly.  Neither checks the index
 * against N_BW_TYPES.  Arguments are parenthesized so that any
 * expression can be passed; bw_dolookup_new evaluates bwid twice.
 */
#define bw_dolookup_new(bwid, rw) \
	((*bw_hash_new[(bwid)->bwid_type])((bwid), (rw)))
#define bw_dolookup(bwid, type, rw) \
	((*bw_hash_lookup[(type)])((bwid), (rw)))

/*
 * bw_create : add a new limit to the hash structures.
 *
 * bwid : the id of the limit to create; copied into the new entry.
 *
 * Returns:
 *   - the existing entry, if the exact-match lookup for bwid finds one;
 *   - an existing single read or write entry, converted in place to
 *     BW_RDWRIO, if the request is BW_RDWRIO and exactly one of the
 *     two is found;
 *   - NULL if the request conflicts with what is already loaded
 *     (BW_RDWRIO requested while both a read and a write limit exist,
 *     or a single-direction limit requested while the BW_RDWRIO lookup
 *     finds an entry), or if the allocation fails;
 *   - otherwise the newly created entry.
 *
 * The return address is not really useful; callers just test it
 * against NULL.
 *
 * After a new entry is linked in, every connection on the type's
 * "no limit" ring is re-examined in case the new limit applies to it.
 *
 * Must be called with limit lock held
 */
struct bw_limit_hash *
bw_create(struct bw_limit_id *bwid)
{
    struct bw_limit_hash **blhpp, *blhp;
    struct bw_limit_hash *rdlim, *wrlim;
    struct bw_info	*bwi;
    struct bw_info	*tmp_unlimit;


    BW_ENTER_FUNC();

    /*
     * bwid_type indexes bw_hash_new[] (via bw_dolookup_new) and
     * bw_unlimitq[] below, so check it before the first use.
     */
    ASSERT(bwid->bwid_type < N_BW_TYPES);

    blhpp = bw_dolookup_new(bwid, bwid->bwid_rw);

    if (*blhpp) {
	DPRINTF("bw_create found match\n");
	BW_EXIT_FUNC();
	return *blhpp;
    }


    /*
     * If the new limit is BW_RDWRIO, there can't be an existing
     * BW_RDIO and BW_WRIO limit.  Similarly, if there is an
     * existing BW_RDWRIO, we can't load a new flavor.
     *
     * We look here, and error out if any of the offending
     * combinations is attempted.
     *
     * NOTE(review): the "_new" lookups call BW_RW_CHECK(entry, rw, rw),
     * which is true for every entry when rw is BW_RDWRIO.  That makes
     * it look as if the first lookup above already returns any
     * existing read or write entry for a BW_RDWRIO request, and as if
     * the BW_RDWRIO probe in the else branch also matches
     * single-direction entries.  Confirm this is intended.
     */
    if (bwid->bwid_rw == BW_RDWRIO) {

	rdlim = *bw_dolookup_new(bwid, BW_RDIO);
	wrlim = *bw_dolookup_new(bwid, BW_WRIO);

	if (rdlim && wrlim)
	{
	    BW_EXIT_FUNC();
	    return NULL;
	}

	/*
	 * If there is a single read or write limit, we assume that
	 * the request is to convert it to a BW_RDWRIO.  This works
	 * because bwid_rw is checked in the hash compare, but not
	 * used in the key.
	 */
	if (rdlim) {
	    rdlim->bwh_lim_id.bwid_rw = BW_RDWRIO;
	    BW_EXIT_FUNC();
	    return rdlim;
	}
	else if (wrlim) {
	    wrlim->bwh_lim_id.bwid_rw = BW_RDWRIO;
	    BW_EXIT_FUNC();
	    return wrlim;
	}
    }
    else if (*bw_dolookup_new(bwid, BW_RDWRIO)) {
	DPRINTF("bw_create - found RDWR for single mode insert\n");
	BW_EXIT_FUNC();
	return NULL;
    }

    blhp = kmalloc(sizeof *blhp, GFP_ATOMIC);

    if ( ! blhp)
    {
	BW_EXIT_FUNC();
	return NULL;
    }

    memset(blhp, 0, sizeof(*blhp));

    blhp->bwh_lim_id = *bwid;

    /*
     * NOTE(review): the mask bookkeeping runs for every limit type,
     * and BW_NEW_MASK needs a value in 1..32.  Confirm that bwid_mask
     * is always in that range here, including for uid/gid limits.
     */
    BW_NEW_MASK(bwid->bwid_mask);

    /* blhpp is still the NULL link at the end of the chain: hook in */
    *blhpp = blhp;

    /*
     * Ok, new limit.  Gotta go through and see if it applies
     * to any existing connections.  Setting the list to NULL
     * means new connections can be created - their lookups
     * will be correct.  (SMP issue - not important yet).
     */
    tmp_unlimit = bw_unlimitq[bwid->bwid_type];
    bw_unlimitq[bwid->bwid_type] = NULL;

    /*
     * NOTE(review): an entry for which bw_type_valid() is false is
     * left linked on the old ring, which bw_unlimitq no longer points
     * to.  The walk only stops at a self-linked entry, so it does not
     * appear to terminate if two or more such entries remain.
     * Confirm bw_type_valid() cannot fail for queued entries.
     */
    for (bwi = tmp_unlimit; bwi; bwi = tmp_unlimit) {
	/*
	 * Avoid use after free implicit in bw_unlimit_deq():
	 * pick up the successor before bwi is unlinked.  A
	 * self-linked entry is the last one on the ring.
	 */
	tmp_unlimit = bwi->bw_unlimit_forw[bwid->bwid_type];
	if (tmp_unlimit == bwi)
	    tmp_unlimit = NULL;

	if (bw_type_valid(bwi, bwid->bwid_type)) {
	    bw_unlimit_deq(bwid->bwid_type, bwi);
	    if (bwid->bwid_rw == BW_RDWRIO) {
		bw_new_ent(bwi, bwid->bwid_type, BW_RDIO);
		bw_new_ent(bwi, bwid->bwid_type, BW_WRIO);
	    }
	    else bw_new_ent(bwi, bwid->bwid_type, bwid->bwid_rw);
	}
    }
    bw_proc_newip(blhp);

    BW_EXIT_FUNC();
    return blhp;
}

int
bw_delete(struct bw_limit_id *bwid)
{
    struct bw_limit_hash **blhpp, *blhp;

    BW_ENTER_FUNC();

    blhpp = bw_dolookup_new(bwid, bwid->bwid_rw);
    blhp = *blhpp;

    if ( ! blhp) {
	DPRINTF("bw_delete found no match\n");
	BW_EXIT_FUNC();
	return -ENOENT;
    }

    /*
     * Zap the entry so no one will find it.
     */
    *blhpp = blhp->bwh_hashnext;

    /*
     * Flag this entry as dead.  This is lazy evaluation.  Whenever
     * a limit is processed, we check to see if it is in delete mode.
     * If so, we decrement some counters and clean up some.
     */
    blhp->bwh_flags |= BWPARAM_DELETE;

    bw_proc_delip(blhp);

    /*
     * XXXX - the other cleanup half of this function is in
     *        bw_mgmt.c:bw_io_check()
     * It just duplicates the following two lines, so a function
     * seemed like overkill.  Still, if you change the next lines
     * you should fix the other two.
     */
    if (blhp->bwh_lim_refcnt == 0 && blhp->bwh_active_ios == 0)
	kfree(blhp);

    BW_EXIT_FUNC();
    return 0;
}

/*
 * bw_new_ent looks for a limit for a new connection.
 *
 * must be called with limitlock held
 */
int
bw_new_ent(struct bw_info *bwi, enum bw_types type, enum bw_iotype rw)
{
    struct bw_limit_hash *limit;

    BW_ENTER_FUNC();

    limit = bw_dolookup(&bwi->bw_type_id[type], type, rw);

    if (limit > 0) {
	bwi->bw_io[rw].bw_limit[type] = limit;
	limit->bwh_lim_refcnt++;
	BW_EXIT_FUNC();
	return 1;
    }
    else if ( ! bw_type_unlimit(bwi, type)) {
	bw_unlimit_enq(type, bwi);
    }

    BW_EXIT_FUNC();
    return 0;
}

/*
 * bw_delete_ent - nuke a existing connection.
 */
void
bw_delete_ent(struct bw_info *bwi, enum bw_types type, enum bw_iotype rw)
{
    BW_ENTER_FUNC();

    if (bwi->bw_io[rw].bw_limit[type]) {
	bwi->bw_io[rw].bw_limit[type]->bwh_lim_refcnt--;
    }
    else if (bw_type_unlimit(bwi, type)) {
	/*
	 * XXX : if read and write are being bandwidth limited,
	 * we should check to be sure both have been deleted
	 * before we bw_unlimit_deq()
	 */
	bw_unlimit_deq(type, bwi);
    }
    BW_EXIT_FUNC();
}

/*
 * fls - find last set.
 * should be inline asm - called for every new connection.
 * Do later.
 *
 * Returns the 1-based position of the most significant set bit in
 * `bits` (1 for 0x1, 32 for 0x80000000), or 0 when no bit is set.
 *
 * The search halves the candidate range at each step: 16, 8, 4, 2
 * and 1 bits.  The probe masks are unsigned constants because
 * shifting them up to the top of the word (e.g. 0xff00 << 16) would
 * overflow a signed int.  The code assumes a 32 bit unsigned int.
 */
int
fls(unsigned int bits)
{
    int ret = 0;

    if (bits == 0)
        return 0;

    if (bits >> 16)
        ret += 16;

    if (bits & (0xff00U << ret))
        ret += 8;

    if (bits & (0xf0U << ret))
        ret += 4;

    if (bits & (0xcU << ret))
        ret += 2;

    if (bits & (0x2U << ret))
        ret += 1;

    /* ret is the 0-based index of the top bit; callers want 1-based */
    return ret + 1;
}

// LICENSE:
// This software is subject to the terms of the GNU GENERAL 
// PUBLIC LICENSE Version 2, June 1991
