SOURCES (LINUX_2_6_20): wrr-linux-051111-2.6.20.patch (NEW) - http...

mguevara mguevara at pld-linux.org
Mon Feb 26 10:42:45 CET 2007


Author: mguevara                     Date: Mon Feb 26 09:42:45 2007 GMT
Module: SOURCES                       Tag: LINUX_2_6_20
---- Log message:
- http://www.zz9.dk/patches/wrr-linux-051111-2.6.20.patch.gz

---- Files affected:
SOURCES:
   wrr-linux-051111-2.6.20.patch (NONE -> 1.1.2.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/wrr-linux-051111-2.6.20.patch
diff -u /dev/null SOURCES/wrr-linux-051111-2.6.20.patch:1.1.2.1
--- /dev/null	Mon Feb 26 10:42:45 2007
+++ SOURCES/wrr-linux-051111-2.6.20.patch	Mon Feb 26 10:42:40 2007
@@ -0,0 +1,1795 @@
+diff -urN linux-2.6.20.orig/include/linux/pkt_sched.h linux-2.6.20/include/linux/pkt_sched.h
+--- linux-2.6.20.orig/include/linux/pkt_sched.h	2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/include/linux/pkt_sched.h	2007-02-09 09:38:50.000000000 +0100
+@@ -23,6 +23,8 @@
+ 
+ #define TC_PRIO_MAX			15
+ 
++#include <linux/if_ether.h>
++
+ /* Generic queue statistics, available for all the elements.
+    Particular schedulers may have also their private records.
+  */
+@@ -466,4 +468,97 @@
+ 
+ #define NETEM_DIST_SCALE	8192
+ 
++
++/* WRR section */
++
++/*
++ * A sub weight of a class.
++ * All numbers are represented as parts of (2^64-1).
++ */
++struct tc_wrr_class_weight {
++	__u64	val;	// Current value	(0 is not valid)
++	__u64	decr;	// Value per byte	(2^64-1 is not valid)
++	__u64	incr;	// Value per second	(2^64-1 is not valid)
++	__u64	min;	// Minimal value	(0 is not valid)
++	__u64	max;	// Maximal value	(0 is not valid)
++	time_t	tim;	// The time at which the above information was correct
++};
++
++/* Packet sent when modifying a class */
++struct tc_wrr_class_modf {
++	/* Not-valid values are ignored */
++	struct tc_wrr_class_weight weight1;
++	struct tc_wrr_class_weight weight2;
++};
++
++/* Packet returned when querying a class */
++struct tc_wrr_class_stats {
++	char	used;			/* If this is false the information below is invalid */
++	struct tc_wrr_class_modf class_modf;
++	unsigned char addr[ETH_ALEN];
++	char	usemac;			/* True if addr is a MAC address, else it is an IP address
++					   (this value is only for convenience, it is always the same
++					    value as in the qdisc) */
++	int	heappos;		/* Current heap position or 0 if not in heap */
++	__u64	penal_ls;		/* Penalty value in heap (ls) */
++	__u64	penal_ms;		/* Penalty value in heap (ms) */
++};
++
++/* Qdisc-wide penalty information (boolean values - 2 not valid) */
++struct tc_wrr_qdisc_weight {
++	signed char	weight_mode;	/* 0=No automatic change to weight
++					   1=Decrease normally
++					   2=Also multiply with number of machines
++					   3=Instead multiply with priority divided
++					     with priority of the other.
++					   -1=no change */
++};
++
++/* Packet sent when modifying a qdisc */
++struct tc_wrr_qdisc_modf {
++	/* Not-valid values are ignored */
++	struct tc_wrr_qdisc_weight weight1;
++	struct tc_wrr_qdisc_weight weight2;
++};
++
++/* Packet sent when creating a qdisc */
++struct tc_wrr_qdisc_crt {
++	struct tc_wrr_qdisc_modf qdisc_modf;
++	char	srcaddr;	/* 1=lookup source, 0=lookup destination */
++	char	usemac;		/* 1=Classify on MAC addresses, 0=classify on IP */
++	char	usemasq;	/* 1=Classify based on masquerading - only valid
++				     if usemac is zero */
++	int	bands_max;	/* Maximal number of bands (i.e.: classes) */
++	int	proxy_maxconn;	/* If different from 0 then we support proxy remapping
++				   of packets. And this is the number of maximal
++				   concurrent proxy connections. */
++};
++
++/* Packet returned when querying a qdisc */
++struct tc_wrr_qdisc_stats {
++	struct tc_wrr_qdisc_crt qdisc_crt;
++	int	proxy_curconn;		     
++	int	nodes_in_heap;	/* Current number of bands wanting to send something */
++	int	bands_cur;	/* Current number of bands used (i.e.: MAC/IP addresses seen) */
++	int	bands_reused;	/* Number of times this band has been reused. */
++	int	packets_requed;	/* Number of times packets have been requeued. */
++	__u64	priosum;	/* Sum of priorities in heap where 1 is 2^32 */
++};
++
++struct tc_wrr_qdisc_modf_std {
++	char	proxy;		/* This indicates which of the tc_wrr_qdisc_modf structures this is. 0=This struct */
++	char	change_class;	/* Should we also change a class? */
++	struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
++	unsigned char addr[ETH_ALEN]; /* Class to change (non-used bytes should be 0). Valid only if change_class is true */
++	struct tc_wrr_class_modf class_modf; /* The change */
++};
++
++/* Used for proxy remapping */
++struct tc_wrr_qdisc_modf_proxy {
++	char	proxy;		/* This indicates which of the tc_wrr_qdisc_modf structures this is. 1=This struct */
++	char	reset;		/* This is 1 if the proxyremap information should be reset */
++	int	changec;	/* changec is the number of elements in changes. */
++	long	changes[0];	/* This is an array of type ProxyRemapBlock */
++};
++
+ #endif
+diff -urN linux-2.6.20.orig/include/linux/wrr.h linux-2.6.20/include/linux/wrr.h
+--- linux-2.6.20.orig/include/linux/wrr.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.20/include/linux/wrr.h	2007-02-09 09:38:50.000000000 +0100
+@@ -0,0 +1,67 @@
++#ifndef WRR_H
++#define WRR_H
++
++/*
++ * This describes the information that is written in proxyremap.log and which
++ * are used in the communication between proxyremapserver and proxyremapclient.
++ * Everything is in network order.
++ */
++
++/* First this header is sent */
++#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
++
++/* 
++ * Then this block is sent every time a connection is opened or closed.
++ * Note how it is aligned to keep space usage small - arrays of this
++ * structure are saved in many places.
++ */
++typedef struct {
++	/* Server endpoint of connection */
++	unsigned saddr; 
++	unsigned short sport;
++
++	/* IP protocol for this connection (typically udp or tcp) */
++	unsigned char proto;
++
++	/* Is the connection opened or closed? */
++	unsigned char open;
++
++	/* Client the packets should be accounted to */
++	unsigned caddr;
++	unsigned char macaddr[6];	/* Might be 0. */
++
++	/* An informal two-character code from the proxyserver. Used for debugging. */
++	char proxyinfo[2];
++} ProxyRemapBlock;
++
++
++/*
++ * This is common code for handling the tables containing information about 
++ * which proxyserver connections are associated with which machines.
++ */
++
++/* Returns the number of bytes that should be available in the area
++ * maintained by this module given the maximal number of concurrent 
++ * connections. */
++int proxyGetMemSize(int max_connections);
++
++/* Initializes a memory area to use. There must be as many bytes
++   available as returned by proxyGetMemSize. */
++void proxyInitMem(void *data, int max_connections);
++
++/* Queries */
++int proxyGetCurConn(void *data);	/* Returns current number of connections */
++int proxyMaxCurConn(void *data);	/* Returns maximal number of connections */
++
++/* This is called to open and close connections. Returns -1 if
++   a protocol error occurs (i.e.: If it is discovered) */
++int proxyConsumeBlock(void *data, ProxyRemapBlock *);
++
++/* Returns the RemapBlock associated with this connection or 0: */
++ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
++			     char proto);
++
++/* Return the maximum number of connections */
++int proxyGetMaxConn(void *data);
++
++#endif
+diff -urN linux-2.6.20.orig/MAINTAINERS linux-2.6.20/MAINTAINERS
+--- linux-2.6.20.orig/MAINTAINERS	2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/MAINTAINERS	2007-02-09 09:38:50.000000000 +0100
+@@ -3680,6 +3680,12 @@
+ W:	http://advogato.org/person/acme
+ S:	Maintained
+ 
++WRR NETWORK SCHEDULER
++P:	Rasmus Bøg Hansen
++M:	moffe at zz9.dk
++W:	http://www.zz9.dk/wrr
++S:	Maintained
++
+ X.25 NETWORK LAYER
+ P:	Henner Eisen
+ M:	eis at baty.hanse.de
+diff -urN linux-2.6.20.orig/net/sched/Kconfig linux-2.6.20/net/sched/Kconfig
+--- linux-2.6.20.orig/net/sched/Kconfig	2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/net/sched/Kconfig	2007-02-09 09:38:50.000000000 +0100
+@@ -167,6 +167,25 @@
+ 	  To compile this code as a module, choose M here: the
+ 	  module will be called sch_prio.
+ 
++config NET_SCH_WRR
++	tristate "WRR packet scheduler"
++	depends on NET_SCHED && ( IP_NF_CONNTRACK || !IP_NF_CONNTRACK )
++	help
++	  The weighted round-robin scheduling algorithm directs network
++	  connections to different real servers based on server weights
++	  in a round-robin manner. Servers with higher weights receive
++	  new connections first than those with less weights, and servers
++	  with higher weights get more connections than those with less
++	  weights and servers with equal weights get equal connections.
++
++	  If you want masquerading (the "masq" option to the tc userspace
++	  program) you need to enable connection tracking (IP_NF_CONNTRACK)
++	  in the netfilter options.
++
++	  If you want to compile it in kernel, say Y. If you want to compile
++	  it as a module, say M here and read Documentation/modules.txt. The
++	  module will be called sch_wrr. If unsure, say N.
++
+ config NET_SCH_RED
+ 	tristate "Random Early Detection (RED)"
+ 	---help---
+diff -urN linux-2.6.20.orig/net/sched/Makefile linux-2.6.20/net/sched/Makefile
+--- linux-2.6.20.orig/net/sched/Makefile	2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/net/sched/Makefile	2007-02-09 09:44:17.000000000 +0100
+@@ -3,6 +3,7 @@
+ #
+ 
+ obj-y	:= sch_generic.o
++sch_wrr-objs = wrr.o wrr_proxydict.o
+ 
+ obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_blackhole.o
+ obj-$(CONFIG_NET_CLS)		+= cls_api.o
+@@ -24,6 +25,7 @@
+ obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o 
+ obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
+ obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
++obj-$(CONFIG_NET_SCH_WRR)	+= sch_wrr.o
+ obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
+ obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
+ obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
+diff -urN linux-2.6.20.orig/net/sched/wrr.c linux-2.6.20/net/sched/wrr.c
+--- linux-2.6.20.orig/net/sched/wrr.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.20/net/sched/wrr.c	2007-02-09 10:00:45.000000000 +0100
+@@ -0,0 +1,1385 @@
++/*-----------------------------------------------------------------------------
++Weighted Round Robin scheduler.
++  
++Written by Christian Worm Mortensen, cworm at it-c.dk.
++
++Introduction
++============
++This module implements a weighted round robin queue with built-in classifier.
++The classifier currently maps each MAC or IP address (configurable either MAC
++or IP and either source or destination) to different classes. Each such class 
++is called a band. When using MAC addresses only bridged packets can be 
++classified; other packets go to a default MAC address.
++
++Each band has a weight value, where 0<weight<=1. The bandwidth each band
++get is proportional to the weight as can be deduced from the next section.
++
++The queue
++=========
++Each band has a penalty value. Bands having something to send are kept in
++a heap according to this value. The band with the lowest penalty value
++is in the root of the heap. The penalty value is a 128 bit number. Initially 
++no bands are in the heap.
++
++Two global 64 bit values counter_low_penal and counter_high_penal are initialized
++to 0 and to 2^63 respectively.
++
++Enqueuing:
++  The packet is inserted in the queue for the band it belongs to. If the band 
++  is not in the heap it is inserted into it. In this case, the upper 64 bits 
++  of its penalty value is set to the same as for the root-band of the heap. 
++  If the heap is empty 0 is used. The lower 64 bit is set to counter_low_penal
++  and counter_low_penal is incremented by 1.
++  
++Dequeuing:
++  If the heap is empty we have nothing to send. 
++  
++  If the root band has a non-empty queue a packet is dequeued from that.
++  The upper 64 bit of the penalty value of the band is incremented by the 
++  packet size divided by the weight of the band. The lower 64 bit is set to 
++  counter_high_penal and counter_high_penal is incremented by 1.
++
++  If the root element for some reason has an  empty queue it is removed from 
++  the heap and we try to dequeue again.
++
++The effect of the heap and the upper 64 bit of the penalty values is to 
++implement a weighted round robin queue. The effect of counter_low_penal,
++counter_high_penal and the lower 64 bit of the penalty value is primarily to
++stabilize the queue and to give better quality of service to machines only 
++sending a packet now and then. For example machines which have a single 
++interactive connection such as telnet or simple text chatting.
++
++Setting weight
++==============
++The weight value can be changed dynamically by the queue itself. The weight 
++value and how it is changed is described by the two members weight1 and 
++weight2 which has type tc_wrr_class_weight and which are in each class. And 
++by the two integer value members of the qdisc called penalfact1 and penalfact2.
++The structure is defined as:
++
++  struct tc_wrr_class_weight {
++    // All are represented as parts of (2^64-1).
++    __u64 val;  // Current value                        (0 is not valid)
++    __u64 decr; // Value pr bytes                       (2^64-1 is not valid)
++    __u64 incr; // Value pr seconds                     (2^64-1 is not valid)
++    __u64 min;  // Minimal value                        (0 is not valid)
++    __u64 max;  // Maximal value                        (0 is not valid)
++
++    // The time where the above information was correct:
++    time_t tim;
++  };
++    
++The weight value used by the dequeue operations is calculated as 
++weight1.val*weight2.val. weight1 and weight2 are handled independently and in the 
++same way as will be described now.
++
++Every second, the val parameter is incremented by incr.
++
++Every time a packet is transmitted the value is decremented by decr times
++the packet size. Depending on the value of the weight_mode parameter it
++is also multiplied with other numbers. This makes it possible to give 
++penalty to machines transferring much data.
++
++-----------------------------------------------------------------------------*/
++
++#include <linux/autoconf.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/vmalloc.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/in.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <net/ip.h>
++#include <net/route.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <net/pkt_sched.h>
++#include <linux/if_arp.h>
++#include <linux/version.h>
++#include <linux/wrr.h>
++
++#define WRR_VER "051111"
++
++#define my_malloc(size) kmalloc(size,GFP_KERNEL)
++#define my_free(ptr)    kfree(ptr)
++
++#define LOCK_START sch_tree_lock(sch);
++#define LOCK_END   sch_tree_unlock(sch);
++#define ENQUEUE_SUCCESS 0
++#define ENQUEUE_FAIL    NET_XMIT_DROP
++
++#if defined CONFIG_IP_NF_CONNTRACK || defined CONFIG_IP_NF_CONNTRACK_MODULE
++#include <linux/netfilter_ipv4/ip_conntrack.h>
++#define MASQ_SUPPORT
++#endif
++
++/* The penalty (priority) type */
++typedef u64 penalty_base_t;
++#define penalty_base_t_max ((penalty_base_t)-1)
++typedef struct penalty_t {
++	penalty_base_t ms;
++	penalty_base_t ls;
++} penalty_t;
++#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
++#define penalty_le(a,b)  (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
++static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max };
++
++/*
++ * A general heap.
++ */
++
++struct heap;
++struct heap_element;
++
++/* Initializes an empty heap:
++ *   he:   A pointer to an uninitialized heap structure identifying the heap
++ *   size: Maximal number of elements the heap can contain
++ *   poll: An array of size "size" used by the heap.     
++ */
++static void heap_init(struct heap *he, int size, struct heap_element *poll);
++
++/* Each element in the heap is identified by a user-assigned id which
++ * should be a non negative integer less than the size argument
++ * given to heap_init.
++ */
++static void heap_insert(struct heap *, int id, penalty_t);
++static void heap_remove(struct heap *, int id);
++static void heap_set_penalty(struct heap *, int id, penalty_t);
++
++/* Retrieving information */
++static char heap_empty(struct heap *);				/* Heap empty? */
++static char heap_contains(struct heap *, int id);		/* Does heap contain 
++						      		   the given id? */
++static int heap_root(struct heap *);				/* Returns the id of the root */
++static penalty_t heap_get_penalty(struct heap *, int id);	/* Returns penalty
++							 	   of the given id */
++
++/*
++ * Heap implementation
++ */
++
++struct heap_element {
++	penalty_t penalty;
++	int id;			/* The user-assigned id of this element */
++	int id2idx;		/* Maps from user-assigned ids to indices in root_1 */
++};
++
++struct heap {
++	struct heap_element *root_1;
++	int elements;
++};
++
++/* Heap implementation */
++static void heap_init(struct heap *h, int size, struct heap_element *poll)
++{
++	int i;
++
++	h->elements = 0;
++	h->root_1 = poll - 1;
++
++	for (i = 0; i < size; i++)
++		poll[i].id2idx = 0;
++};
++
++static char heap_empty(struct heap *h)
++{
++	return h->elements == 0;
++}
++
++static char heap_contains(struct heap *h, int id)
++{
++	return h->root_1[id + 1].id2idx != 0;
++}
++
++static int heap_root(struct heap *h)
++{
++	return h->root_1[1].id;
++}
++
++static penalty_t heap_get_penalty(struct heap *h, int id)
++{
++	return h->root_1[h->root_1[id + 1].id2idx].penalty;
++}
++
++static void heap_penalty_changed_internal(struct heap *h, int idx);
++
++static void heap_set_penalty(struct heap *h, int id, penalty_t p)
++{
++	int idx = h->root_1[id + 1].id2idx;
++	h->root_1[idx].penalty = p;
++	heap_penalty_changed_internal(h, idx);
++}
++
++static void heap_insert(struct heap *h, int id, penalty_t p)
++{
++	/* Insert at the end of the heap */
++	h->elements++;
++	h->root_1[h->elements].id = id;
++	h->root_1[h->elements].penalty = p;
++	h->root_1[id + 1].id2idx = h->elements;
++
++	/* And put it in the right position */
++	heap_penalty_changed_internal(h, h->elements);
++}
++
++static void heap_remove(struct heap *h, int id)
++{
++	int idx = h->root_1[id + 1].id2idx;
++	int mvid;
++	h->root_1[id + 1].id2idx = 0;
++
++	if (h->elements == idx) {
++		h->elements--;
++		return;
++	}
++
++	mvid = h->root_1[h->elements].id;
++	h->root_1[idx].id = mvid;
++	h->root_1[idx].penalty = h->root_1[h->elements].penalty;
++	h->root_1[mvid + 1].id2idx = idx;
++
++	h->elements--;
++	heap_penalty_changed_internal(h, idx);
++}
++
++static void heap_swap(struct heap *h, int idx0, int idx1)
++{
++	penalty_t tmp_p;
++	int tmp_id;
++	int id0, id1;
++
++	/* Simple content */
++	tmp_p = h->root_1[idx0].penalty;
++	tmp_id = h->root_1[idx0].id;
++	h->root_1[idx0].penalty = h->root_1[idx1].penalty;
++	h->root_1[idx0].id = h->root_1[idx1].id;
++	h->root_1[idx1].penalty = tmp_p;
++	h->root_1[idx1].id = tmp_id;
++
++	/* Update reverse pointers */
++	id0 = h->root_1[idx0].id;
++	id1 = h->root_1[idx1].id;
++	h->root_1[id0 + 1].id2idx = idx0;
++	h->root_1[id1 + 1].id2idx = idx1;
++}
++
++static void heap_penalty_changed_internal(struct heap *h, int cur)
++{
++	if (cur == 1
++	    || penalty_leq(h->root_1[cur >> 1].penalty,
++			   h->root_1[cur].penalty)) {
++		/* We are in heap order upwards - so we should move the element down */
++		for (;;) {
++			int nxt0 = cur << 1;
++			int nxt1 = nxt0 + 1;
++			penalty_t pen_c = h->root_1[cur].penalty;
++			penalty_t pen_0 =
++			    nxt0 <=
++			    h->elements ? h->root_1[nxt0].penalty : penalty_max;
++			penalty_t pen_1 =
++			    nxt1 <=
++			    h->elements ? h->root_1[nxt1].penalty : penalty_max;
++
++			if (penalty_le(pen_0, pen_c)
++			    && penalty_leq(pen_0, pen_1)) {
++				/* Swap with child 0 */
++				heap_swap(h, cur, nxt0);
++				cur = nxt0;
++			} else if (penalty_le(pen_1, pen_c)) {
++				/* Swap with child 1 */
++				heap_swap(h, cur, nxt1);
++				cur = nxt1;
++			} else {
++				/* Heap in heap order */
++				return;
++			}
++		}
++	} else {
++		/* We are not in heap order upwards (and thus we must be it downwards).
++		   We move up */
++		while (cur != 1) {	/* While not root */
++			int nxt = cur >> 1;
++			if (penalty_leq
++			    (h->root_1[nxt].penalty, h->root_1[cur].penalty))
++				return;
++			heap_swap(h, cur, nxt);
++			cur = nxt;
++		}
++	}
++};
++
++/*
++ * Classification based on MAC or IP addresses. Note that for historical reasons
++ * these are prefixed with mac_ since originally only MAC based classification
++ * was supported.
++
++ * This code should be in a separate filter module - but it isn't.
++ */
++
++
++/* Interface */
++
++struct mac_head;
++
++/* Initializes/destroys the structure we maintain.
++   Returns -1 on error */
++static int mac_init(struct mac_head *, int max_macs, char srcaddr,
++		    char usemac, char usemasq, void *proxyremap);
++static void mac_done(struct mac_head *);
<<Diff was trimmed, longer than 597 lines>>


More information about the pld-cvs-commit mailing list