SOURCES (LINUX_2_6_20): wrr-linux-051111-2.6.20.patch (NEW) - http...
mguevara
mguevara at pld-linux.org
Mon Feb 26 10:42:45 CET 2007
Author: mguevara Date: Mon Feb 26 09:42:45 2007 GMT
Module: SOURCES Tag: LINUX_2_6_20
---- Log message:
- http://www.zz9.dk/patches/wrr-linux-051111-2.6.20.patch.gz
---- Files affected:
SOURCES:
wrr-linux-051111-2.6.20.patch (NONE -> 1.1.2.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/wrr-linux-051111-2.6.20.patch
diff -u /dev/null SOURCES/wrr-linux-051111-2.6.20.patch:1.1.2.1
--- /dev/null Mon Feb 26 10:42:45 2007
+++ SOURCES/wrr-linux-051111-2.6.20.patch Mon Feb 26 10:42:40 2007
@@ -0,0 +1,1795 @@
+diff -urN linux-2.6.20.orig/include/linux/pkt_sched.h linux-2.6.20/include/linux/pkt_sched.h
+--- linux-2.6.20.orig/include/linux/pkt_sched.h 2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/include/linux/pkt_sched.h 2007-02-09 09:38:50.000000000 +0100
+@@ -23,6 +23,8 @@
+
+ #define TC_PRIO_MAX 15
+
++#include <linux/if_ether.h>
++
+ /* Generic queue statistics, available for all the elements.
+ Particular schedulers may have also their private records.
+ */
+@@ -466,4 +468,97 @@
+
+ #define NETEM_DIST_SCALE 8192
+
++
++/* WRR section */
++
++/*
++ * A sub-weight of a class.
++ * All numbers are represented as parts of (2^64-1).
++ */
++struct tc_wrr_class_weight {
++ __u64 val; // Current value (0 is not valid)
++ __u64 decr; // Value per byte (2^64-1 is not valid)
++ __u64 incr; // Value per second (2^64-1 is not valid)
++ __u64 min; // Minimal value (0 is not valid)
++ __u64 max; // Maximal value (0 is not valid)
++ time_t tim; // The time where the above information was correct
++};
++
++/* Packet sent when modifying a class */
++struct tc_wrr_class_modf {
++ /* Not-valid values are ignored */
++ struct tc_wrr_class_weight weight1;
++ struct tc_wrr_class_weight weight2;
++};
++
++/* Packet returned when querying a class */
++struct tc_wrr_class_stats {
++ char used; /* If this is false the information below is invalid */
++ struct tc_wrr_class_modf class_modf;
++ unsigned char addr[ETH_ALEN];
++ char usemac; /* True if addr is a MAC address, else it is an IP address
++ (this value is only for convenience, it is always the same
++ value as in the qdisc) */
++ int heappos; /* Current heap position or 0 if not in heap */
++ __u64 penal_ls; /* Penalty value in heap (ls) */
++ __u64 penal_ms; /* Penalty value in heap (ms) */
++};
++
++/* Qdisc-wide penalty information (boolean values - 2 not valid) */
++struct tc_wrr_qdisc_weight {
++ signed char weight_mode; /* 0=No automatic change to weight
++ 1=Decrease normally
++ 2=Also multiply with number of machines
++ 3=Instead multiply with priority divided
++ with priority of the other.
++ -1=no change */
++};
++
++/* Packet sent when modifying a qdisc */
++struct tc_wrr_qdisc_modf {
++ /* Not-valid values are ignored */
++ struct tc_wrr_qdisc_weight weight1;
++ struct tc_wrr_qdisc_weight weight2;
++};
++
++/* Packet sent when creating a qdisc */
++struct tc_wrr_qdisc_crt {
++ struct tc_wrr_qdisc_modf qdisc_modf;
++ char srcaddr; /* 1=lookup source, 0=lookup destination */
++ char usemac; /* 1=Classify on MAC addresses, 0=classify on IP */
++ char usemasq; /* 1=Classify based on masquerading - only valid
++ if usemac is zero */
++ int bands_max; /* Maximal number of bands (i.e.: classes) */
++ int proxy_maxconn; /* If different from 0 then we support proxy remapping
++ of packets. And this is the number of maximal
++ concurrent proxy connections. */
++};
++
++/* Packet returned when querying a qdisc */
++struct tc_wrr_qdisc_stats {
++ struct tc_wrr_qdisc_crt qdisc_crt;
++ int proxy_curconn;
++ int nodes_in_heap; /* Current number of bands wanting to send something */
++ int bands_cur; /* Current number of bands used (i.e.: MAC/IP addresses seen) */
++ int bands_reused; /* Number of times this band has been reused. */
++ int packets_requed; /* Number of times packets have been requeued. */
++ __u64 priosum; /* Sum of priorities in heap where 1 is 2^32 */
++};
++
++struct tc_wrr_qdisc_modf_std {
++ char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 0=This struct */
++ char change_class; /* Should we also change a class? */
++ struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
++ unsigned char addr[ETH_ALEN]; /* Class to change (unused bytes should be 0). Valid only if change_class is true */
++ struct tc_wrr_class_modf class_modf; /* The change */
++};
++
++/* Used for proxyremapping */
++struct tc_wrr_qdisc_modf_proxy {
++ char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 1=This struct */
++ char reset; /* This is 1 if the proxyremap information should be reset */
++ int changec; /* changec is the number of elements in changes. */
++ long changes[0]; /* This is an array of type ProxyRemapBlock */
++};
++
+ #endif
+diff -urN linux-2.6.20.orig/include/linux/wrr.h linux-2.6.20/include/linux/wrr.h
+--- linux-2.6.20.orig/include/linux/wrr.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.20/include/linux/wrr.h 2007-02-09 09:38:50.000000000 +0100
+@@ -0,0 +1,67 @@
++#ifndef WRR_H
++#define WRR_H
++
++/*
++ * This describes the information that is written in proxyremap.log and which
++ * are used in the communication between proxyremapserver and proxyremapclient.
++ * Everything is in network order.
++ */
++
++/* First this header is sent */
++#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
++
++/*
++ * Then this block is sent every time a connection is opened or closed.
++ * Note how it is aligned to take up little space - arrays of this
++ * structure are saved in many places.
++ */
++typedef struct {
++ /* Server endpoint of connection */
++ unsigned saddr;
++ unsigned short sport;
++
++ /* IP protocol for this connection (typically udp or tcp) */
++ unsigned char proto;
++
++ /* Is the connection opened or closed? */
++ unsigned char open;
++
++ /* Client the packets should be accounted to */
++ unsigned caddr;
++ unsigned char macaddr[6]; /* Might be 0. */
++
++ /* An informal two-character code from the proxyserver. Used for debugging. */
++ char proxyinfo[2];
++} ProxyRemapBlock;
++
++
++/*
++ * This is common code for handling the tables containing information about
++ * which proxyserver connections are associated with which machines.
++ */
++
++/* Returns the number of bytes that should be available in the area
++ * maintained by this module given the maximal number of concurrent
++ * connections. */
++int proxyGetMemSize(int max_connections);
++
++/* Initializes a memory area to use. There must be as many bytes
++ available as returned by proxyGetMemSize. */
++void proxyInitMem(void *data, int max_connections);
++
++/* Queries */
++int proxyGetCurConn(void *data); /* Returns current number of connections */
++int proxyMaxCurConn(void *data); /* Returns maximal number of connections */
++
++/* This is called to open and close connections. Returns -1 if
++ a protocol error occurs (i.e.: If it is discovered) */
++int proxyConsumeBlock(void *data, ProxyRemapBlock *);
++
++/* Returns the RemapBlock associated with this connection or 0: */
++ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
++ char proto);
++
++/* Return the maximum number of connections */
++int proxyGetMaxConn(void *data);
++
++#endif
+diff -urN linux-2.6.20.orig/MAINTAINERS linux-2.6.20/MAINTAINERS
+--- linux-2.6.20.orig/MAINTAINERS 2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/MAINTAINERS 2007-02-09 09:38:50.000000000 +0100
+@@ -3680,6 +3680,12 @@
+ W: http://advogato.org/person/acme
+ S: Maintained
+
++WRR NETWORK SCHEDULER
++P: Rasmus Bøg Hansen
++M: moffe at zz9.dk
++W: http://www.zz9.dk/wrr
++S: Maintained
++
+ X.25 NETWORK LAYER
+ P: Henner Eisen
+ M: eis at baty.hanse.de
+diff -urN linux-2.6.20.orig/net/sched/Kconfig linux-2.6.20/net/sched/Kconfig
+--- linux-2.6.20.orig/net/sched/Kconfig 2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/net/sched/Kconfig 2007-02-09 09:38:50.000000000 +0100
+@@ -167,6 +167,25 @@
+ To compile this code as a module, choose M here: the
+ module will be called sch_prio.
+
++config NET_SCH_WRR
++ tristate "WRR packet scheduler"
++ depends on NET_SCHED && ( IP_NF_CONNTRACK || !IP_NF_CONNTRACK )
++ help
++ The weighted round-robin scheduling algorithm directs network
++ connections to different real servers based on server weights
++ in a round-robin manner. Servers with higher weights receive
++ new connections before those with lower weights, and servers
++ with higher weights get more connections than those with lower
++ weights and servers with equal weights get equal connections.
++
++ If you want masquerading (the "masq" option to the tc userspace
++ program) you need to enable connection tracking (IP_NF_CONNTRACK)
++ in the netfilter options.
++
++ If you want to compile it in kernel, say Y. If you want to compile
++ it as a module, say M here and read Documentation/modules.txt. The
++ module will be called sch_wrr. If unsure, say N.
++
+ config NET_SCH_RED
+ tristate "Random Early Detection (RED)"
+ ---help---
+diff -urN linux-2.6.20.orig/net/sched/Makefile linux-2.6.20/net/sched/Makefile
+--- linux-2.6.20.orig/net/sched/Makefile 2007-02-04 19:44:54.000000000 +0100
++++ linux-2.6.20/net/sched/Makefile 2007-02-09 09:44:17.000000000 +0100
+@@ -3,6 +3,7 @@
+ #
+
+ obj-y := sch_generic.o
++sch_wrr-objs = wrr.o wrr_proxydict.o
+
+ obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
+ obj-$(CONFIG_NET_CLS) += cls_api.o
+@@ -24,6 +25,7 @@
+ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
+ obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
+ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
++obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
+ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
+ obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
+ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
+diff -urN linux-2.6.20.orig/net/sched/wrr.c linux-2.6.20/net/sched/wrr.c
+--- linux-2.6.20.orig/net/sched/wrr.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.20/net/sched/wrr.c 2007-02-09 10:00:45.000000000 +0100
+@@ -0,0 +1,1385 @@
++/*-----------------------------------------------------------------------------
++Weighted Round Robin scheduler.
++
++Written by Christian Worm Mortensen, cworm at it-c.dk.
++
++Introduction
++============
++This module implements a weighted round robin queue with build-in classifier.
++The classifier currently map each MAC or IP address (configurable either MAC
++or IP and either source or destination) to different classes. Each such class
++is called a band. When using MAC addresses, only bridged packets can be
++classified; other packets go to a default MAC address.
++
++Each band has a weight value, where 0<weight<=1. The bandwidth each band
++gets is proportional to the weight as can be deduced from the next section.
++
++The queue
++=========
++Each band has a penalty value. Bands having something to send are kept in
++a heap according to this value. The band with the lowest penalty value
++is in the root of the heap. The penalty value is a 128 bit number. Initially
++no bands are in the heap.
++
++Two global 64 bit values counter_low_penal and counter_high_penal are initialized
++to 0 and to 2^63 respectively.
++
++Enqueuing:
++ The packet is inserted in the queue for the band it belongs to. If the band
++ is not in the heap it is inserted into it. In this case, the upper 64 bits
++ of its penalty value is set to the same as for the root-band of the heap.
++ If the heap is empty 0 is used. The lower 64 bit is set to counter_low_penal
++ and counter_low_penal is incremented by 1.
++
++Dequeuing:
++ If the heap is empty we have nothing to send.
++
++ If the root band has a non-empty queue a packet is dequeued from that.
++ The upper 64 bit of the penalty value of the band is incremented by the
++ packet size divided by the weight of the band. The lower 64 bit is set to
++ counter_high_penal and counter_high_penal is incremented by 1.
++
++ If the root element for some reason has an empty queue it is removed from
++ the heap and we try to dequeue again.
++
++The effect of the heap and the upper 64 bit of the penalty values is to
++implement a weighted round robin queue. The effect of counter_low_penal,
++counter_high_penal and the lower 64 bit of the penalty value is primarily to
++stabilize the queue and to give better quality of service to machines only
++sending a packet now and then. For example machines which have a single
++interactive connection such as telnet or simple text chatting.
++
++Setting weight
++==============
++The weight value can be changed dynamically by the queue itself. The weight
++value and how it is changed is described by the two members weight1 and
++weight2 which has type tc_wrr_class_weight and which are in each class. And
++by the two integer value members of the qdisc called penalfact1 and penalfact2.
++The structure is defined as:
++
++ struct tc_wrr_class_weight {
++ // All are represented as parts of (2^64-1).
++ __u64 val; // Current value (0 is not valid)
++ __u64 decr; // Value per byte (2^64-1 is not valid)
++ __u64 incr; // Value per second (2^64-1 is not valid)
++ __u64 min; // Minimal value (0 is not valid)
++ __u64 max; // Maximal value (0 is not valid)
++
++ // The time where the above information was correct:
++ time_t tim;
++ };
++
++The weight value used by the dequeue operations is calculated as
++weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
++same way as will be described now.
++
++Every second, the val parameter is incremented by incr.
++
++Every time a packet is transmitted the value is decremented by decr times
++the packet size. Depending on the value of the weight_mode parameter it
++is also multiplied with other numbers. This makes it possible to give
++penalty to machines transferring much data.
++
++-----------------------------------------------------------------------------*/
++
++#include <linux/autoconf.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/vmalloc.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/in.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <net/ip.h>
++#include <net/route.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <net/pkt_sched.h>
++#include <linux/if_arp.h>
++#include <linux/version.h>
++#include <linux/wrr.h>
++
++#define WRR_VER "051111"
++
++#define my_malloc(size) kmalloc(size,GFP_KERNEL)
++#define my_free(ptr) kfree(ptr)
++
++#define LOCK_START sch_tree_lock(sch);
++#define LOCK_END sch_tree_unlock(sch);
++#define ENQUEUE_SUCCESS 0
++#define ENQUEUE_FAIL NET_XMIT_DROP
++
++#if defined CONFIG_IP_NF_CONNTRACK || defined CONFIG_IP_NF_CONNTRACK_MODULE
++#include <linux/netfilter_ipv4/ip_conntrack.h>
++#define MASQ_SUPPORT
++#endif
++
++/* The penalty (priority) type */
++typedef u64 penalty_base_t;
++#define penalty_base_t_max ((penalty_base_t)-1)
++typedef struct penalty_t {
++ penalty_base_t ms;
++ penalty_base_t ls;
++} penalty_t;
++#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
++#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
++static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max };
++
++/*
++ * A general heap.
++ */
++
++struct heap;
++struct heap_element;
++
++/* Initializes an empty heap:
++ * he: A pointer to an uninitialized heap structure identifying the heap
++ * size: Maximal number of elements the heap can contain
++ * poll: An array of size "size" used by the heap.
++ */
++static void heap_init(struct heap *he, int size, struct heap_element *poll);
++
++/* Each element in the heap is identified by a user-assigned id which
++ * should be a non negative integer less than the size argument
++ * given to heap_init.
++ */
++static void heap_insert(struct heap *, int id, penalty_t);
++static void heap_remove(struct heap *, int id);
++static void heap_set_penalty(struct heap *, int id, penalty_t);
++
++/* Retrieving information */
++static char heap_empty(struct heap *); /* Heap empty? */
++static char heap_contains(struct heap *, int id); /* Does heap contain
++ the given id? */
++static int heap_root(struct heap *); /* Returns the id of the root */
++static penalty_t heap_get_penalty(struct heap *, int id); /* Returns penalty
++ of the given id */
++
++/*
++ * Heap implementation
++ */
++
++struct heap_element {
++ penalty_t penalty;
++ int id; /* The user-assigned id of this element */
++ int id2idx; /* Maps from user-assigned ids to indices in root_1 */
++};
++
++struct heap {
++ struct heap_element *root_1;
++ int elements;
++};
++
++/* Heap implementation */
++static void heap_init(struct heap *h, int size, struct heap_element *poll)
++{
++ int i;
++
++ h->elements = 0;
++ h->root_1 = poll - 1;
++
++ for (i = 0; i < size; i++)
++ poll[i].id2idx = 0;
++};
++
++static char heap_empty(struct heap *h)
++{
++ return h->elements == 0;
++}
++
++static char heap_contains(struct heap *h, int id)
++{
++ return h->root_1[id + 1].id2idx != 0;
++}
++
++static int heap_root(struct heap *h)
++{
++ return h->root_1[1].id;
++}
++
++static penalty_t heap_get_penalty(struct heap *h, int id)
++{
++ return h->root_1[h->root_1[id + 1].id2idx].penalty;
++}
++
++static void heap_penalty_changed_internal(struct heap *h, int idx);
++
++static void heap_set_penalty(struct heap *h, int id, penalty_t p)
++{
++ int idx = h->root_1[id + 1].id2idx;
++ h->root_1[idx].penalty = p;
++ heap_penalty_changed_internal(h, idx);
++}
++
++static void heap_insert(struct heap *h, int id, penalty_t p)
++{
++ /* Insert at the end of the heap */
++ h->elements++;
++ h->root_1[h->elements].id = id;
++ h->root_1[h->elements].penalty = p;
++ h->root_1[id + 1].id2idx = h->elements;
++
++ /* And put it in the right position */
++ heap_penalty_changed_internal(h, h->elements);
++}
++
++static void heap_remove(struct heap *h, int id)
++{
++ int idx = h->root_1[id + 1].id2idx;
++ int mvid;
++ h->root_1[id + 1].id2idx = 0;
++
++ if (h->elements == idx) {
++ h->elements--;
++ return;
++ }
++
++ mvid = h->root_1[h->elements].id;
++ h->root_1[idx].id = mvid;
++ h->root_1[idx].penalty = h->root_1[h->elements].penalty;
++ h->root_1[mvid + 1].id2idx = idx;
++
++ h->elements--;
++ heap_penalty_changed_internal(h, idx);
++}
++
++static void heap_swap(struct heap *h, int idx0, int idx1)
++{
++ penalty_t tmp_p;
++ int tmp_id;
++ int id0, id1;
++
++ /* Simple content */
++ tmp_p = h->root_1[idx0].penalty;
++ tmp_id = h->root_1[idx0].id;
++ h->root_1[idx0].penalty = h->root_1[idx1].penalty;
++ h->root_1[idx0].id = h->root_1[idx1].id;
++ h->root_1[idx1].penalty = tmp_p;
++ h->root_1[idx1].id = tmp_id;
++
++ /* Update reverse pointers */
++ id0 = h->root_1[idx0].id;
++ id1 = h->root_1[idx1].id;
++ h->root_1[id0 + 1].id2idx = idx0;
++ h->root_1[id1 + 1].id2idx = idx1;
++}
++
++static void heap_penalty_changed_internal(struct heap *h, int cur)
++{
++ if (cur == 1
++ || penalty_leq(h->root_1[cur >> 1].penalty,
++ h->root_1[cur].penalty)) {
++ /* We are in heap order upwards - so we should move the element down */
++ for (;;) {
++ int nxt0 = cur << 1;
++ int nxt1 = nxt0 + 1;
++ penalty_t pen_c = h->root_1[cur].penalty;
++ penalty_t pen_0 =
++ nxt0 <=
++ h->elements ? h->root_1[nxt0].penalty : penalty_max;
++ penalty_t pen_1 =
++ nxt1 <=
++ h->elements ? h->root_1[nxt1].penalty : penalty_max;
++
++ if (penalty_le(pen_0, pen_c)
++ && penalty_leq(pen_0, pen_1)) {
++ /* Swap with child 0 */
++ heap_swap(h, cur, nxt0);
++ cur = nxt0;
++ } else if (penalty_le(pen_1, pen_c)) {
++ /* Swap with child 1 */
++ heap_swap(h, cur, nxt1);
++ cur = nxt1;
++ } else {
++ /* Heap in heap order */
++ return;
++ }
++ }
++ } else {
++ /* We are not in heap order upwards (and thus we must be in heap order
++ downwards), so we move the element up */
++ while (cur != 1) { /* While not root */
++ int nxt = cur >> 1;
++ if (penalty_leq
++ (h->root_1[nxt].penalty, h->root_1[cur].penalty))
++ return;
++ heap_swap(h, cur, nxt);
++ cur = nxt;
++ }
++ }
++};
++
++/*
++ * Classification based on MAC or IP addresses. Note that for historical reasons
++ * these are prefixed with mac_ since originally only MAC-based classification
++ * was supported.
++
++ * This code should be in a separate filter module - but it isn't.
++ */
++
++
++/* Interface */
++
++struct mac_head;
++
++/* Initializes/destroys the structure we maintain.
++ Returns -1 on error */
++static int mac_init(struct mac_head *, int max_macs, char srcaddr,
++ char usemac, char usemasq, void *proxyremap);
++static void mac_done(struct mac_head *);
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list