SOURCES (LINUX_2_6): wrr-linux-2.6.12.2.patch (NEW) - fresh patch.
pluto
pluto at pld-linux.org
Thu Sep 15 12:49:41 CEST 2005
Author: pluto Date: Thu Sep 15 10:49:41 2005 GMT
Module: SOURCES Tag: LINUX_2_6
---- Log message:
- fresh patch.
---- Files affected:
SOURCES:
wrr-linux-2.6.12.2.patch (NONE -> 1.1.2.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/wrr-linux-2.6.12.2.patch
diff -u /dev/null SOURCES/wrr-linux-2.6.12.2.patch:1.1.2.1
--- /dev/null Thu Sep 15 12:49:41 2005
+++ SOURCES/wrr-linux-2.6.12.2.patch Thu Sep 15 12:49:36 2005
@@ -0,0 +1,1684 @@
+diff -urN linux-2.6.12.2.orig/include/linux/pkt_sched.h linux-2.6.12.2/include/linux/pkt_sched.h
+--- linux-2.6.12.2.orig/include/linux/pkt_sched.h 2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/include/linux/pkt_sched.h 2005-07-08 11:15:27.000000000 +0200
+@@ -458,4 +458,117 @@
+
+ #define NETEM_DIST_SCALE 8192
+
++
++/* WRR section */
++
++/* Other includes */
++#include <linux/if_ether.h>
++
++// A sub-weight of a class.
++// All numbers are represented as parts of (2^64-1).
++struct tc_wrr_class_weight {
++ __u64 val; // Current value (0 is not valid)
++ __u64 decr; // Value per byte (2^64-1 is not valid)
++ __u64 incr; // Value per second (2^64-1 is not valid)
++ __u64 min; // Minimal value (0 is not valid)
++ __u64 max; // Maximal value (0 is not valid)
++
++ // The time at which the above information was correct:
++ time_t tim;
++};
++
++// Packet sent when modifying a class:
++struct tc_wrr_class_modf {
++ // Not-valid values are ignored.
++ struct tc_wrr_class_weight weight1;
++ struct tc_wrr_class_weight weight2;
++};
++
++// Packet returned when querying a class:
++struct tc_wrr_class_stats {
++ char used; // If this is false the information below is invalid
++
++ struct tc_wrr_class_modf class_modf;
++
++ unsigned char addr[ETH_ALEN];
++ char usemac; // True if addr is a MAC address, else it is an IP address
++ // (this value is only for convenience, it is always the same
++ // value as in the qdisc)
++ int heappos; // Current heap position or 0 if not in heap
++ __u64 penal_ls; // Penalty value in heap (ls)
++ __u64 penal_ms; // Penalty value in heap (ms)
++};
++
++// Qdisc-wide penalty information (boolean values - 2 not valid)
++struct tc_wrr_qdisc_weight {
++ signed char weight_mode; // 0=No automatic change to weight
++ // 1=Decrease normally
++ // 2=Also multiply with number of machines
++ // 3=Instead multiply with priority divided
++ // by the priority of the other.
++ // -1=No change (leave the current setting as is)
++};
++
++// Packet sent when modifying a qdisc:
++struct tc_wrr_qdisc_modf {
++ // Not-valid values are ignored:
++ struct tc_wrr_qdisc_weight weight1;
++ struct tc_wrr_qdisc_weight weight2;
++};
++
++// Packet sent when creating a qdisc:
++struct tc_wrr_qdisc_crt {
++ struct tc_wrr_qdisc_modf qdisc_modf;
++
++ char srcaddr; // 1=lookup source, 0=lookup destination
++ char usemac; // 1=Classify on MAC addresses, 0=classify on IP
++ char usemasq; // 1=Classify based on masquerading - only valid
++ // if usemac is zero
++ int bands_max; // Maximal number of bands (i.e.: classes)
++ int proxy_maxconn; // If different from 0 then we support proxy remapping
++ // of packets. And this is the maximal number of
++ // concurrent proxy connections.
++};
++
++// Packet returned when querying a qdisc:
++struct tc_wrr_qdisc_stats {
++ struct tc_wrr_qdisc_crt qdisc_crt;
++ int proxy_curconn;
++ int nodes_in_heap; // Current number of bands wanting to send something
++ int bands_cur; // Current number of bands used (i.e.: MAC/IP addresses seen)
++ int bands_reused; // Number of times a band has been reused.
++ int packets_requed; // Number of times packets have been requeued.
++ __u64 priosum; // Sum of priorities in heap where 1 is 2^32
++};
++
++struct tc_wrr_qdisc_modf_std {
++ // This indicates which of the tc_wrr_qdisc_modf structures this is:
++ char proxy; // 0=This struct
++
++ // Should we also change a class?
++ char change_class;
++
++ // Only valid if change_class is false
++ struct tc_wrr_qdisc_modf qdisc_modf;
++
++ // Only valid if change_class is true:
++ unsigned char addr[ETH_ALEN]; // Class to change (non-used bytes should be 0)
++ struct tc_wrr_class_modf class_modf; // The change
++};
++
++// Used for proxy remapping:
++struct tc_wrr_qdisc_modf_proxy {
++ // This indicates which of the tc_wrr_qdisc_modf structures this is:
++ char proxy; // 1=This struct
++
++ // This is 1 if the proxyremap information should be reset
++ char reset;
++
++ // changec is the number of elements in changes.
++ int changec;
++
++ // This is an array of type ProxyRemapBlock (NOTE(review): declared as long - confirm):
++ long changes[0];
++};
++
+ #endif
+diff -urN linux-2.6.12.2.orig/net/sched/Kconfig linux-2.6.12.2/net/sched/Kconfig
+--- linux-2.6.12.2.orig/net/sched/Kconfig 2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/net/sched/Kconfig 2005-07-06 02:44:31.000000000 +0200
+@@ -129,6 +129,21 @@
+ To compile this code as a module, choose M here: the
+ module will be called sch_prio.
+
++config NET_SCH_WRR
++ tristate "WRR packet scheduler"
++ depends on NET_SCHED && IP_NF_CONNTRACK
++ help
++ The weighted round-robin scheduling algorithm directs network
++ connections to different real servers based on server weights
++ in a round-robin manner. Servers with higher weights receive
++ new connections before those with lower weights, and servers
++ with higher weights get more connections than those with lower
++ weights and servers with equal weights get equal connections.
++
++ If you want to compile it in kernel, say Y. If you want to compile
++ it as a module, say M here and read Documentation/modules.txt. If
++ unsure, say N.
++
+ config NET_SCH_RED
+ tristate "RED queue"
+ depends on NET_SCHED
+diff -urN linux-2.6.12.2.orig/net/sched/Makefile linux-2.6.12.2/net/sched/Makefile
+--- linux-2.6.12.2.orig/net/sched/Makefile 2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/net/sched/Makefile 2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +0,1 @@
++obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
+diff -urN linux-2.6.12.2.orig/net/sched/proxydict.c linux-2.6.12.2/net/sched/proxydict.c
+--- linux-2.6.12.2.orig/net/sched/proxydict.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxydict.c 2005-07-08 11:23:38.000000000 +0200
+@@ -0,0 +1,153 @@
++#ifndef __KERNEL__
++#include <string.h>
++#include <netinet/in.h>
++#endif
++
++#include "proxyremap.h"
++#include "proxydict.h"
++
++
++/*--------------------------------------------------------------------------
++Implementation.
++*/
++
++// Hash function: maps (server,port,proto) to a bucket index in m's hash table
++#define hash_fnc(m,server,port,proto) \
++ (((proto)*7+(server)*13+(port)*5)%(m)->hash_size)
++
++// Size of hash table given maximal number of connections:
++#define hash_size_max_con(max_con) (2*(max_con))
++
++// The memory area we maintain:
++typedef struct {
++ int hash_size; // Number of entries in hash_table
++ int max_con; // Maximal number of connections (entries in next and info)
++ int cur_con; // Number of connections currently stored
++
++ int free_first; // Index of the first free entry in next/info
++
++ // Then we have:
++ // int hash_table[hash_size];
++ // int next[max_con];
++ // ProxyRemapBlock info[max_con];
++ //
++ // The idea is the following:
++ // Given a connection we map it by hash_fnc into hash_table. This gives an
++ // index in next which contains a -1 terminated linked list of connections
++ // mapping to that hash value.
++ //
++ // The entries in next that are not allocated are also kept in a linked
++ // list whose first free index is free_first.
++} proxy_memory;
++
++#define Memory(m) ((proxy_memory*)(m)) // View the area as its header
++#define Hash_table(m) ((int*)(((char*)(m))+sizeof(proxy_memory))) // hash_table[] follows the header
++#define Next(m) ((int*)(((char*)(m))+sizeof(proxy_memory)+ \
++ sizeof(int)*((proxy_memory*)(m))->hash_size)) // next[] follows hash_table[]
++#define Info(m) ((ProxyRemapBlock*)(((char*)(m))+ \
++ sizeof(proxy_memory)+ \
++ sizeof(int)*((proxy_memory*)(m))->hash_size+\
++ sizeof(int)*((proxy_memory*)(m))->max_con \
++ )) // info[] follows next[]
++
++int proxyGetMemSize(int max_con) { // Total bytes needed for max_con connections
++ return sizeof(proxy_memory)+ // header
++ sizeof(int)*hash_size_max_con(max_con)+ // hash_table[]
++ sizeof(int)*max_con+ // next[]
++ sizeof(ProxyRemapBlock)*max_con; // info[]
++}
++
++void proxyInitMem(void* data, int max_con) { // data must hold proxyGetMemSize(max_con) bytes
++ // Init m:
++ proxy_memory* m=Memory(data);
++ m->max_con=max_con;
++ m->cur_con=0;
++ m->hash_size=hash_size_max_con(max_con);
++
++ {
++ // Get pointers (Next() reads hash_size, so the header must be set first):
++ int* hash_table=Hash_table(data);
++ int* next=Next(data);
++ int i;
++
++ // Init the hash table: -1 marks an empty bucket
++ for(i=0; i<m->hash_size; i++) hash_table[i]=-1;
++
++ // Init the free-list: entry i links to i+1, starting at entry 0
++ for(i=0; i<m->max_con; i++) next[i]=i+1;
++ m->free_first=0;
++ }
++}
++
++int proxyGetCurConn(void* data) { // Returns the number of connections currently stored
++ return Memory(data)->cur_con;
++}
++
++int proxyGetMaxConn(void* data) { // Returns the max_con value given to proxyInitMem
++ return Memory(data)->max_con;
++}
++
++ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto) {
++ proxy_memory* m=Memory(data);
++ int* hash_table=Hash_table(m);
++ int* next=Next(m);
++ ProxyRemapBlock* info=Info(m);
++ int i;
++ // Entries store proto as unsigned char and are hashed that way on insert, so cast here too:
++ for(i=hash_table[hash_fnc(m,ipaddr,port,(unsigned char)proto)]; i!=-1; i=next[i]) {
++ if(info[i].proto==(unsigned char)proto &&
++ info[i].sport==port &&
++ info[i].saddr==ipaddr) return &info[i]; // Found: server endpoint and protocol match
++ }
++
++ return 0; // No entry for this connection
++}
++
++int proxyConsumeBlock(void* data, ProxyRemapBlock* blk) { // Opens (blk->open!=0) or closes a connection; 0 on success, -1 on failure
++ proxy_memory* m=Memory(data);
++ int* hash_table=Hash_table(m);
++ int* next=Next(m);
++ ProxyRemapBlock* info=Info(m);
++ int hash=hash_fnc(m,blk->saddr,blk->sport,blk->proto);
++ int foo; // Scratch index of the entry being moved between lists
++
++ if(blk->open) {
++ if(m->cur_con == m->max_con) return -1; // Table is full
++
++ // Insert the block at a free entry:
++ info[m->free_first]=*blk;
++ m->cur_con++;
++
++ foo=next[m->free_first]; // Remember the rest of the free list
++
++ // And insert it at the head of its chain in the hash table:
++ next[m->free_first]=hash_table[hash];
++ hash_table[hash]=m->free_first;
++ m->free_first=foo;
++ } else {
++ int* toupdate; // Points at the link (bucket head or next[] slot) referring to the candidate
++
++ // Find the block
++ for(toupdate=&hash_table[hash];
++ *toupdate!=-1;
++ toupdate=&next[*toupdate]) {
++ if(info[*toupdate].proto==blk->proto &&
++ info[*toupdate].sport==blk->sport &&
++ info[*toupdate].saddr==blk->saddr) break;
++ }
++ if(*toupdate==-1) return -1; // No such connection
++
++ foo=*toupdate;
++
++ // Delete it from the hashing list:
++ *toupdate=next[*toupdate];
++
++ // And put it on the free list:
++ next[foo]=m->free_first;
++ m->free_first=foo;
++
++ m->cur_con--;
++ }
++
++ return 0;
++}
+diff -urN linux-2.6.12.2.orig/net/sched/proxydict.h linux-2.6.12.2/net/sched/proxydict.h
+--- linux-2.6.12.2.orig/net/sched/proxydict.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxydict.h 2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,32 @@
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/*--------------------------------------------------------------------------
++This is common code for handling the tables containing information about
++which proxyserver connections are associated with which machines.
++*/
++
++// Returns the number of bytes that should be available in the area
++// maintained by this module given the maximal number of concurrent
++// connections.
++int proxyGetMemSize(int max_connections);
++
++// Initializes a memory area to use. There must be as many bytes
++// available as returned by proxyGetMemSize.
++void proxyInitMem(void* data, int max_connections);
++
++// Queries:
++int proxyGetCurConn(void* data); // Returns current number of connections
++int proxyGetMaxConn(void* data); // Returns maximal number of connections
++
++// This is called to open and close connections. Returns -1 if
++// a protocol error occurs (i.e.: if it is discovered)
++int proxyConsumeBlock(void* data, ProxyRemapBlock*);
++
++// Returns the RemapBlock associated with this connection or 0:
++ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto);
++
++#ifdef __cplusplus
++}
++#endif
+diff -urN linux-2.6.12.2.orig/net/sched/proxyremap.h linux-2.6.12.2/net/sched/proxyremap.h
+--- linux-2.6.12.2.orig/net/sched/proxyremap.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxyremap.h 2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,33 @@
++#ifndef PROXYREMAP_H
++#define PROXYREMAP_H
++
++// This describes the information that is written in proxyremap.log and which
++// is used in the communication between proxyremapserver and proxyremapclient.
++// Everything is in network order.
++
++// First this header is sent:
++#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
++
++// Then this block is sent every time a connection is opened or closed.
++// Note how it is aligned to keep its size small - arrays of this
++// structure are saved in many places.
++typedef struct {
++ // Server endpoint of connection:
++ unsigned saddr;
++ unsigned short sport;
++
++ // IP protocol for this connection (typically udp or tcp):
++ unsigned char proto;
++
++ // Is the connection opened or closed?
++ unsigned char open;
++
++ // Client the packets should be accounted to:
++ unsigned caddr;
++ unsigned char macaddr[6]; // Might be 0.
++
++ // An informal two-character code from the proxyserver. Used for debugging.
++ char proxyinfo[2];
++} ProxyRemapBlock;
++
++#endif
+diff -urN linux-2.6.12.2.orig/net/sched/sch_wrr.c linux-2.6.12.2/net/sched/sch_wrr.c
+--- linux-2.6.12.2.orig/net/sched/sch_wrr.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/sch_wrr.c 2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,1299 @@
++/*-----------------------------------------------------------------------------
++Weighted Round Robin scheduler.
++
++Written by Christian Worm Mortensen, cworm at it-c.dk.
++
++Introduction
++============
++This module implements a weighted round robin queue with built-in classifier.
++The classifier currently maps each MAC or IP address (configurable either MAC
++or IP and either source or destination) to different classes. Each such class
++is called a band. When using MAC addresses only bridged packets can be
++classified; other packets go to a default MAC address.
++
++Each band has a weight value, where 0<weight<=1. The bandwidth each band
++gets is proportional to the weight as can be deduced from the next section.
++
++
++The queue
++=========
++Each band has a penalty value. Bands having something to send are kept in
++a heap according to this value. The band with the lowest penalty value
++is in the root of the heap. The penalty value is a 128 bit number. Initially
++no bands are in the heap.
++
++Two global 64 bit values counter_low_penal and counter_high_penal are initialized
++to 0 and to 2^63 respectively.
++
++Enqueuing:
++ The packet is inserted in the queue for the band it belongs to. If the band
++ is not in the heap it is inserted into it. In this case, the upper 64 bits
++ of its penalty value are set to the same as for the root-band of the heap.
++ If the heap is empty 0 is used. The lower 64 bits are set to counter_low_penal
++ and counter_low_penal is incremented by 1.
++
++Dequeuing:
++ If the heap is empty we have nothing to send.
++
++ If the root band has a non-empty queue a packet is dequeued from that.
++ The upper 64 bits of the penalty value of the band are incremented by the
++ packet size divided by the weight of the band. The lower 64 bits are set to
++ counter_high_penal and counter_high_penal is incremented by 1.
++
++ If the root element for some reason has an empty queue it is removed from
++ the heap and we try to dequeue again.
++
++The effect of the heap and the upper 64 bit of the penalty values is to
++implement a weighted round robin queue. The effect of counter_low_penal,
++counter_high_penal and the lower 64 bit of the penalty value is primarily to
++stabilize the queue and to give better quality of service to machines only
++sending a packet now and then. For example machines which have a single
++interactive connection such as telnet or simple text chatting.
++
++
++Setting weight
++==============
++The weight value can be changed dynamically by the queue itself. The weight
++value and how it is changed is described by the two members weight1 and
++weight2 which has type tc_wrr_class_weight and which are in each class. And
++by the two integer value members of the qdisc called penalfact1 and penalfact2.
++The structure is defined as:
++
++ struct tc_wrr_class_weight {
++ // All are represented as parts of (2^64-1).
++ __u64 val; // Current value (0 is not valid)
++ __u64 decr; // Value per byte (2^64-1 is not valid)
++ __u64 incr; // Value per second (2^64-1 is not valid)
++ __u64 min; // Minimal value (0 is not valid)
++ __u64 max; // Maximal value (0 is not valid)
++
++ // The time at which the above information was correct:
++ time_t tim;
++ };
++
++The weight value used by the dequeue operations is calculated as
++weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
++same way as will be described now.
++
++Every second, the val parameter is incremented by incr.
++
++Every time a packet is transmitted the value is decremented by decr times
++the packet size. Depending on the value of the weight_mode parameter it
++is also multiplied with other numbers. This makes it possible to give
++penalty to machines transferring much data.
++
++-----------------------------------------------------------------------------*/
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/vmalloc.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/in.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <net/ip.h>
++#include <net/route.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <net/pkt_sched.h>
++
++#include <linux/if_arp.h>
++#include <linux/version.h>
++
++// There seem to be problems when calling functions from userspace when
++// using vmalloc and vfree.
++//#define my_malloc(size) vmalloc(size)
++//#define my_free(ptr) vfree(ptr)
++#define my_malloc(size) kmalloc(size,GFP_KERNEL)
++#define my_free(ptr) kfree(ptr)
++
++#define LOCK_START sch_tree_lock(sch); // Expects a variable named sch in scope; expands with its own ';'
++#define LOCK_END sch_tree_unlock(sch); // Expects a variable named sch in scope; expands with its own ';'
++#define ENQUEUE_SUCCESS 0
++#define ENQUEUE_FAIL NET_XMIT_DROP
++#ifdef CONFIG_IP_NF_CONNTRACK
++ #include <linux/netfilter_ipv4/ip_conntrack.h>
++ #define MASQ_SUPPORT
++#endif
++
++#include "proxydict.c"
++
++// The penalty (priority) type: a 128 bit value, ms is compared before ls
++typedef u64 penalty_base_t;
++#define penalty_base_t_max ((penalty_base_t)-1)
++typedef struct penalty_t {
++ penalty_base_t ms;
++ penalty_base_t ls;
++} penalty_t;
++#define penalty_leq(a,b) ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<=(b).ls))
++#define penalty_le(a,b) ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<(b).ls))
++static penalty_t penalty_max={penalty_base_t_max,penalty_base_t_max};
++
++//-----------------------------------------------------------------------------
++// A general heap.
++
++struct heap;
++struct heap_element;
++
++// Initializes an empty heap:
++// he: A pointer to an uninitialized heap structure identifying the heap
++// size: Maximal number of elements the heap can contain
++// poll: An array of size "size" used by the heap.
++static void heap_init(struct heap* he,int size, struct heap_element* poll);
++
++// Each element in the heap is identified by a user-assigned id which
++// should be a non negative integer less than the size argument
++// given to heap_init.
++static void heap_insert(struct heap*, int id, penalty_t);
++static void heap_remove(struct heap*, int id);
++static void heap_set_penalty(struct heap*, int id, penalty_t);
++
++// Retrieving information:
++static char heap_empty(struct heap*); // Heap empty?
++static char heap_contains(struct heap*, int id); // Does heap contain
++ // the given id?
++static int heap_root(struct heap*); // Returns the id of the root
++static penalty_t heap_get_penalty(struct heap*, int id); // Returns penalty
++ // of the given id (presumably; original said "root node")
++
++//--------------------
++// Heap implementation
++
++struct heap_element {
++ penalty_t penalty;
++ int id; // The user-assigned id of this element
++ int id2idx; // Maps from user-assigned ids to indices in root_1 (0 = id not in heap)
++};
++
++struct heap {
++ struct heap_element* root_1; // Element array biased by -1, so the root is root_1[1]
++ int elements; // Number of elements currently in the heap
++};
++
++// Heap implementation:
++static void heap_init(struct heap* h, int size, struct heap_element* poll) {
++ int i;
++
++ h->elements=0; // The heap starts out empty
++ h->root_1=poll-1; // Biased so that root_1[1] is poll[0] (1-based indexing)
++
++ for(i=0; i<size; i++) poll[i].id2idx=0; // 0 = id not in heap (see heap_contains)
++}
++
++static char heap_empty(struct heap* h) { // Non-zero when the heap holds no elements
++ return h->elements==0;
++}
++
++static char heap_contains(struct heap* h, int id) { // Non-zero when id has a non-zero id2idx entry
++ return h->root_1[id+1].id2idx!=0;
++}
++
++static int heap_root(struct heap* h) {
++ return h->root_1[1].id;
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list