SOURCES (LINUX_2_6): wrr-linux-2.6.12.2.patch (NEW) - fresh patch.

pluto pluto at pld-linux.org
Thu Sep 15 12:49:41 CEST 2005


Author: pluto                        Date: Thu Sep 15 10:49:41 2005 GMT
Module: SOURCES                       Tag: LINUX_2_6
---- Log message:
- fresh patch.

---- Files affected:
SOURCES:
   wrr-linux-2.6.12.2.patch (NONE -> 1.1.2.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/wrr-linux-2.6.12.2.patch
diff -u /dev/null SOURCES/wrr-linux-2.6.12.2.patch:1.1.2.1
--- /dev/null	Thu Sep 15 12:49:41 2005
+++ SOURCES/wrr-linux-2.6.12.2.patch	Thu Sep 15 12:49:36 2005
@@ -0,0 +1,1684 @@
+diff -urN linux-2.6.12.2.orig/include/linux/pkt_sched.h linux-2.6.12.2/include/linux/pkt_sched.h
+--- linux-2.6.12.2.orig/include/linux/pkt_sched.h	2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/include/linux/pkt_sched.h	2005-07-08 11:15:27.000000000 +0200
+@@ -458,4 +458,117 @@
+ 
+ #define NETEM_DIST_SCALE	8192
+ 
++
++/* WRR section */
++
++/* Other includes */
++#include <linux/if_ether.h>
++
++// One sub-weight of a class.
++// All numbers are represented as parts of (2^64-1).
++struct tc_wrr_class_weight {
++  __u64 val;  // Current value                        (0 is not valid)
++  __u64 decr; // Value per byte                       (2^64-1 is not valid)
++  __u64 incr; // Value per second                     (2^64-1 is not valid)
++  __u64 min;  // Minimal value                        (0 is not valid)
++  __u64 max;  // Maximal value                        (0 is not valid)
++
++  // The time where the above information was correct:
++  time_t tim;
++};
++
++// Packet sent when modifying a class:
++struct tc_wrr_class_modf {
++  // Not-valid values are ignored.
++  struct tc_wrr_class_weight weight1;
++  struct tc_wrr_class_weight weight2;
++};
++
++// Packet returned when querying a class:
++struct tc_wrr_class_stats {
++  char used; // If this is false the information below is invalid
++
++  struct tc_wrr_class_modf class_modf;
++
++  unsigned char addr[ETH_ALEN];
++  char usemac;    // True if addr is a MAC address, else it is an IP address
++                  // (this value is only for convenience, it is always the same
++		  //  value as in the qdisc)
++  int heappos;    // Current heap position or 0 if not in heap  
++  __u64 penal_ls; // Penalty value in heap (ls)
++  __u64 penal_ms; // Penalty value in heap (ms)
++};
++
++// Qdisc-wide penalty information (boolean values - 2 not valid). NOTE(review): 2 is listed as a mode below - confirm.
++struct tc_wrr_qdisc_weight {
++  signed char weight_mode; // 0=No automatic change to weight
++                    // 1=Decrease normally
++		    // 2=Also multiply with number of machines
++		    // 3=Instead multiply with priority divided
++		    //   with priority of the other.
++		    // -1=no change
++};
++
++// Packet sent when modifying a qdisc:
++struct tc_wrr_qdisc_modf {
++  // Not-valid values are ignored:
++  struct tc_wrr_qdisc_weight weight1;
++  struct tc_wrr_qdisc_weight weight2;
++};
++
++// Packet sent when creating a qdisc:
++struct tc_wrr_qdisc_crt {
++  struct tc_wrr_qdisc_modf qdisc_modf;
++  
++  char srcaddr;      // 1=lookup source, 0=lookup destination
++  char usemac;       // 1=Classify on MAC addresses, 0=classify on IP
++  char usemasq;      // 1=Classify based on masquerading - only valid
++                     //   if usemac is zero
++  int bands_max;     // Maximal number of bands (i.e.: classes)  
++  int proxy_maxconn; // If different from 0 then we support proxy remapping
++                     // of packets. And this is the maximal number of
++		     // concurrent proxy connections.
++};
++
++// Packet returned when querying a qdisc:
++struct tc_wrr_qdisc_stats {
++  struct tc_wrr_qdisc_crt qdisc_crt;
++  int proxy_curconn;		     
++  int nodes_in_heap;  // Current number of bands wanting to send something
++  int bands_cur;      // Current number of bands used (i.e.: MAC/IP addresses seen)
++  int bands_reused;   // Number of times this band has been reused.
++  int packets_requed; // Number of times packets have been requeued.
++  __u64 priosum;      // Sum of priorities in heap where 1 is 2^32
++};
++
++struct tc_wrr_qdisc_modf_std {
++  // This indicates which of the tc_wrr_qdisc_modf structures this is:
++  char proxy; // 0=This struct
++
++  // Should we also change a class?
++  char change_class;
++
++  // Only valid if change_class is false
++  struct tc_wrr_qdisc_modf qdisc_modf;
++    
++  // Only valid if change_class is true:
++  unsigned char addr[ETH_ALEN]; // Class to change (unused bytes should be 0)
++  struct tc_wrr_class_modf class_modf; // The change    
++};
++
++// Used for proxy remapping:
++struct tc_wrr_qdisc_modf_proxy {
++  // This indicates which of the tc_wrr_qdisc_modf structures this is:
++  char proxy; // 1=This struct
++  
++  // This is 1 if the proxyremap information should be reset
++  char reset;
++  
++  // changec is the number of elements in changes.
++  int changec; 
++  
++  // This is an array of type ProxyRemapBlock (NOTE(review): declared as long - confirm how users cast it):
++  long changes[0];  
++};
++
+ #endif
+diff -urN linux-2.6.12.2.orig/net/sched/Kconfig linux-2.6.12.2/net/sched/Kconfig
+--- linux-2.6.12.2.orig/net/sched/Kconfig	2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/net/sched/Kconfig	2005-07-06 02:44:31.000000000 +0200
+@@ -129,6 +129,21 @@
+ 	  To compile this code as a module, choose M here: the
+ 	  module will be called sch_prio.
+ 
++config NET_SCH_WRR
++	tristate "WRR packet scheduler"
++	depends on NET_SCHED && IP_NF_CONNTRACK
++	help
++	  The weighted round-robin scheduling algorithm directs network
++	  connections to different real servers based on server weights
++	  in a round-robin manner. Servers with higher weights receive
++	  new connections first than those with less weights, and servers
++	  with higher weights get more connections than those with less
++	  weights and servers with equal weights get equal connections.
++
++	  If you want to compile it in kernel, say Y. If you want to compile
++	  it as a module, say M here and read Documentation/modules.txt. If
++	  unsure, say N.
++
+ config NET_SCH_RED
+ 	tristate "RED queue"
+ 	depends on NET_SCHED
+diff -urN linux-2.6.12.2.orig/net/sched/Makefile linux-2.6.12.2/net/sched/Makefile
+--- linux-2.6.12.2.orig/net/sched/Makefile	2005-06-17 21:48:29.000000000 +0200
++++ linux-2.6.12.2/net/sched/Makefile	2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +0,1 @@
++obj-$(CONFIG_NET_SCH_WRR)	+= sch_wrr.o
+diff -urN linux-2.6.12.2.orig/net/sched/proxydict.c linux-2.6.12.2/net/sched/proxydict.c
+--- linux-2.6.12.2.orig/net/sched/proxydict.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxydict.c	2005-07-08 11:23:38.000000000 +0200
+@@ -0,0 +1,153 @@
++#ifndef __KERNEL__
++#include <string.h>
++#include <netinet/in.h>
++#endif
++
++#include "proxyremap.h"
++#include "proxydict.h"
++
++
++/*--------------------------------------------------------------------------
++Implementation.
++*/
++
++// Hash function: maps a (server, port, proto) key to a bucket index of table m.
++#define hash_fnc(m,server,port,proto) \
++ (((proto)*7+(server)*13+(port)*5)%(m)->hash_size)
++
++// Size of the hash table given the maximal number of connections (two buckets per connection):
++#define hash_size_max_con(max_con) (2*(max_con))
++
++// The memory area we maintain:
++typedef struct {
++  int hash_size;
++  int max_con;
++  int cur_con;
++  
++  int free_first;
++  
++  // Then we have:
++  //   int hash_table[hash_size];
++  //   int next[max_con];
++  //   ProxyRemapBlock info[max_con];
++  //
++  // The idea is the following:
++  //   Given a connection we map it by hash_fnc into hash_table. This gives an 
++  //   index in next which contains a -1 terminated linked list of connections 
++  //   mapping to that hash value.
++  //
++  //   The entries in next that are not allocated are also in a linked list where 
++  //   the first free index is free_first.
++} proxy_memory;  
++// Accessors for the header and for the three arrays that follow it in the area m:
++#define Memory(m)     ((proxy_memory*)(m))
++#define Hash_table(m) ((int*)(((char*)(m))+sizeof(proxy_memory)))
++#define Next(m)       ((int*)(((char*)(m))+sizeof(proxy_memory)+     \
++                       sizeof(int)*((proxy_memory*)(m))->hash_size))
++#define Info(m)       ((ProxyRemapBlock*)(((char*)(m))+                          \
++                                           sizeof(proxy_memory)+                     \
++                                           sizeof(int)*((proxy_memory*)(m))->hash_size+\
++					   sizeof(int)*((proxy_memory*)(m))->max_con   \
++					  ))
++
++int proxyGetMemSize(int max_con) { // Bytes needed for max_con connections
++  return sizeof(ProxyRemapBlock)*max_con+           // info[max_con]
++         sizeof(int)*max_con+                       // next[max_con]
++         sizeof(int)*hash_size_max_con(max_con)+    // hash_table[hash_size]
++         sizeof(proxy_memory);                      // the header itself
++}
++
++void proxyInitMem(void* data, int max_con) {
++  // Fill in the header first; Next() reads hash_size from it:
++  proxy_memory* mem=Memory(data);
++  mem->hash_size=hash_size_max_con(max_con);
++  mem->max_con=max_con;
++  mem->cur_con=0;
++
++  {
++    // Locate the two int arrays that follow the header:
++    int* buckets=Hash_table(data);
++    int* chain=Next(data);
++    int k;
++  
++    // Every bucket starts out empty:
++    for(k=mem->hash_size-1; k>=0; k--) buckets[k]=-1;
++  
++    // Thread all entries onto the free list, starting at entry 0:
++    mem->free_first=0;
++    for(k=0; k<mem->max_con; k++) chain[k]=k+1;
++  }
++}  
++  
++int proxyGetCurConn(void* data) {
++  return ((proxy_memory*)data)->cur_con; // Number of connections currently stored
++}
++
++int proxyGetMaxConn(void* data) {
++  return ((proxy_memory*)data)->max_con; // Capacity given to proxyInitMem
++}
++
++ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto) {    
++  proxy_memory* m=Memory(data);
++  int* next=Next(m);
++  ProxyRemapBlock* info=Info(m);
++  // Blocks store proto as unsigned char; convert so values >=128 hash and compare alike:
++  unsigned char uproto=(unsigned char)proto;
++  int i;
++  // Walk the -1 terminated chain of the bucket this connection hashes to:
++  for(i=Hash_table(m)[hash_fnc(m,ipaddr,port,uproto)]; i!=-1; i=next[i]) {
++    if(info[i].proto==uproto &&
++       info[i].sport==port &&
++       info[i].saddr==ipaddr) return &info[i];
++  }
++  return 0; // No stored connection has this (ipaddr, port, proto) key
++}    
++
++int proxyConsumeBlock(void* data, ProxyRemapBlock* blk) { // Adds (blk->open set) or removes the connection in blk
++  proxy_memory* m=Memory(data);
++  int* hash_table=Hash_table(m);
++  int* next=Next(m);
++  ProxyRemapBlock* info=Info(m);
++  int hash=hash_fnc(m,blk->saddr,blk->sport,blk->proto);
++  int foo; // Scratch index of the entry being moved between lists
++  
++  if(blk->open) {
++    if(m->cur_con == m->max_con) return -1; // Table is full
++    
++    // Insert the block at a free entry:
++    info[m->free_first]=*blk;
++    m->cur_con++;
++
++    foo=next[m->free_first]; // Remember the rest of the free list
++    
++    // And insert it at the head of its chain in the hash table:
++    next[m->free_first]=hash_table[hash];
++    hash_table[hash]=m->free_first;
++    m->free_first=foo;
++  } else {
++    int* toupdate; // Points at the link (bucket head or next[] slot) that leads to the current entry
++    
++    // Find the block
++    for(toupdate=&hash_table[hash]; 
++        *toupdate!=-1; 
++	toupdate=&next[*toupdate]) {
++      if(info[*toupdate].proto==blk->proto &&
++         info[*toupdate].sport==blk->sport &&
++         info[*toupdate].saddr==blk->saddr) break;
++    }
++    if(*toupdate==-1) return -1; // No such connection is stored
++
++    foo=*toupdate;
++    
++    // Delete it from the hashing list:    
++    *toupdate=next[*toupdate];
++    
++    // And put it on the free list:
++    next[foo]=m->free_first;
++    m->free_first=foo;
++
++    m->cur_con--;
++  }
++  
++  return 0;
++}
+diff -urN linux-2.6.12.2.orig/net/sched/proxydict.h linux-2.6.12.2/net/sched/proxydict.h
+--- linux-2.6.12.2.orig/net/sched/proxydict.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxydict.h	2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,32 @@
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/*--------------------------------------------------------------------------
++This is common code for handling the tables containing information about 
++which proxyserver connections are associated with which machines.
++*/
++
++// Returns the number of bytes that should be available in the area
++// maintained by this module given the maximal number of concurrent 
++// connections.
++int proxyGetMemSize(int max_connections);
++
++// Initializes a memory area to use. There must be as many bytes
++// available as returned by proxyGetMemSize.
++void proxyInitMem(void* data, int max_connections);
++
++// Queries (names match the definitions in proxydict.c):
++int proxyGetCurConn(void* data); // Returns current number of connections
++int proxyGetMaxConn(void* data); // Returns maximal number of connections
++
++// This is called to open and close connections. Returns -1 if
++// a protocol error occurs (i.e.: If it is discovered)
++int proxyConsumeBlock(void* data, ProxyRemapBlock*);
++
++// Returns the RemapBlock associated with this connection or 0:
++ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto);
++
++#ifdef __cplusplus
++}
++#endif
+diff -urN linux-2.6.12.2.orig/net/sched/proxyremap.h linux-2.6.12.2/net/sched/proxyremap.h
+--- linux-2.6.12.2.orig/net/sched/proxyremap.h	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/proxyremap.h	2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,33 @@
++#ifndef PROXYREMAP_H
++#define PROXYREMAP_H
++
++// This describes the information that is written in proxyremap.log and which
++// are used in the communication between proxyremapserver and proxyremapclient.
++// Everything is in network order.
++
++// First this header is sent:
++#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
++
++// Then this block is sent every time a connection is opened or closed.
++// Note how it is laid out to keep it small - arrays of this
++// structure are saved in many places.
++typedef struct {   
++  // Server endpoint of connection:
++  unsigned saddr;
++  unsigned short sport;
++
++  // IP protocol for this connection (typically udp or tcp):
++  unsigned char proto;
++  
++  // Is the connection opened or closed?
++  unsigned char open;
++  
++  // Client the packets should be accounted to:
++  unsigned caddr;
++  unsigned char macaddr[6]; // Might be 0.
++  
++  // An informal two-character code from the proxyserver. Used for debugging.
++  char proxyinfo[2];
++} ProxyRemapBlock;
++
++#endif
+diff -urN linux-2.6.12.2.orig/net/sched/sch_wrr.c linux-2.6.12.2/net/sched/sch_wrr.c
+--- linux-2.6.12.2.orig/net/sched/sch_wrr.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.12.2/net/sched/sch_wrr.c	2005-07-06 02:44:31.000000000 +0200
+@@ -0,0 +1,1299 @@
++/*-----------------------------------------------------------------------------
++Weighted Round Robin scheduler.
++  
++Written by Christian Worm Mortensen, cworm at it-c.dk.
++
++Introduction
++============
++This module implements a weighted round robin queue with built-in classifier.
++The classifier currently maps each MAC or IP address (configurable either MAC
++or IP and either source or destination) to different classes. Each such class 
++is called a band. When using MAC addresses only bridged packets can be 
++classified; other packets go to a default MAC address.
++
++Each band has a weight value, where 0<weight<=1. The bandwidth each band
++get is proportional to the weight as can be deduced from the next section.
++
++
++The queue
++=========
++Each band has a penalty value. Bands having something to send are kept in
++a heap according to this value. The band with the lowest penalty value
++is in the root of the heap. The penalty value is a 128 bit number. Initially 
++no bands are in the heap.
++
++Two global 64 bit values counter_low_penal and counter_high_penal are initialized
++to 0 and to 2^63 respectively.
++
++Enqueuing:
++  The packet is inserted in the queue for the band it belongs to. If the band 
++  is not in the heap it is inserted into it. In this case, the upper 64 bits 
++  of its penalty value are set to the same as for the root-band of the heap. 
++  If the heap is empty 0 is used. The lower 64 bits are set to counter_low_penal
++  and counter_low_penal is incremented by 1.
++  
++Dequeuing:
++  If the heap is empty we have nothing to send. 
++  
++  If the root band has a non-empty queue a packet is dequeued from that.
++  The upper 64 bits of the penalty value of the band are incremented by the 
++  packet size divided by the weight of the band. The lower 64 bits are set to 
++  counter_high_penal and counter_high_penal is incremented by 1.
++
++  If the root element for some reason has an empty queue it is removed from 
++  the heap and we try to dequeue again.
++
++The effect of the heap and the upper 64 bit of the penalty values is to 
++implement a weighted round robin queue. The effect of counter_low_penal,
++counter_high_penal and the lower 64 bit of the penalty value is primarily to
++stabilize the queue and to give better quality of service to machines only 
++sending a packet now and then. For example machines which have a single 
++interactive connection such as telnet or simple text chatting.
++
++
++Setting weight
++==============
++The weight value can be changed dynamically by the queue itself. The weight 
++value and how it is changed is described by the two members weight1 and 
++weight2 which has type tc_wrr_class_weight and which are in each class. And 
++by the two integer value members of the qdisc called penalfact1 and penalfact2.
++The structure is defined as:
++
++  struct tc_wrr_class_weight {
++    // All are represented as parts of (2^64-1).
++    __u64 val;  // Current value                        (0 is not valid)
++    __u64 decr; // Value per byte                       (2^64-1 is not valid)
++    __u64 incr; // Value per second                     (2^64-1 is not valid)
++    __u64 min;  // Minimal value                        (0 is not valid)
++    __u64 max;  // Maximal value                        (0 is not valid)
++
++    // The time where the above information was correct:
++    time_t tim;
++  };
++    
++The weight value used by the dequeue operations is calculated as 
++weight1.val*weight2.val. weight1 and weight2 are handled independently and in the 
++same way as will be described now.
++
++Every second, the val parameter is incremented by incr.
++
++Every time a packet is transmitted the value is decremented by decr times
++the packet size. Depending on the value of the weight_mode parameter it
++is also multiplied with other numbers. This makes it possible to give 
++penalty to machines transferring much data.
++
++-----------------------------------------------------------------------------*/
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/vmalloc.h>
++#include <linux/sched.h>
++#include <linux/string.h>
++#include <linux/mm.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/in.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/notifier.h>
++#include <net/ip.h>
++#include <net/route.h>
++#include <linux/skbuff.h>
++#include <net/sock.h>
++#include <net/pkt_sched.h>
++
++#include <linux/if_arp.h>
++#include <linux/version.h>
++
++// There seems to be problems when calling functions from userspace when
++// using vmalloc and vfree.
++//#define my_malloc(size) vmalloc(size)
++//#define my_free(ptr)   vfree(ptr)
++#define my_malloc(size) kmalloc(size,GFP_KERNEL)
++#define my_free(ptr)    kfree(ptr)
++
++#define LOCK_START sch_tree_lock(sch);
++#define LOCK_END   sch_tree_unlock(sch);
++#define ENQUEUE_SUCCESS 0
++#define ENQUEUE_FAIL    NET_XMIT_DROP
++#ifdef CONFIG_IP_NF_CONNTRACK
++  #include <linux/netfilter_ipv4/ip_conntrack.h>
++  #define MASQ_SUPPORT
++#endif
++
++#include "proxydict.c"
++
++// The penalty (priority) type: a 128 bit value kept as two 64 bit halves.
++typedef u64 penalty_base_t;
++#define penalty_base_t_max ((penalty_base_t)-1)
++typedef struct penalty_t {
++  penalty_base_t ms; // Compared first
++  penalty_base_t ls; // Breaks ties between equal ms values
++} penalty_t;
++#define penalty_leq(a,b) ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<=(b).ls))
++#define penalty_le(a,b)  ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<(b).ls))
++static penalty_t penalty_max={penalty_base_t_max,penalty_base_t_max};
++
++//-----------------------------------------------------------------------------
++// A general heap.
++
++struct heap;
++struct heap_element;
++
++// Initializes an empty heap:
++//   he:   A pointer to an uninitialized heap structure identifying the heap
++//   size: Maximal number of elements the heap can contain
++//   poll: An array of size "size" used by the heap.     
++static void heap_init(struct heap* he,int size, struct heap_element* poll);
++
++// Each element in the heap is identified by a user-assigned id which
++// should be a non negative integer less than the size argument
++// given to heap_init.
++static void heap_insert(struct heap*, int id, penalty_t);
++static void heap_remove(struct heap*, int id);
++static void heap_set_penalty(struct heap*, int id, penalty_t);
++
++// Retrieving information:
++static char      heap_empty(struct heap*); // Heap empty?
++static char      heap_contains(struct heap*, int id); // Does heap contain 
++                                                      // the given id?
++static int       heap_root(struct heap*);  // Returns the id of the root
++static penalty_t heap_get_penalty(struct heap*, int id); // Returns penalty
++                                                         // of root node
++
++//--------------------
++// Heap implementation
++
++struct heap_element {
++  penalty_t penalty;
++  int id;             // The user-assigned id of this element
++  int id2idx;         // Maps from user-assigned ids to indices in root_1 (0 = id not in heap)
++};
++
++struct heap {
++  struct heap_element* root_1; // Element array pointer minus one, so that index 1 is the root
++  int elements;                // Number of elements currently in the heap
++};
++
++// Heap implementation: heap_init makes h an empty heap backed by poll[0..size-1].
++static void heap_init(struct heap* h, int size, struct heap_element* poll) {
++  int i;
++  
++  h->elements=0;
++  h->root_1=poll-1; // Makes root_1[1] the first element of poll
++  
++  for(i=0; i<size; i++) poll[i].id2idx=0; // 0 marks an id as not in the heap
++}
++
++static char heap_empty(struct heap* h) {
++  return !h->elements; // 1 when the heap holds no element, else 0
++}
++
++static char heap_contains(struct heap* h, int id) {
++  return (h->root_1+id+1)->id2idx ? 1 : 0; // Slot id+1 holds the mapping for id; 0 means absent
++}
++
++static int heap_root(struct heap* h) {
++  return h->root_1[1].id;
<<Diff was trimmed, longer than 597 lines>>



More information about the pld-cvs-commit mailing list