
Merge branch 'master'

Jeff Garzik 20 years ago
commit 96b88fb850
100 changed files with 3188 additions and 3153 deletions
  1. + 52 - 61  Documentation/block/biodoc.txt
  2. + 260 - 236  Documentation/kernel-parameters.txt
  3. + 3 - 2  Documentation/networking/bonding.txt
  4. + 2 - 2  Makefile
  5. + 1 - 1  arch/alpha/kernel/pci-noop.c
  6. + 1 - 1  arch/alpha/kernel/pci_iommu.c
  7. + 8 - 7  arch/arm/mach-integrator/impd1.c
  8. + 2 - 0  arch/arm/mach-pxa/corgi_lcd.c
  9. + 20 - 0  arch/arm/mach-pxa/generic.c
  10. + 3 - 3  arch/arm/mach-s3c2410/mach-bast.c
  11. + 4 - 4  arch/arm/mm/consistent.c
  12. + 1 - 1  arch/frv/mb93090-mb00/pci-dma-nommu.c
  13. + 1 - 1  arch/frv/mb93090-mb00/pci-dma.c
  14. + 1 - 1  arch/frv/mm/dma-alloc.c
  15. + 19 - 11  arch/i386/kernel/cpu/cpufreq/powernow-k8.c
  16. + 1 - 1  arch/ia64/hp/common/hwsw_iommu.c
  17. + 1 - 1  arch/ia64/hp/common/sba_iommu.c
  18. + 1 - 1  arch/ia64/lib/swiotlb.c
  19. + 1 - 1  arch/ia64/sn/kernel/xpc.h
  20. + 1 - 1  arch/ia64/sn/pci/pci_dma.c
  21. + 2 - 2  arch/mips/mm/dma-coherent.c
  22. + 2 - 2  arch/mips/mm/dma-ip27.c
  23. + 2 - 2  arch/mips/mm/dma-ip32.c
  24. + 2 - 2  arch/mips/mm/dma-noncoherent.c
  25. + 3 - 3  arch/parisc/kernel/pci-dma.c
  26. + 1 - 1  arch/ppc/8xx_io/cs4218.h
  27. + 2 - 2  arch/ppc/8xx_io/cs4218_tdm.c
  28. + 2 - 2  arch/ppc/kernel/dma-mapping.c
  29. + 2 - 2  arch/ppc/mm/pgtable.c
  30. + 1 - 1  arch/ppc64/kernel/iSeries_htab.c
  31. + 2 - 2  arch/ppc64/kernel/mpic.c
  32. + 1 - 1  arch/ppc64/kernel/time.c
  33. + 1 - 2  arch/ppc64/mm/init.c
  34. + 1 - 1  arch/sh/boards/renesas/rts7751r2d/mach.c
  35. + 1 - 1  arch/sh/cchips/voyagergx/consistent.c
  36. + 1 - 1  arch/sh/drivers/pci/dma-dreamcast.c
  37. + 1 - 1  arch/sh/mm/consistent.c
  38. + 1 - 1  arch/sparc64/solaris/socksys.c
  39. + 1 - 1  arch/sparc64/solaris/timod.c
  40. + 1 - 1  arch/um/include/sysdep-i386/thread.h
  41. + 1 - 1  arch/um/include/sysdep-x86_64/thread.h
  42. + 1 - 1  arch/um/kernel/mem.c
  43. + 1 - 1  arch/um/kernel/process_kern.c
  44. + 2 - 2  arch/x86_64/kernel/pci-gart.c
  45. + 1 - 1  arch/x86_64/kernel/pci-nommu.c
  46. + 1 - 1  arch/xtensa/kernel/pci-dma.c
  47. + 90 - 237  drivers/block/as-iosched.c
  48. + 85 - 287  drivers/block/cfq-iosched.c
  49. + 19 - 106  drivers/block/deadline-iosched.c
  50. + 207 - 138  drivers/block/elevator.c
  51. + 55 - 138  drivers/block/ll_rw_blk.c
  52. + 1 - 1  drivers/block/loop.c
  53. + 5 - 43  drivers/block/noop-iosched.c
  54. + 1 - 1  drivers/block/rd.c
  55. + 2 - 1  drivers/char/drm/drm_vm.c
  56. + 1 - 1  drivers/char/drm/mga_drv.h
  57. + 1 - 1  drivers/char/drm/mga_state.c
  58. + 6 - 5  drivers/char/drm/radeon_cp.c
  59. + 1 - 1  drivers/char/n_tty.c
  60. + 3 - 3  drivers/cpufreq/cpufreq_conservative.c
  61. + 1 - 1  drivers/ieee1394/eth1394.c
  62. + 1 - 1  drivers/infiniband/hw/mthca/mthca_cmd.c
  63. + 1 - 1  drivers/infiniband/hw/mthca/mthca_cmd.h
  64. + 11 - 10  drivers/infiniband/hw/mthca/mthca_eq.c
  65. + 1 - 1  drivers/infiniband/hw/mthca/mthca_memfree.c
  66. + 1 - 1  drivers/infiniband/hw/mthca/mthca_memfree.h
  67. + 1 - 1  drivers/md/bitmap.c
  68. + 1 - 1  drivers/md/dm-crypt.c
  69. + 6 - 4  drivers/md/md.c
  70. + 0 - 1  drivers/media/video/Kconfig
  71. + 8 - 4  drivers/message/fusion/mptsas.c
  72. + 3 - 2  drivers/net/8139cp.c
  73. + 4 - 1  drivers/net/8139too.c
  74. + 23 - 0  drivers/net/Kconfig
  75. + 3 - 1  drivers/net/Makefile
  76. + 5 - 8  drivers/net/au1000_eth.c
  77. + 128 - 8  drivers/net/b44.c
  78. + 2 - 0  drivers/net/b44.h
  79. + 55 - 2  drivers/net/bonding/bond_main.c
  80. + 2 - 2  drivers/net/cassini.c
  81. + 16 - 21  drivers/net/declance.c
  82. + 3 - 1  drivers/net/e100.c
  83. + 60 - 14  drivers/net/e1000/e1000.h
  84. + 67 - 28  drivers/net/e1000/e1000_ethtool.c
  85. + 187 - 33  drivers/net/e1000/e1000_hw.c
  86. + 84 - 12  drivers/net/e1000/e1000_hw.h
  87. + 822 - 256  drivers/net/e1000/e1000_main.c
  88. + 8 - 2  drivers/net/e1000/e1000_param.c
  89. + 2 - 2  drivers/net/epic100.c
  90. + 202 - 108  drivers/net/forcedeth.c
  91. + 86 - 326  drivers/net/gianfar.c
  92. + 16 - 14  drivers/net/gianfar.h
  93. + 64 - 36  drivers/net/gianfar_ethtool.c
  94. + 219 - 0  drivers/net/gianfar_mii.c
  95. + 45 - 0  drivers/net/gianfar_mii.h
  96. + 0 - 661  drivers/net/gianfar_phy.c
  97. + 0 - 213  drivers/net/gianfar_phy.h
  98. + 1 - 0  drivers/net/hamradio/Kconfig
  99. + 3 - 6  drivers/net/hamradio/bpqether.c
  100. + 149 - 33  drivers/net/hamradio/mkiss.c

+ 52 - 61
Documentation/block/biodoc.txt

@@ -906,9 +906,20 @@ Aside:
 
 
 4. The I/O scheduler
-I/O schedulers are now per queue. They should be runtime switchable and modular
-but aren't yet. Jens has most bits to do this, but the sysfs implementation is
-missing.
+The I/O scheduler, a.k.a. the elevator, is implemented in two layers: the
+generic dispatch queue and the specific I/O schedulers.  Unless stated
+otherwise, "elevator" refers to both parts and "I/O scheduler" to the
+specific schedulers only.
+
+The block layer implements the generic dispatch queue in ll_rw_blk.c and
+elevator.c.  The generic dispatch queue is responsible for properly
+ordering barrier requests, requeueing, handling non-fs requests and all
+other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests.  They can also choose to delay certain requests to improve
+throughput or for other purposes.  As the plural form indicates, there are
+multiple I/O schedulers.  They can be built as modules, but at least one
+must be built into the kernel.  Each queue can choose a different one and
+can also switch to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
 calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
@@ -921,44 +932,36 @@ keeping work.
 The functions an elevator may implement are: (* are mandatory)
 elevator_merge_fn		called to query requests for merge with a bio
 
-elevator_merge_req_fn		" " "  with another request
+elevator_merge_req_fn		called when two requests get merged. The one
+				which gets merged into the other one will
+				never be seen by the I/O scheduler again.
+				IOW, after being merged, the request is gone.
 
 elevator_merged_fn		called when a request in the scheduler has been
 				involved in a merge. It is used in the deadline
 				scheduler for example, to reposition the request
 				if its sorting order has changed.
 
-*elevator_next_req_fn		returns the next scheduled request, or NULL
-				if there are none (or none are ready).
+elevator_dispatch_fn		fills the dispatch queue with ready requests.
+				I/O schedulers are free to postpone requests by
+				not filling the dispatch queue unless @force
+				is non-zero.  Once dispatched, I/O schedulers
+				are not allowed to manipulate the requests -
+				they belong to the generic dispatch queue.
 
-*elevator_add_req_fn		called to add a new request into the scheduler
+elevator_add_req_fn		called to add a new request into the scheduler
 
 elevator_queue_empty_fn		returns true if the merge queue is empty.
 				Drivers shouldn't use this, but rather check
 				if elv_next_request is NULL (without losing the
 				request if one exists!)
 
-elevator_remove_req_fn		This is called when a driver claims ownership of
-				the target request - it now belongs to the
-				driver. It must not be modified or merged.
-				Drivers must not lose the request! A subsequent
-				call of elevator_next_req_fn must  return the
-				_next_ request.
-
-elevator_requeue_req_fn		called to add a request to the scheduler. This
-				is used when the request has alrnadebeen
-				returned by elv_next_request, but hasn't
-				completed. If this is not implemented then
-				elevator_add_req_fn is called instead.
-
 elevator_former_req_fn
 elevator_latter_req_fn		These return the request before or after the
 				one specified in disk sort order. Used by the
 				block layer to find merge possibilities.
 
-elevator_completed_req_fn	called when a request is completed. This might
-				come about due to being merged with another or
-				when the device completes the request.
+elevator_completed_req_fn	called when a request is completed.
 
 elevator_may_queue_fn		returns true if the scheduler wants to allow the
 				current context to queue a new request even if
@@ -967,13 +970,33 @@ elevator_may_queue_fn		returns true if the scheduler wants to allow the
 
 elevator_set_req_fn
 elevator_put_req_fn		Must be used to allocate and free any elevator
-				specific storate for a request.
+				specific storage for a request.
+
+elevator_activate_req_fn	Called when device driver first sees a request.
+				I/O schedulers can use this callback to
+				determine when actual execution of a request
+				starts.
+elevator_deactivate_req_fn	Called when device driver decides to delay
+				a request by requeueing it.
 
 elevator_init_fn
 elevator_exit_fn		Allocate and free any elevator specific storage
 				for a queue.
 
-4.2 I/O scheduler implementation
+4.2 Request flows seen by I/O schedulers
+All requests seen by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
 The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
 optimal disk scan and request servicing performance (based on generic
 principles and device capabilities), optimized for:
@@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups.
 This arrangement is not a generic block layer characteristic however, so
 elevators may implement queues as they please.
 
-ii. Last merge hint
-The last merge hint is part of the generic queue layer. I/O schedulers must do
-some management on it. For the most part, the most important thing is to make
-sure q->last_merge is cleared (set to NULL) when the request on it is no longer
-a candidate for merging (for example if it has been sent to the driver).
-
-The last merge performed is cached as a hint for the subsequent request. If
-sequential data is being submitted, the hint is used to perform merges without
-any scanning. This is not sufficient when there are multiple processes doing
-I/O though, so a "merge hash" is used by some schedulers.
-
-iii. Merge hash
+ii. Merge hash
 AS and deadline use a hash table indexed by the last sector of a request. This
 enables merging code to quickly look up "back merge" candidates, even when
 multiple I/O streams are being performed at once on one disk.
@@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk.
 are far less common than "back merges" due to the nature of most I/O patterns.
 Front merges are handled by the binary trees in AS and deadline schedulers.
 
-iv. Handling barrier cases
-A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered
-around. That is, they must be processed after all older requests, and before
-any newer ones. This includes merges!
-
-In AS and deadline schedulers, barriers have the effect of flushing the reorder
-queue. The performance cost of this will vary from nothing to a lot depending
-on i/o patterns and device characteristics. Obviously they won't improve
-performance, so their use should be kept to a minimum.
-
-v. Handling insertion position directives
-A request may be inserted with a position directive. The directives are one of
-ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT.
-
-ELEVATOR_INSERT_SORT is a general directive for non-barrier requests.
-ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue.
-ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and
-overrides the ordering requested by any previous barriers. In practice this is
-harmless and required, because it is used for SCSI requeueing. This does not
-require flushing the reorder queue, so does not impose a performance penalty.
-
-vi. Plugging the queue to batch requests in anticipation of opportunities for
-  merge/sort optimizations
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+     merge/sort optimizations
 
 This is just the same as in 2.4 so far, though per-device unplugging
 support is anticipated for 2.5. Also with a priority-based i/o scheduler,
@@ -1069,7 +1060,7 @@ Aside:
   blk_kick_queue() to unplug a specific queue (right away ?)
   or optionally, all queues, is in the plan.
 
-4.3 I/O contexts
+4.4 I/O contexts
 I/O contexts provide a dynamically allocated per process data area. They may
 be used in I/O schedulers, and in the block layer (could be used for IO stats,
 priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and
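
For context, here is a minimal sketch of an I/O scheduler wired into the API
described above, closely modeled on the rewritten noop scheduler in this
merge; the "sketch" names are illustrative, and the structure assumes the
2.6.14-era elevator interface.

	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/module.h>
	#include <linux/init.h>

	/* elevator_add_req_fn: push every request straight onto the
	 * generic dispatch queue; REQ_NOMERGE stops later merge
	 * attempts against a request we no longer manage. */
	static void sketch_add_request(request_queue_t *q, struct request *rq)
	{
		rq->flags |= REQ_NOMERGE;
		elv_dispatch_add_tail(q, rq);
	}

	/* elevator_dispatch_fn: nothing is ever held back, so there is
	 * nothing left to move when asked to fill the dispatch queue. */
	static int sketch_dispatch(request_queue_t *q, int force)
	{
		return 0;
	}

	static struct elevator_type elevator_sketch = {
		.ops = {
			.elevator_dispatch_fn	= sketch_dispatch,
			.elevator_add_req_fn	= sketch_add_request,
		},
		.elevator_name	= "noop-sketch",
		.elevator_owner	= THIS_MODULE,
	};

	static int __init sketch_init(void)
	{
		return elv_register(&elevator_sketch);
	}
	module_init(sketch_init);

	MODULE_LICENSE("GPL");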

+ 260 - 236
Documentation/kernel-parameters.txt

@@ -17,7 +17,7 @@ are specified on the kernel command line with the module name plus
 
 	usbcore.blinkenlights=1
 
-The text in square brackets at the beginning of the description state the
+The text in square brackets at the beginning of the description states the
 restrictions on the kernel for the said kernel parameter to be valid. The
 restrictions referred to are that the relevant option is valid if:
 
@@ -27,8 +27,8 @@ restrictions referred to are that the relevant option is valid if:
 	APM	Advanced Power Management support is enabled.
 	AX25	Appropriate AX.25 support is enabled.
 	CD	Appropriate CD support is enabled.
-	DEVFS	devfs support is enabled. 
-	DRM	Direct Rendering Management support is enabled. 
+	DEVFS	devfs support is enabled.
+	DRM	Direct Rendering Management support is enabled.
 	EDD	BIOS Enhanced Disk Drive Services (EDD) is enabled
 	EFI	EFI Partitioning (GPT) is enabled
 	EIDE	EIDE/ATAPI support is enabled.
@@ -71,7 +71,7 @@ restrictions referred to are that the relevant option is valid if:
 	SERIAL	Serial support is enabled.
 	SMP	The kernel is an SMP kernel.
 	SPARC	Sparc architecture is enabled.
-	SWSUSP	Software suspension is enabled.
+	SWSUSP	Software suspend is enabled.
 	TS	Appropriate touchscreen support is enabled.
 	USB	USB support is enabled.
 	USBHID	USB Human Interface Device support is enabled.
@@ -105,13 +105,13 @@ running once the system is up.
 			See header of drivers/scsi/53c7xx.c.
 			See also Documentation/scsi/ncr53c7xx.txt.
 
-	acpi=		[HW,ACPI] Advanced Configuration and Power Interface 
-			Format: { force | off | ht | strict }
+	acpi=		[HW,ACPI] Advanced Configuration and Power Interface
+			Format: { force | off | ht | strict | noirq }
 			force -- enable ACPI if default was off
 			off -- disable ACPI if default was on
 			noirq -- do not use ACPI for IRQ routing
 			ht -- run only enough ACPI to enable Hyper Threading
-			strict --  Be less tolerant of platforms that are not
+			strict -- Be less tolerant of platforms that are not
 				strictly ACPI specification compliant.
 
 			See also Documentation/pm.txt, pci=noacpi
@@ -119,20 +119,23 @@ running once the system is up.
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode }
 			See Documentation/power/video.txt
- 
+
 	acpi_sci=	[HW,ACPI] ACPI System Control Interrupt trigger mode
-			Format: { level | edge |  high | low }
+			Format: { level | edge | high | low }
 
-	acpi_irq_balance	[HW,ACPI] ACPI will balance active IRQs
-				default in APIC mode
+	acpi_irq_balance [HW,ACPI]
+			ACPI will balance active IRQs
+			default in APIC mode
 
-	acpi_irq_nobalance	[HW,ACPI] ACPI will not move active IRQs (default)
-				default in PIC mode
+	acpi_irq_nobalance [HW,ACPI]
+			ACPI will not move active IRQs (default)
+			default in PIC mode
 
-	acpi_irq_pci=	[HW,ACPI] If irq_balance, Clear listed IRQs for use by PCI
+	acpi_irq_pci=	[HW,ACPI] If irq_balance, clear listed IRQs for
+			use by PCI
 			Format: <irq>,<irq>...
 
-	acpi_irq_isa=	[HW,ACPI] If irq_balance, Mark listed IRQs used by ISA
+	acpi_irq_isa=	[HW,ACPI] If irq_balance, mark listed IRQs used by ISA
 			Format: <irq>,<irq>...
 
 	acpi_osi=	[HW,ACPI] empty param disables _OSI
@@ -145,14 +148,14 @@ running once the system is up.
 
 	acpi_dbg_layer=	[HW,ACPI]
 			Format: <int>
-			Each bit of the <int> indicates an acpi debug layer,
+			Each bit of the <int> indicates an ACPI debug layer,
 			1: enable, 0: disable. It is useful for boot time
 			debugging. After system has booted up, it can be set
 			via /proc/acpi/debug_layer.
 
 	acpi_dbg_level=	[HW,ACPI]
 			Format: <int>
-			Each bit of the <int> indicates an acpi debug level,
+			Each bit of the <int> indicates an ACPI debug level,
 			1: enable, 0: disable. It is useful for boot time
 			debugging. After system has booted up, it can be set
 			via /proc/acpi/debug_level.
@@ -161,12 +164,13 @@ running once the system is up.
 
 	acpi_generic_hotkey [HW,ACPI]
 			Allow consolidated generic hotkey driver to
-			over-ride platform specific driver.
+			override platform specific driver.
 			See also Documentation/acpi-hotkey.txt.
 
 	enable_timer_pin_1 [i386,x86-64]
 			Enable PIN 1 of APIC timer
-			Can be useful to work around chipset bugs (in particular on some ATI chipsets)
+			Can be useful to work around chipset bugs
+			(in particular on some ATI chipsets).
 			The kernel tries to set a reasonable default.
 
 	disable_timer_pin_1 [i386,x86-64]
@@ -182,7 +186,7 @@ running once the system is up.
 
 	adlib=		[HW,OSS]
 			Format: <io>
- 
+
 	advansys=	[HW,SCSI]
 			See header of drivers/scsi/advansys.c.
 
@@ -192,7 +196,7 @@ running once the system is up.
 	aedsp16=	[HW,OSS] Audio Excel DSP 16
 			Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq>
 			See also header of sound/oss/aedsp16.c.
- 
+
 	aha152x=	[HW,SCSI]
 			See Documentation/scsi/aha152x.txt.
 
@@ -205,10 +209,6 @@ running once the system is up.
 	aic79xx=	[HW,SCSI]
 			See Documentation/scsi/aic79xx.txt.
 
-	AM53C974=	[HW,SCSI]
-			Format: <host-scsi-id>,<target-scsi-id>,<max-rate>,<max-offset>
-			See also header of drivers/scsi/AM53C974.c.
-
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
@@ -219,23 +219,24 @@ running once the system is up.
 			connected to one of 16 gameports
 			Format: <type1>,<type2>,..<type16>
 
-	apc=		[HW,SPARC] Power management functions (SPARCstation-4/5 + deriv.)
+	apc=		[HW,SPARC]
+			Power management functions (SPARCstation-4/5 + deriv.)
 			Format: noidle
 			Disable APC CPU standby support. SPARCstation-Fox does
 			not play well with APC CPU idle - disable it if you have
 			APC and your system crashes randomly.
 
-	apic=		[APIC,i386] Change the output verbosity  whilst booting
+	apic=		[APIC,i386] Change the output verbosity whilst booting
 			Format: { quiet (default) | verbose | debug }
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
- 
+
 	apm=		[APM] Advanced Power Management
 			See header of arch/i386/kernel/apm.c.
 
 	applicom=	[HW]
 			Format: <mem>,<irq>
- 
+
 	arcrimi=	[HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
 			Format: <io>,<irq>,<nodeID>
 
@@ -250,38 +251,40 @@ running once the system is up.
 
 	atkbd.reset=	[HW] Reset keyboard during initialization
 
-	atkbd.set=	[HW] Select keyboard code set 
-			Format: <int> (2 = AT (default) 3 = PS/2)
+	atkbd.set=	[HW] Select keyboard code set
+			Format: <int> (2 = AT (default), 3 = PS/2)
 
 	atkbd.scroll=	[HW] Enable scroll wheel on MS Office and similar
 			keyboards
 
 	atkbd.softraw=	[HW] Choose between synthetic and real raw mode
 			Format: <bool> (0 = real, 1 = synthetic (default))
-	
-	atkbd.softrepeat=
-			[HW] Use software keyboard repeat
+
+	atkbd.softrepeat= [HW]
+			Use software keyboard repeat
 
 	autotest	[IA64]
 
 	awe=		[HW,OSS] AWE32/SB32/AWE64 wave table synth
 			Format: <io>,<memsize>,<isapnp>
- 
+
 	aztcd=		[HW,CD] Aztech CD268 CDROM driver
 			Format: <io>,0x79 (?)
 
 	baycom_epp=	[HW,AX25]
 			Format: <io>,<mode>
- 
+
 	baycom_par=	[HW,AX25] BayCom Parallel Port AX.25 Modem
 			Format: <io>,<mode>
 			See header of drivers/net/hamradio/baycom_par.c.
 
-	baycom_ser_fdx=	[HW,AX25] BayCom Serial Port AX.25 Modem (Full Duplex Mode)
+	baycom_ser_fdx=	[HW,AX25]
+			BayCom Serial Port AX.25 Modem (Full Duplex Mode)
 			Format: <io>,<irq>,<mode>[,<baud>]
 			See header of drivers/net/hamradio/baycom_ser_fdx.c.
 
-	baycom_ser_hdx=	[HW,AX25] BayCom Serial Port AX.25 Modem (Half Duplex Mode)
+	baycom_ser_hdx=	[HW,AX25]
+			BayCom Serial Port AX.25 Modem (Half Duplex Mode)
 			Format: <io>,<irq>,<mode>
 			See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
@@ -292,7 +295,8 @@ running once the system is up.
 	blkmtd_count=
 
 	bttv.card=	[HW,V4L] bttv (bt848 + bt878 based grabber cards)
-	bttv.radio=	Most important insmod options are available as kernel args too.
+	bttv.radio=	Most important insmod options are available as
+			kernel args too.
 	bttv.pll=	See Documentation/video4linux/bttv/Insmod-options
 	bttv.tuner=	and Documentation/video4linux/bttv/CARDLIST
 
@@ -318,15 +322,17 @@ running once the system is up.
 	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
 			Format: { "0" | "1" }
 			See security/selinux/Kconfig help text.
-			0 -- check protection applied by kernel (includes any implied execute protection).
+			0 -- check protection applied by kernel (includes
+				any implied execute protection).
 			1 -- check protection requested by application.
 			Default value is set via a kernel config option.
-			Value can be changed at runtime via /selinux/checkreqprot.
- 
- 	clock=		[BUGS=IA-32, HW] gettimeofday timesource override. 
+			Value can be changed at runtime via
+				/selinux/checkreqprot.
+
+ 	clock=		[BUGS=IA-32,HW] gettimeofday timesource override.
 			Forces the specified timesource (if available) to be used
-			when calculating gettimeofday(). If specicified timesource
-			is not avalible, it defaults to PIT. 
+			when calculating gettimeofday(). If the specified
+			timesource is not available, it defaults to PIT.
 			Format: { pit | tsc | cyclone | pmtmr }
 
 	hpet=		[IA-32,HPET] option to disable HPET and use PIT.
@@ -336,17 +342,19 @@ running once the system is up.
 			Format: { auto | [<io>,][<irq>] }
 
 	com20020=	[HW,NET] ARCnet - COM20020 chipset
-			Format: <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
+			Format:
+			<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
 
 	com90io=	[HW,NET] ARCnet - COM90xx chipset (IO-mapped buffers)
 			Format: <io>[,<irq>]
 
-	com90xx=	[HW,NET] ARCnet - COM90xx chipset (memory-mapped buffers)
+	com90xx=	[HW,NET]
+			ARCnet - COM90xx chipset (memory-mapped buffers)
 			Format: <io>[,<irq>[,<memstart>]]
 
 	condev=		[HW,S390] console device
 	conmode=
- 
+
 	console=	[KNL] Output console device and options.
 
 		tty<n>	Use the virtual console device <n>.
@@ -367,7 +375,8 @@ running once the system is up.
 			options are the same as for ttyS, above.
 
 	cpcihp_generic=	[HW,PCI] Generic port I/O CompactPCI driver
-			Format: <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+			Format:
+			<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
 
 	cpia_pp=	[HW,PPT]
 			Format: { parport<nr> | auto | none }
@@ -384,10 +393,10 @@ running once the system is up.
 
 	cs89x0_media=	[HW,NET]
 			Format: { rj45 | aui | bnc }
- 
+
 	cyclades=	[HW,SERIAL] Cyclades multi-serial port adapter.
- 
-	dasd=		[HW,NET]    
+
+	dasd=		[HW,NET]
 			See header of drivers/s390/block/dasd_devmap.c.
 
 	db9.dev[2|3]=	[HW,JOY] Multisystem joystick support via parallel port
@@ -406,7 +415,7 @@ running once the system is up.
 
 	dhash_entries=	[KNL]
 			Set number of hash buckets for dentry cache.
- 
+
 	digi=		[HW,SERIAL]
 			IO parameters + enable/disable command.
 
@@ -424,11 +433,11 @@ running once the system is up.
 
 	dtc3181e=	[HW,SCSI]
 
-	earlyprintk=	[IA-32, X86-64]
+	earlyprintk=	[IA-32,X86-64]
 			earlyprintk=vga
 			earlyprintk=serial[,ttySn[,baudrate]]
 
-			Append ,keep to not disable it when the real console
+			Append ",keep" to not disable it when the real console
 			takes over.
 
 			Only vga or serial at a time, not both.
@@ -451,7 +460,7 @@ running once the system is up.
 			Format: {"of[f]" | "sk[ipmbr]"}
 			See comment in arch/i386/boot/edd.S
 
-	eicon=		[HW,ISDN] 
+	eicon=		[HW,ISDN]
 			Format: <id>,<membase>,<irq>
 
 	eisa_irq_edge=	[PARISC,HW]
@@ -462,12 +471,13 @@ running once the system is up.
 			arch/i386/kernel/cpu/cpufreq/elanfreq.c.
 
 	elevator=	[IOSCHED]
-			Format: {"as"|"cfq"|"deadline"|"noop"}
-			See Documentation/block/as-iosched.txt
-			and Documentation/block/deadline-iosched.txt for details.
+			Format: {"as" | "cfq" | "deadline" | "noop"}
+			See Documentation/block/as-iosched.txt and
+			Documentation/block/deadline-iosched.txt for details.
+
 	elfcorehdr=	[IA-32]
-			Specifies physical address of start of kernel core image
-			elf header.
+			Specifies physical address of start of kernel core
+			image elf header.
 			See Documentation/kdump.txt for details.
 
 	enforcing	[SELINUX] Set initial enforcing status.
@@ -485,7 +495,7 @@ running once the system is up.
 	es1371=		[HW,OSS]
 			Format: <spdif>,[<nomix>,[<amplifier>]]
 			See also header of sound/oss/es1371.c.
- 
+
 	ether=		[HW,NET] Ethernet cards parameters
 			This option is obsoleted by the "netdev=" option, which
 			has equivalent usage. See its documentation for details.
@@ -526,12 +536,13 @@ running once the system is up.
 
 	gus=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma16>
- 
+
 	gvp11=		[HW,SCSI]
 
 	hashdist=	[KNL,NUMA] Large hashes allocated during boot
 			are distributed across NUMA nodes.  Defaults on
 			for IA-64, off otherwise.
+			Format: 0 | 1 (for off | on)
 
 	hcl=		[IA-64] SGI's Hardware Graph compatibility layer
 
@@ -595,13 +606,13 @@ running once the system is up.
 	ide?=		[HW] (E)IDE subsystem
 			Format: ide?=noprobe or chipset specific parameters.
 			See Documentation/ide.txt.
-	
+
 	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
 			See Documentation/ide.txt.
 
 	idle=		[HW]
 			Format: idle=poll or idle=halt
- 
+
 	ihash_entries=	[KNL]
 			Set number of hash buckets for inode cache.
 
@@ -649,7 +660,7 @@ running once the system is up.
 			firmware running.
 
 	isapnp=		[ISAPNP]
-			Format: <RDP>, <reset>, <pci_scan>, <verbosity>
+			Format: <RDP>,<reset>,<pci_scan>,<verbosity>
 
 	isolcpus=	[KNL,SMP] Isolate CPUs from the general scheduler.
 			Format: <cpu number>,...,<cpu number>
@@ -661,32 +672,33 @@ running once the system is up.
 			"number of CPUs in system - 1".
 
 			This option is the preferred way to isolate CPUs. The
-			alternative - manually setting the CPU mask of all tasks
-			in the system can cause problems and suboptimal load
-			balancer performance.
+			alternative -- manually setting the CPU mask of all
+			tasks in the system -- can cause problems and
+			suboptimal load balancer performance.
 
 	isp16=		[HW,CD]
 			Format: <io>,<irq>,<dma>,<setup>
 
-	iucv=		[HW,NET] 
+	iucv=		[HW,NET]
 
 	js=		[HW,JOY] Analog joystick
 			See Documentation/input/joystick.txt.
 
 	keepinitrd	[HW,ARM]
 
-	kstack=N	[IA-32, X86-64] Print N words from the kernel stack
+	kstack=N	[IA-32,X86-64] Print N words from the kernel stack
 			in oops dumps.
 
 	l2cr=		[PPC]
 
-	lapic		[IA-32,APIC] Enable the local APIC even if BIOS disabled it.
+	lapic		[IA-32,APIC] Enable the local APIC even if BIOS
+			disabled it.
 
 	lasi=		[HW,SCSI] PARISC LASI driver for the 53c700 chip
 			Format: addr:<io>,irq:<irq>
 
-	llsc*=		[IA64]
-			See function print_params() in arch/ia64/sn/kernel/llsc4.c.
+	llsc*=		[IA64] See function print_params() in
+			arch/ia64/sn/kernel/llsc4.c.
 
 	load_ramdisk=	[RAM] List of ramdisks to load from floppy
 			See Documentation/ramdisk.txt.
@@ -713,8 +725,9 @@ running once the system is up.
 			7 (KERN_DEBUG)		debug-level messages
 
 	log_buf_len=n	Sets the size of the printk ring buffer, in bytes.
-			Format is n, nk, nM.  n must be a power of two.  The
-			default is set in kernel config.
+			Format: { n | nk | nM }
+			n must be a power of two.  The default size
+			is set in the kernel config file.
 
 	lp=0		[LP]	Specify parallel ports to use, e.g,
 	lp=port[,port...]	lp=none,parport0 (lp0 not configured, lp1 uses
@@ -750,23 +763,23 @@ running once the system is up.
 	ltpc=		[NET]
 			Format: <io>,<irq>,<dma>
 
-	mac5380=	[HW,SCSI]
-			Format: <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+	mac5380=	[HW,SCSI] Format:
+			<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
 
-	mac53c9x=	[HW,SCSI]
-			Format: <num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+	mac53c9x=	[HW,SCSI] Format:
+			<num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
 
-	machvec=	[IA64]
-			Force the use of a particular machine-vector (machvec) in a generic
-			kernel.  Example: machvec=hpzx1_swiotlb
+	machvec=	[IA64] Force the use of a particular machine-vector
+			(machvec) in a generic kernel.
+			Example: machvec=hpzx1_swiotlb
 
-	mad16=		[HW,OSS]
-			Format: <io>,<irq>,<dma>,<dma16>,<mpu_io>,<mpu_irq>,<joystick>
+	mad16=		[HW,OSS] Format:
+			<io>,<irq>,<dma>,<dma16>,<mpu_io>,<mpu_irq>,<joystick>
 
 	maui=		[HW,OSS]
 			Format: <io>,<irq>
- 
-	max_loop=       [LOOP] Maximum number of loopback devices that can
+
+	max_loop=	[LOOP] Maximum number of loopback devices that can
 			be mounted
 			Format: <1-256>
 
@@ -776,11 +789,11 @@ running once the system is up.
 	max_addr=[KMG]	[KNL,BOOT,ia64] All physical memory greater than or
 			equal to this physical address is ignored.
 
-	max_luns=	[SCSI] Maximum number of LUNs to probe
+	max_luns=	[SCSI] Maximum number of LUNs to probe.
 			Should be between 1 and 2^32-1.
 
 	max_report_luns=
-			[SCSI] Maximum number of LUNs received
+			[SCSI] Maximum number of LUNs received.
 			Should be between 1 and 16384.
 
 	mca-pentium	[BUGS=IA-32]
@@ -796,11 +809,11 @@ running once the system is up.
 
 	md=		[HW] RAID subsystems devices and level
 			See Documentation/md.txt.
- 
+
 	mdacon=		[MDA]
 			Format: <first>,<last>
 			Specifies range of consoles to be captured by the MDA.
- 
+
 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
 			Amount of memory to be used when the kernel is not able
 			to see the whole system memory or for test.
@@ -851,15 +864,15 @@ running once the system is up.
 	MTD_Partition=	[MTD]
 			Format: <name>,<region-number>,<size>,<offset>
 
-	MTD_Region=	[MTD]
-			Format: <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
+	MTD_Region=	[MTD] Format:
+			<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
 
 	mtdparts=	[MTD]
 			See drivers/mtd/cmdline.c.
 
 	mtouchusb.raw_coordinates=
-			[HW] Make the MicroTouch USB driver use raw coordinates ('y', default)
-			or cooked coordinates ('n')
+			[HW] Make the MicroTouch USB driver use raw coordinates
+			('y', default) or cooked coordinates ('n')
 
 	n2=		[NET] SDL Inc. RISCom/N2 synchronous serial card
 
@@ -880,7 +893,9 @@ running once the system is up.
 			Format: <irq>,<io>,<mem_start>,<mem_end>,<name>
 			Note that mem_start is often overloaded to mean
 			something different and driver-specific.
- 
+			This usage is only documented in each driver source
+			file, if at all.
+
 	nfsaddrs=	[NFS]
 			See Documentation/nfsroot.txt.
 
@@ -893,8 +908,8 @@ running once the system is up.
 			emulation library even if a 387 maths coprocessor
 			is present.
 
-	noalign		[KNL,ARM] 
- 
+	noalign		[KNL,ARM]
+
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.
 
@@ -905,19 +920,19 @@ running once the system is up.
 			on "Classic" PPC cores.
 
 	nocache		[ARM]
- 
+
 	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
 
 	noexec		[IA-64]
 
-	noexec		[IA-32, X86-64]
+	noexec		[IA-32,X86-64]
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable nn-executable mappings
 
 	nofxsr		[BUGS=IA-32]
 
 	nohlt		[BUGS=ARM]
- 
+
 	no-hlt		[BUGS=IA-32] Tells the kernel that the hlt
 			instruction doesn't work correctly and not to
 			use it.
@@ -948,8 +963,9 @@ running once the system is up.
 
 	noresidual	[PPC] Don't use residual data on PReP machines.
 
-	noresume	[SWSUSP] Disables resume and restore original swap space.
- 
+	noresume	[SWSUSP] Disables resume and restores original swap
+			space.
+
 	no-scroll	[VGA] Disables scrollback.
 			This is required for the Braillex ib80-piezo Braille
 			reader made by F.H. Papenmeier (Germany).
@@ -965,16 +981,16 @@ running once the system is up.
 	nousb		[USB] Disable the USB subsystem
 
 	nowb		[ARM]
- 
+
 	opl3=		[HW,OSS]
 			Format: <io>
 
 	opl3sa=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>,<mpu_io>,<mpu_irq>
 
-	opl3sa2=	[HW,OSS]
-			Format: <io>,<irq>,<dma>,<dma2>,<mss_io>,<mpu_io>,<ymode>,<loopback>[,<isapnp>,<multiple]
- 
+	opl3sa2=	[HW,OSS] Format:
+			<io>,<irq>,<dma>,<dma2>,<mss_io>,<mpu_io>,<ymode>,<loopback>[,<isapnp>,<multiple>]
+
 	oprofile.timer=	[HW]
 			Use timer interrupt instead of performance counters
 
@@ -993,36 +1009,33 @@ running once the system is up.
 			Format: <parport#>
 	parkbd.mode=	[HW] Parallel port keyboard adapter mode of operation,
 			0 for XT, 1 for AT (default is AT).
-			Format: <mode> 
-
-	parport=0	[HW,PPT]	Specify parallel ports. 0 disables.
-	parport=auto			Use 'auto' to force the driver to use
-	parport=0xBBB[,IRQ[,DMA]]	any IRQ/DMA settings detected (the
-					default is to ignore detected IRQ/DMA
-					settings because of possible
-					conflicts). You can specify the base
-					address, IRQ, and DMA settings; IRQ and
-					DMA should be numbers, or 'auto' (for
-					using detected settings on that
-					particular port), or 'nofifo' (to avoid
-					using a FIFO even if it is detected).
-					Parallel ports are assigned in the
-					order they are specified on the command
-					line, starting with parport0.
-
-	parport_init_mode=
-			[HW,PPT]	Configure VIA parallel port to
-					operate in specific mode. This is
-					necessary on Pegasos computer where
-					firmware has no options for setting up
-					parallel port mode and sets it to
-					spp. Currently this function knows
-					686a and 8231 chips.
+			Format: <mode>
+
+	parport=	[HW,PPT] Specify parallel ports. 0 disables.
+			Format: { 0 | auto | 0xBBB[,IRQ[,DMA]] }
+			Use 'auto' to force the driver to use any
+			IRQ/DMA settings detected (the default is to
+			ignore detected IRQ/DMA settings because of
+			possible conflicts). You can specify the base
+			address, IRQ, and DMA settings; IRQ and DMA
+			should be numbers, or 'auto' (for using detected
+			settings on that particular port), or 'nofifo'
+			(to avoid using a FIFO even if it is detected).
+			Parallel ports are assigned in the order they
+			are specified on the command line, starting
+			with parport0.
+
+	parport_init_mode=	[HW,PPT]
+			Configure VIA parallel port to operate in
+			a specific mode. This is necessary on Pegasos
+			computer where firmware has no options for setting
+			up parallel port mode and sets it to spp.
+			Currently this function knows 686a and 8231 chips.
 			Format: [spp|ps2|epp|ecp|ecpepp]
 
-	pas2=		[HW,OSS]
-			Format: <io>,<irq>,<dma>,<dma16>,<sb_io>,<sb_irq>,<sb_dma>,<sb_dma16>
- 
+	pas2=		[HW,OSS] Format:
+			<io>,<irq>,<dma>,<dma16>,<sb_io>,<sb_irq>,<sb_dma>,<sb_dma16>
+
 	pas16=		[HW,SCSI]
 			See header of drivers/scsi/pas16.c.
 
@@ -1032,64 +1045,67 @@ running once the system is up.
 			See header of drivers/block/paride/pcd.c.
 			See also Documentation/paride.txt.
 
-	pci=option[,option...]		[PCI] various PCI subsystem options:
-		off			[IA-32] don't probe for the PCI bus
-		bios			[IA-32] force use of PCI BIOS, don't access
-					the hardware directly. Use this if your machine
-					has a non-standard PCI host bridge.
-		nobios			[IA-32] disallow use of PCI BIOS, only direct
-					hardware access methods are allowed. Use this
-					if you experience crashes upon bootup and you
-					suspect they are caused by the BIOS.
-		conf1			[IA-32] Force use of PCI Configuration Mechanism 1.
-		conf2			[IA-32] Force use of PCI Configuration Mechanism 2.
-		nosort			[IA-32] Don't sort PCI devices according to
-					order given by the PCI BIOS. This sorting is done
-					to get a device order compatible with older kernels.
-		biosirq			[IA-32] Use PCI BIOS calls to get the interrupt
-					routing table. These calls are known to be buggy
-					on several machines and they hang the machine when used,
-					but on other computers it's the only way to get the
-					interrupt routing table. Try this option if the kernel
-					is unable to allocate IRQs or discover secondary PCI
-					buses on your motherboard.
-		rom			[IA-32] Assign address space to expansion ROMs.
-					Use with caution as certain devices share address
-					decoders between ROMs and other resources.
-		irqmask=0xMMMM		[IA-32] Set a bit mask of IRQs allowed to be assigned
-					automatically to PCI devices. You can make the kernel
-					exclude IRQs of your ISA cards this way.
+	pci=option[,option...]	[PCI] various PCI subsystem options:
+		off		[IA-32] don't probe for the PCI bus
+		bios		[IA-32] force use of PCI BIOS, don't access
+				the hardware directly. Use this if your machine
+				has a non-standard PCI host bridge.
+		nobios		[IA-32] disallow use of PCI BIOS, only direct
+				hardware access methods are allowed. Use this
+				if you experience crashes upon bootup and you
+				suspect they are caused by the BIOS.
+		conf1		[IA-32] Force use of PCI Configuration
+				Mechanism 1.
+		conf2		[IA-32] Force use of PCI Configuration
+				Mechanism 2.
+		nosort		[IA-32] Don't sort PCI devices according to
+				order given by the PCI BIOS. This sorting is
+				done to get a device order compatible with
+				older kernels.
+		biosirq		[IA-32] Use PCI BIOS calls to get the interrupt
+				routing table. These calls are known to be buggy
+				on several machines and they hang the machine
+				when used, but on other computers it's the only
+				way to get the interrupt routing table. Try
+				this option if the kernel is unable to allocate
+				IRQs or discover secondary PCI buses on your
+				motherboard.
+		rom		[IA-32] Assign address space to expansion ROMs.
+				Use with caution as certain devices share
+				address decoders between ROMs and other
+				resources.
+		irqmask=0xMMMM	[IA-32] Set a bit mask of IRQs allowed to be
+				assigned automatically to PCI devices. You can
+				make the kernel exclude IRQs of your ISA cards
+				this way.
 		pirqaddr=0xAAAAA	[IA-32] Specify the physical address
-					of the PIRQ table (normally generated
-					by the BIOS) if it is outside the
-					F0000h-100000h range.
-		lastbus=N		[IA-32] Scan all buses till bus #N. Can be useful
-					if the kernel is unable to find your secondary buses
-					and you want to tell it explicitly which ones they are.
-		assign-busses		[IA-32] Always assign all PCI bus
-					numbers ourselves, overriding
-					whatever the firmware may have
-					done.
-		usepirqmask		[IA-32] Honor the possible IRQ mask
-					stored in the BIOS $PIR table. This is
-					needed on some systems with broken
-					BIOSes, notably some HP Pavilion N5400
-					and Omnibook XE3 notebooks. This will
-					have no effect if ACPI IRQ routing is
-					enabled.
-		noacpi			[IA-32] Do not use ACPI for IRQ routing
-					or for PCI scanning.
-		routeirq		Do IRQ routing for all PCI devices.
-					This is normally done in pci_enable_device(),
-					so this option is a temporary workaround
-					for broken drivers that don't call it.
-
-		firmware		[ARM] Do not re-enumerate the bus but
-					instead just use the configuration
-					from the bootloader. This is currently
-					used on IXP2000 systems where the
-					bus has to be configured a certain way
-					for adjunct CPUs.
+				of the PIRQ table (normally generated
+				by the BIOS) if it is outside the
+				F0000h-100000h range.
+		lastbus=N	[IA-32] Scan all buses thru bus #N. Can be
+				useful if the kernel is unable to find your
+				secondary buses and you want to tell it
+				explicitly which ones they are.
+		assign-busses	[IA-32] Always assign all PCI bus
+				numbers ourselves, overriding
+				whatever the firmware may have done.
+		usepirqmask	[IA-32] Honor the possible IRQ mask stored
+				in the BIOS $PIR table. This is needed on
+				some systems with broken BIOSes, notably
+				some HP Pavilion N5400 and Omnibook XE3
+				notebooks. This will have no effect if ACPI
+				IRQ routing is enabled.
+		noacpi		[IA-32] Do not use ACPI for IRQ routing
+				or for PCI scanning.
+		routeirq	Do IRQ routing for all PCI devices.
+				This is normally done in pci_enable_device(),
+				so this option is a temporary workaround
+				for broken drivers that don't call it.
+		firmware	[ARM] Do not re-enumerate the bus but instead
+				just use the configuration from the
+				bootloader. This is currently used on
+				IXP2000 systems where the bus has to be
+				configured a certain way for adjunct CPUs.
 
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
@@ -1127,19 +1143,20 @@ running once the system is up.
 			[ISAPNP] Exclude DMAs for the autoconfiguration
 
 	pnp_reserve_io=	[ISAPNP] Exclude I/O ports for the autoconfiguration
-		     	Ranges are in pairs (I/O port base and size).
+			Ranges are in pairs (I/O port base and size).
 
 	pnp_reserve_mem=
-			[ISAPNP] Exclude memory regions for the autoconfiguration
+			[ISAPNP] Exclude memory regions for the
+			autoconfiguration.
 			Ranges are in pairs (memory base and size).
 
 	profile=	[KNL] Enable kernel profiling via /proc/profile
-			{ schedule | <number> }
-			(param: schedule - profile schedule points}
-			(param: profile step/bucket size as a power of 2 for
-				statistical time based profiling)
+			Format: [schedule,]<number>
+			Param: "schedule" - profile schedule points.
+			Param: <number> - step/bucket size as a power of 2 for
+				statistical time based profiling.
 
-	processor.max_cstate=   [HW, ACPI]
+	processor.max_cstate=	[HW,ACPI]
 			Limit processor to maximum C-state
 			max_cstate=9 overrides any DMI blacklist limit.
 
@@ -1147,27 +1164,28 @@ running once the system is up.
 			before loading.
 			See Documentation/ramdisk.txt.
 
-	psmouse.proto=  [HW,MOUSE] Highest PS2 mouse protocol extension to
-			probe for (bare|imps|exps|lifebook|any).
+	psmouse.proto=	[HW,MOUSE] Highest PS2 mouse protocol extension to
+			probe for; one of (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
 			per second.
-	psmouse.resetafter=
-			[HW,MOUSE] Try to reset the device after so many bad packets
+	psmouse.resetafter=	[HW,MOUSE]
+			Try to reset the device after so many bad packets
 			(0 = never).
 	psmouse.resolution=
 			[HW,MOUSE] Set desired mouse resolution, in dpi.
 	psmouse.smartscroll=
-			[HW,MOUSE] Controls Logitech smartscroll autorepeat,
+			[HW,MOUSE] Controls Logitech smartscroll autorepeat.
 			0 = disabled, 1 = enabled (default).
 
 	pss=		[HW,OSS] Personal Sound System (ECHO ESC614)
-			Format: <io>,<mss_io>,<mss_irq>,<mss_dma>,<mpu_io>,<mpu_irq>
+			Format:
+			<io>,<mss_io>,<mss_irq>,<mss_dma>,<mpu_io>,<mpu_irq>
 
 	pt.		[PARIDE]
 			See Documentation/paride.txt.
 
 	quiet=		[KNL] Disable log messages
- 
+
 	r128=		[HW,DRM]
 
 	raid=		[HW,RAID]
@@ -1176,10 +1194,9 @@ running once the system is up.
 	ramdisk=	[RAM] Sizes of RAM disks in kilobytes [deprecated]
 			See Documentation/ramdisk.txt.
 
-	ramdisk_blocksize=
-			[RAM]
+	ramdisk_blocksize=	[RAM]
 			See Documentation/ramdisk.txt.
- 
+
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			New name for the ramdisk parameter.
 			See Documentation/ramdisk.txt.
@@ -1195,7 +1212,8 @@ running once the system is up.
 
 	reserve=	[KNL,BUGS] Force the kernel to ignore some iomem area
 
-	resume=		[SWSUSP] Specify the partition device for software suspension
+	resume=		[SWSUSP]
+			Specify the partition device for software suspend
 
 	rhash_entries=	[KNL,NET]
 			Set number of hash buckets for route cache
@@ -1225,7 +1243,7 @@ running once the system is up.
 			Format: <io>,<irq>,<dma>,<dma2>
 
 	sbni=		[NET] Granch SBNI12 leased line adapter
- 
+
 	sbpcd=		[HW,CD] Soundblaster CD adapter
 			Format: <io>,<type>
 			See a comment before function sbpcd_setup() in
@@ -1258,21 +1276,20 @@ running once the system is up.
 
 	serialnumber	[BUGS=IA-32]
 
-	sg_def_reserved_size=
-			[SCSI]
- 
+	sg_def_reserved_size=	[SCSI]
+
 	sgalaxy=	[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>,<sgbase>
 
 	shapers=	[NET]
 			Maximal number of shapers.
- 
+
 	sim710=		[SCSI,HW]
 			See header of drivers/scsi/sim710.c.
 
 	simeth=		[IA-64]
 	simscsi=
- 
+
 	sjcd=		[HW,CD]
 			Format: <io>,<irq>,<dma>
 			See header of drivers/cdrom/sjcd.c.
@@ -1403,10 +1420,10 @@ running once the system is up.
 	snd-wavefront=	[HW,ALSA]
 
 	snd-ymfpci=	[HW,ALSA]
- 
+
 	sonicvibes=	[HW,OSS]
 			Format: <reverb>
- 
+
 	sonycd535=	[HW,CD]
 			Format: <io>[,<irq>]
 
@@ -1423,7 +1440,7 @@ running once the system is up.
 
 	sscape=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<mpu_io>,<mpu_irq>
- 
+
 	st=		[HW,SCSI] SCSI tape parameters (buffers, etc.)
 			See Documentation/scsi/st.txt.
 
@@ -1446,7 +1463,7 @@ running once the system is up.
 	stram_swap=	[HW,M68k]
 
 	swiotlb=	[IA-64] Number of I/O TLB slabs
- 
+
 	switches=	[HW,M68k]
 
 	sym53c416=	[HW,SCSI]
@@ -1479,14 +1496,16 @@ running once the system is up.
 	tp720=		[HW,PS2]
 
 	trix=		[HW,OSS] MediaTrix AudioTrix Pro
-			Format: <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
- 
+			Format:
+			<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
+
 	tsdev.xres=	[TS] Horizontal screen resolution.
 	tsdev.yres=	[TS] Vertical screen resolution.
 
-	turbografx.map[2|3]=
-			[HW,JOY] TurboGraFX parallel port interface
-			Format: <port#>,<js1>,<js2>,<js3>,<js4>,<js5>,<js6>,<js7>
+	turbografx.map[2|3]=	[HW,JOY]
+			TurboGraFX parallel port interface
+			Format:
+			<port#>,<js1>,<js2>,<js3>,<js4>,<js5>,<js6>,<js7>
 			See also Documentation/input/joystick-parport.txt
 
 	u14-34f=	[HW,SCSI] UltraStor 14F/34F SCSI host adapter
@@ -1502,17 +1521,18 @@ running once the system is up.
 
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
- 
+
 	video=		[FB] Frame buffer configuration
 			See Documentation/fb/modedb.txt.
 
 	vga=		[BOOT,IA-32] Select a particular video mode
-			See Documentation/i386/boot.txt and Documentation/svga.txt.
+			See Documentation/i386/boot.txt and
+			Documentation/svga.txt.
 			Use vga=ask for menu.
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
 
-	vmalloc=nn[KMG]	[KNL,BOOT] forces the vmalloc area to have an exact
+	vmalloc=nn[KMG]	[KNL,BOOT] Forces the vmalloc area to have an exact
 			size of <nn>. This can be used to increase the
 			minimum size (128MB on x86). It can also be used to
 			decrease the size and leave more room for directly
@@ -1520,11 +1540,11 @@ running once the system is up.
 
 	vmhalt=		[KNL,S390]
 
-	vmpoff=		[KNL,S390] 
- 
+	vmpoff=		[KNL,S390]
+
 	waveartist=	[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>
- 
+
 	wd33c93=	[HW,SCSI]
 			See header of drivers/scsi/wd33c93.c.
 
@@ -1538,21 +1558,25 @@ running once the system is up.
 	xd_geo=		See header of drivers/block/xd.c.
 
 	xirc2ps_cs=	[NET,PCMCIA]
-			Format: <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
-
+			Format:
+			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 
 
+______________________________________________________________________
 Changelog:
 
+2000-06-??	Mr. Unknown
 	The last known update (for 2.4.0) - the changelog was not kept before.
-	2000-06-??	Mr. Unknown
 
+2002-11-24	Petr Baudis <pasky@ucw.cz>
+		Randy Dunlap <randy.dunlap@verizon.net>
 	Update for 2.5.49, description for most of the options introduced,
 	references to other documentation (C files, READMEs, ..), added S390,
 	PPC, SPARC, MTD, ALSA and OSS category. Minor corrections and
 	reformatting.
-	2002-11-24	Petr Baudis <pasky@ucw.cz>
-			Randy Dunlap <randy.dunlap@verizon.net>
+
+2005-10-19	Randy Dunlap <rdunlap@xenotime.net>
+	Lots of typos, whitespace, some reformatting.
 
 TODO:
 

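The "module name plus parameter" convention described at the top of this
file comes from the standard module parameter machinery; a hedged sketch,
with a hypothetical "example" module and parameter:

	#include <linux/module.h>
	#include <linux/moduleparam.h>

	/* Built into the kernel, this is set on the command line as
	 * "example.debug_level=2"; built as a module, it is set with
	 * "modprobe example debug_level=2". */
	static int debug_level;
	module_param(debug_level, int, 0444);
	MODULE_PARM_DESC(debug_level, "example driver verbosity (0-3)");
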
+ 3 - 2
Documentation/networking/bonding.txt

@@ -777,7 +777,7 @@ doing so is the same as described in the "Configuring Multiple Bonds
 Manually" section, below.
 
 	NOTE: It has been observed that some Red Hat supplied kernels
-are apparently unable to rename modules at load time (the "-obonding1"
+are apparently unable to rename modules at load time (the "-o bond1"
 part).  Attempts to pass that option to modprobe will produce an
 "Operation not permitted" error.  This has been reported on some
 Fedora Core kernels, and has been seen on RHEL 4 as well.  On kernels
@@ -883,7 +883,8 @@ the above does not work, and the second bonding instance never sees
 its options.  In that case, the second options line can be substituted
 as follows:
 
-install bonding1 /sbin/modprobe bonding -obond1 mode=balance-alb miimon=50
+install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
+	mode=balance-alb miimon=50
 
 	This may be repeated any number of times, specifying a new and
 unique name in place of bond1 for each subsequent instance.

+ 2 - 2
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 14
-EXTRAVERSION =-rc5
+EXTRAVERSION =
 NAME=Affluent Albatross
 
 # *DOCUMENTATION*
@@ -334,7 +334,7 @@ KALLSYMS	= scripts/kallsyms
 PERL		= perl
 CHECK		= sparse
 
-CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ $(CF)
+CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
 MODFLAGS	= -DMODULE
 CFLAGS_MODULE   = $(MODFLAGS)
 AFLAGS_MODULE   = $(MODFLAGS)
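
-Wbitwise enables sparse's checking of __bitwise-annotated types during
"make C=1"; the many int-to-gfp_t prototype conversions in this merge lean
on a similar sparse annotation on gfp_t. A small sketch of the kind of
mistake this checking catches (assuming the stock endian helpers; the exact
warning depends on the sparse version):

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* __le32 is a __bitwise type: with -Wbitwise, sparse will not
	 * let it mix silently with host-endian integers. */
	static u32 read_wire_value(__le32 raw)
	{
		/* "return raw;" compiles under gcc but draws a sparse
		 * warning; the conversion must be explicit: */
		return le32_to_cpu(raw);
	}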

+ 1 - 1
arch/alpha/kernel/pci-noop.c

@@ -154,7 +154,7 @@ pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask)
 
 void *
 dma_alloc_coherent(struct device *dev, size_t size,
-		   dma_addr_t *dma_handle, int gfp)
+		   dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 1 - 1
arch/alpha/kernel/pci_iommu.c

@@ -397,7 +397,7 @@ pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
 {
 	void *cpu_addr;
 	long order = get_order(size);
-	int gfp = GFP_ATOMIC;
+	gfp_t gfp = GFP_ATOMIC;
 
 try_again:
 	cpu_addr = (void *)__get_free_pages(gfp, order);

+ 8 - 7
arch/arm/mach-integrator/impd1.c

@@ -67,7 +67,7 @@ static void impd1_setvco(struct clk *clk, struct icst525_vco vco)
 	}
 	writel(0, impd1->base + IMPD1_LOCK);
 
-#if DEBUG
+#ifdef DEBUG
 	vco.v = val & 0x1ff;
 	vco.r = (val >> 9) & 0x7f;
 	vco.s = (val >> 16) & 7;
@@ -427,17 +427,18 @@ static int impd1_probe(struct lm_device *dev)
 	return ret;
 }
 
+static int impd1_remove_one(struct device *dev, void *data)
+{
+	device_unregister(dev);
+	return 0;
+}
+
 static void impd1_remove(struct lm_device *dev)
 {
 	struct impd1_module *impd1 = lm_get_drvdata(dev);
-	struct list_head *l, *n;
 	int i;
 
-	list_for_each_safe(l, n, &dev->dev.children) {
-		struct device *d = list_to_dev(l);
-
-		device_unregister(d);
-	}
+	device_for_each_child(&dev->dev, NULL, impd1_remove_one);
 
 	for (i = 0; i < ARRAY_SIZE(impd1->vcos); i++)
 		clk_unregister(&impd1->vcos[i]);
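
The change above swaps an open-coded walk of dev->dev.children for
device_for_each_child(), which handles the list iteration internally and
calls back once per child with an opaque data pointer. A sketch of the
general pattern (the counting helpers are hypothetical):

	#include <linux/device.h>

	/* Per-child callback; returning nonzero would stop the walk. */
	static int count_one_child(struct device *child, void *data)
	{
		int *count = data;

		(*count)++;
		return 0;
	}

	/* The data pointer threads the running total to the callback. */
	static int count_children(struct device *parent)
	{
		int count = 0;

		device_for_each_child(parent, &count, count_one_child);
		return count;
	}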

+ 2 - 0
arch/arm/mach-pxa/corgi_lcd.c

@@ -488,6 +488,7 @@ static int is_pxafb_device(struct device * dev, void * data)
 
 unsigned long spitz_get_hsync_len(void)
 {
+#ifdef CONFIG_FB_PXA
 	if (!spitz_pxafb_dev) {
 		spitz_pxafb_dev = bus_find_device(&platform_bus_type, NULL, NULL, is_pxafb_device);
 		if (!spitz_pxafb_dev)
@@ -496,6 +497,7 @@ unsigned long spitz_get_hsync_len(void)
 	if (!get_hsync_time)
 		get_hsync_time = symbol_get(pxafb_get_hsync_time);
 	if (!get_hsync_time)
+#endif
 		return 0;
 
 	return pxafb_get_hsync_time(spitz_pxafb_dev);

+ 20 - 0
arch/arm/mach-pxa/generic.c

@@ -250,6 +250,25 @@ void __init pxa_set_i2c_info(struct i2c_pxa_platform_data *info)
 	i2c_device.dev.platform_data = info;
 }
 
+static struct resource i2s_resources[] = {
+	{
+		.start	= 0x40400000,
+		.end	= 0x40400083,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_I2S,
+		.end	= IRQ_I2S,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device i2s_device = {
+	.name		= "pxa2xx-i2s",
+	.id		= -1,
+	.resource	= i2s_resources,
+	.num_resources	= ARRAY_SIZE(i2s_resources),
+};
+
 static struct platform_device *devices[] __initdata = {
 	&pxamci_device,
 	&udc_device,
@@ -258,6 +277,7 @@ static struct platform_device *devices[] __initdata = {
 	&btuart_device,
 	&stuart_device,
 	&i2c_device,
+	&i2s_device,
 };
 
 static int __init pxa_init(void)
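
The new i2s_device follows the usual board-file pattern: a static resource
table, a platform_device that points at it, and bulk registration at init
time, as pxa_init() does with the devices[] array above. A sketch of that
pattern outside this file (names, address range and IRQ number invented):

	#include <linux/device.h>
	#include <linux/init.h>
	#include <linux/ioport.h>

	static struct resource example_resources[] = {
		{
			.start	= 0x40000000,	/* MMIO window, invented */
			.end	= 0x400000ff,
			.flags	= IORESOURCE_MEM,
		}, {
			.start	= 17,		/* IRQ number, invented */
			.end	= 17,
			.flags	= IORESOURCE_IRQ,
		},
	};

	static struct platform_device example_device = {
		.name		= "example-dev",
		.id		= -1,		/* single instance */
		.resource	= example_resources,
		.num_resources	= ARRAY_SIZE(example_resources),
	};

	static struct platform_device *example_devices[] __initdata = {
		&example_device,
	};

	static int __init example_board_init(void)
	{
		/* Registers every device in the array in one call. */
		return platform_add_devices(example_devices,
					    ARRAY_SIZE(example_devices));
	}
	arch_initcall(example_board_init);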

+ 3 - 3
arch/arm/mach-s3c2410/mach-bast.c

@@ -307,9 +307,9 @@ static void bast_nand_select(struct s3c2410_nand_set *set, int slot)
 }
 
 static struct s3c2410_platform_nand bast_nand_info = {
-	.tacls		= 40,
-	.twrph0		= 80,
-	.twrph1		= 80,
+	.tacls		= 30,
+	.twrph0		= 60,
+	.twrph1		= 60,
 	.nr_sets	= ARRAY_SIZE(bast_nand_sets),
 	.sets		= bast_nand_sets,
 	.select_chip	= bast_nand_select,

+ 4 - 4
arch/arm/mm/consistent.c

@@ -75,7 +75,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -133,7 +133,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
 #endif
 
 static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
+__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 	    pgprot_t prot)
 {
 	struct page *page;
@@ -251,7 +251,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
  * virtual and bus address for that space.
  */
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	return __dma_alloc(dev, size, handle, gfp,
 			   pgprot_noncached(pgprot_kernel));
@@ -263,7 +263,7 @@ EXPORT_SYMBOL(dma_alloc_coherent);
  * dma_alloc_coherent above.
  */
 void *
-dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	return __dma_alloc(dev, size, handle, gfp,
 			   pgprot_writecombine(pgprot_kernel));
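
With the prototypes above converted to gfp_t, a typical caller looks like
this hedged sketch (the descriptor-ring helper names are hypothetical):

	#include <linux/device.h>
	#include <linux/dma-mapping.h>

	/* Grab one page of coherent DMA memory; passing anything but a
	 * gfp_t as the last argument is now a sparse-visible type error. */
	static void *example_alloc_desc_ring(struct device *dev,
					     dma_addr_t *bus_addr)
	{
		return dma_alloc_coherent(dev, PAGE_SIZE, bus_addr, GFP_KERNEL);
	}

	static void example_free_desc_ring(struct device *dev, void *ring,
					   dma_addr_t bus_addr)
	{
		if (ring)
			dma_free_coherent(dev, PAGE_SIZE, ring, bus_addr);
	}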

+ 1 - 1
arch/frv/mb93090-mb00/pci-dma-nommu.c

@@ -33,7 +33,7 @@ struct dma_alloc_record {
 static DEFINE_SPINLOCK(dma_alloc_lock);
 static LIST_HEAD(dma_alloc_list);
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
 	struct dma_alloc_record *new;
 	struct list_head *this = &dma_alloc_list;

+ 1 - 1
arch/frv/mb93090-mb00/pci-dma.c

@@ -17,7 +17,7 @@
 #include <linux/highmem.h>
 #include <asm/io.h>
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 1 - 1
arch/frv/mm/dma-alloc.c

@@ -81,7 +81,7 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
  * portions of the kernel with single large page TLB entries, and
  * still get unique uncached pages for consistent DMA.
  */
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 {
 	struct vm_struct *area;
 	unsigned long page, va, pa;

+ 19 - 11
arch/i386/kernel/cpu/cpufreq/powernow-k8.c

@@ -44,7 +44,7 @@
 
 #define PFX "powernow-k8: "
 #define BFX PFX "BIOS error: "
-#define VERSION "version 1.50.3"
+#define VERSION "version 1.50.4"
 #include "powernow-k8.h"
 
 /* serialize freq changes  */
@@ -111,8 +111,8 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	do {
-		if (i++ > 0x1000000) {
-			printk(KERN_ERR PFX "detected change pending stuck\n");
+		if (i++ > 10000) {
+			dprintk("detected change pending stuck\n");
 			return 1;
 		}
 		rdmsr(MSR_FIDVID_STATUS, lo, hi);
@@ -159,6 +159,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 {
 	u32 lo;
 	u32 savevid = data->currvid;
+	u32 i = 0;
 
 	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
 		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
@@ -170,10 +171,13 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
 
-	wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
 
 	count_off_irt(data);
 
@@ -197,6 +201,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 {
 	u32 lo;
 	u32 savefid = data->currfid;
+	int i = 0;
 
 	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
 		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
@@ -208,10 +213,13 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
 
-	wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
 
 	if (savefid != data->currfid) {
 		printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",

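Beyond the version bump, the powernow-k8 hunks replace single-shot MSR writes with a bounded retry: the fid/vid write is reissued until the hardware's pending bit clears, giving up after 100 attempts, while the inner poll in query_current_values_with_pending_wait() is capped at 10000 iterations and demoted from a KERN_ERR printk to dprintk. The pattern, reduced to a sketch (hw_write() and hw_busy() are hypothetical stand-ins for wrmsr() and the MSR pending-bit poll):

/* sketch: bounded retry on a hardware "pending" flag */
static int write_until_accepted(u32 value)
{
	int tries = 0;

	do {
		hw_write(value);
		if (tries++ > 100)
			return 1;	/* pending bit stuck; give up */
	} while (hw_busy());

	return 0;	/* hardware accepted the write */
}
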
+ 1 - 1
arch/ia64/hp/common/hwsw_iommu.c

@@ -71,7 +71,7 @@ hwsw_init (void)
 }
 
 void *
-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	if (use_swiotlb(dev))
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);

+ 1 - 1
arch/ia64/hp/common/sba_iommu.c

@@ -1076,7 +1076,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
  * See Documentation/DMA-mapping.txt
  */
 void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	struct ioc *ioc;
 	void *addr;

+ 1 - 1
arch/ia64/lib/swiotlb.c

@@ -314,7 +314,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, int flags)
+		       dma_addr_t *dma_handle, gfp_t flags)
 {
 	unsigned long dev_addr;
 	void *ret;

+ 1 - 1
arch/ia64/sn/kernel/xpc.h

@@ -939,7 +939,7 @@ xpc_map_bte_errors(bte_result_t error)
 
 
 static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base)
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 {
 	/* see if kmalloc will give us cacheline aligned memory by default */
 	*base = kmalloc(size, flags);

+ 1 - 1
arch/ia64/sn/pci/pci_dma.c

@@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  * more information.
  */
 void *sn_dma_alloc_coherent(struct device *dev, size_t size,
-			    dma_addr_t * dma_handle, int flags)
+			    dma_addr_t * dma_handle, gfp_t flags)
 {
 	void *cpuaddr;
 	unsigned long phys_addr;

+ 2 - 2
arch/mips/mm/dma-coherent.c

@@ -18,7 +18,7 @@
 #include <asm/io.h>
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -39,7 +39,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 	__attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);

+ 2 - 2
arch/mips/mm/dma-ip27.c

@@ -22,7 +22,7 @@
 	pdev_to_baddr(to_pci_dev(dev), (addr))
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 
@@ -44,7 +44,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 	__attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);

+ 2 - 2
arch/mips/mm/dma-ip32.c

@@ -37,7 +37,7 @@
 #define RAM_OFFSET_MASK	0x3fffffff
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -61,7 +61,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 2 - 2
arch/mips/mm/dma-noncoherent.c

@@ -24,7 +24,7 @@
  */
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -45,7 +45,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 3 - 3
arch/parisc/kernel/pci-dma.c

@@ -349,7 +349,7 @@ pcxl_dma_init(void)
 
 __initcall(pcxl_dma_init);
 
-static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flag)
+static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag)
 {
 	unsigned long vaddr;
 	unsigned long paddr;
@@ -502,13 +502,13 @@ struct hppa_dma_ops pcxl_dma_ops = {
 };
 
 static void *fail_alloc_consistent(struct device *dev, size_t size,
-				   dma_addr_t *dma_handle, int flag)
+				   dma_addr_t *dma_handle, gfp_t flag)
 {
 	return NULL;
 }
 
 static void *pa11_dma_alloc_noncoherent(struct device *dev, size_t size,
-					  dma_addr_t *dma_handle, int flag)
+					  dma_addr_t *dma_handle, gfp_t flag)
 {
 	void *addr = NULL;
 

+ 1 - 1
arch/ppc/8xx_io/cs4218.h

@@ -78,7 +78,7 @@ typedef struct {
     const char *name2;
     void (*open)(void);
     void (*release)(void);
-    void *(*dma_alloc)(unsigned int, int);
+    void *(*dma_alloc)(unsigned int, gfp_t);
     void (*dma_free)(void *, unsigned int);
     int (*irqinit)(void);
 #ifdef MODULE

+ 2 - 2
arch/ppc/8xx_io/cs4218_tdm.c

@@ -318,7 +318,7 @@ struct cs_sound_settings {
 
 static struct cs_sound_settings sound;
 
-static void *CS_Alloc(unsigned int size, int flags);
+static void *CS_Alloc(unsigned int size, gfp_t flags);
 static void CS_Free(void *ptr, unsigned int size);
 static int CS_IrqInit(void);
 #ifdef MODULE
@@ -959,7 +959,7 @@ static TRANS transCSNormalRead = {
 
 /*** Low level stuff *********************************************************/
 
-static void *CS_Alloc(unsigned int size, int flags)
+static void *CS_Alloc(unsigned int size, gfp_t flags)
 {
 	int	order;
 

+ 2 - 2
arch/ppc/kernel/dma-mapping.c

@@ -115,7 +115,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -173,7 +173,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
  * virtual and bus address for that space.
  */
 void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, int gfp)
+__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	struct page *page;
 	struct vm_region *c;

+ 2 - 2
arch/ppc/mm/pgtable.c

@@ -114,9 +114,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
 #else
-	int flags = GFP_KERNEL | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
 
 	ptepage = alloc_pages(flags, 0);

+ 1 - 1
arch/ppc64/kernel/iSeries_htab.c

@@ -66,7 +66,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 	}
 
 	if (slot < 0) {		/* MSB set means secondary group */
-		vflags |= HPTE_V_VALID;
+		vflags |= HPTE_V_SECONDARY;
 		secondary = 1;
 		slot &= 0x7fffffffffffffff;
 	}

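The iSeries fix is a copy-paste-style bug: when the hypervisor returns a negative slot, the entry landed in the secondary hash group, and the HPTE's V word must record that with the secondary bit rather than re-asserting the (already set) valid bit, which left secondary-group entries indistinguishable from primary ones. For reference, the two bits were defined along these lines in the 2.6.14-era include/asm-ppc64/mmu.h (values quoted from memory):

#define HPTE_V_SECONDARY	ASM_CONST(0x0000000000000002)
#define HPTE_V_VALID		ASM_CONST(0x0000000000000001)
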
+ 2 - 2
arch/ppc64/kernel/mpic.c

@@ -506,8 +506,8 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 	mpic->senses_count = senses_count;
 
 	/* Map the global registers */
-	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
-	mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2);
+	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x2000);
+	mpic->tmregs = mpic->gregs + ((MPIC_TIMER_BASE - MPIC_GREG_BASE) >> 2);
 	BUG_ON(mpic->gregs == NULL);
 
 	/* Reset */

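The mpic fix corrects two related mistakes: the mapping was only 0x1000 bytes, too small to reach the timer block, and the timer pointer was computed as though the mapping began at the chip's physical base rather than at MPIC_GREG_BASE. Because mpic->gregs points at 32-bit registers, byte offsets must also be scaled by >> 2. A sketch of the corrected arithmetic, with illustrative offsets (OpenPIC conventionally puts the globals at +0x1000 and the timers at +0x1100; the real values live in the mpic header):

#define MPIC_GREG_BASE	0x01000		/* illustrative */
#define MPIC_TIMER_BASE	0x01100		/* illustrative */

/* map 0x2000 bytes so both register blocks are covered */
mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x2000);

/* offset relative to where the mapping starts, in u32 units */
mpic->tmregs = mpic->gregs + ((MPIC_TIMER_BASE - MPIC_GREG_BASE) >> 2);
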
+ 1 - 1
arch/ppc64/kernel/time.c

@@ -870,7 +870,7 @@ void div128_by_32( unsigned long dividend_high, unsigned long dividend_low,
 	rb = ((ra + b) - (x * divisor)) << 32;
 
 	y = (rb + c)/divisor;
-	rc = ((rb + b) - (y * divisor)) << 32;
+	rc = ((rb + c) - (y * divisor)) << 32;
 
 	z = (rc + d)/divisor;
 

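The time.c change fixes one wrong digit in a schoolbook 128-by-32 division: the dividend is split into four 32-bit digits a, b, c, d, and each step divides (remainder + next digit), then carries the new remainder up by 32 bits. The third remainder rc must therefore be formed from (rb + c), the quantity that actually produced y, not (rb + b). The full pattern, with the earlier steps reconstructed from the visible context:

/* quotient digits w, x, y, z; each step:
 *   q   = (rem + digit) / divisor
 *   rem = ((rem + digit) - q * divisor) << 32
 */
w  = a / divisor;
ra = (a - (w * divisor)) << 32;

x  = (ra + b) / divisor;
rb = ((ra + b) - (x * divisor)) << 32;

y  = (rb + c) / divisor;
rc = ((rb + c) - (y * divisor)) << 32;	/* was (rb + b): wrong digit */

z  = (rc + d) / divisor;
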
+ 1 - 2
arch/ppc64/mm/init.c

@@ -799,8 +799,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
 		local = 1;
 
-	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
-		    0x300, local);
+	__hash_page(ea, 0, vsid, ptep, 0x300, local);
 	local_irq_restore(flags);
 }
 

+ 1 - 1
arch/sh/boards/renesas/rts7751r2d/mach.c

@@ -23,7 +23,7 @@ extern void init_rts7751r2d_IRQ(void);
 extern void *rts7751r2d_ioremap(unsigned long, unsigned long);
 extern int rts7751r2d_irq_demux(int irq);
 
-extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, int);
+extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, gfp_t);
 extern int voyagergx_consistent_free(struct device *, size_t, void *, dma_addr_t);
 
 /*

+ 1 - 1
arch/sh/cchips/voyagergx/consistent.c

@@ -31,7 +31,7 @@ static LIST_HEAD(voya_alloc_list);
 #define OHCI_SRAM_SIZE	0x10000
 
 void *voyagergx_consistent_alloc(struct device *dev, size_t size,
-				 dma_addr_t *handle, int flag)
+				 dma_addr_t *handle, gfp_t flag)
 {
 	struct list_head *list = &voya_alloc_list;
 	struct voya_alloc_entry *entry;

+ 1 - 1
arch/sh/drivers/pci/dma-dreamcast.c

@@ -33,7 +33,7 @@
 static int gapspci_dma_used = 0;
 
 void *dreamcast_consistent_alloc(struct device *dev, size_t size,
-				 dma_addr_t *dma_handle, int flag)
+				 dma_addr_t *dma_handle, gfp_t flag)
 {
 	unsigned long buf;
 

+ 1 - 1
arch/sh/mm/consistent.c

@@ -11,7 +11,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/io.h>
 
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
 {
 	struct page *page, *end, *free;
 	void *ret;

+ 1 - 1
arch/sparc64/solaris/socksys.c

@@ -49,7 +49,7 @@ IPPROTO_EGP, IPPROTO_PUP, IPPROTO_UDP, IPPROTO_IDP, IPPROTO_RAW,
 
 #else
 
-extern void * mykmalloc(size_t s, int gfp);
+extern void * mykmalloc(size_t s, gfp_t gfp);
 extern void mykfree(void *);
 
 #endif

+ 1 - 1
arch/sparc64/solaris/timod.c

@@ -39,7 +39,7 @@ static char * page = NULL ;
 
 #else
 
-void * mykmalloc(size_t s, int gfp)
+void * mykmalloc(size_t s, gfp_t gfp)
 {
 	static char * page;
 	static size_t free;

+ 1 - 1
arch/um/include/sysdep-i386/thread.h

@@ -4,7 +4,7 @@
 #include <kern_constants.h>
 
 #define TASK_DEBUGREGS(task) ((unsigned long *) &(((char *) (task))[HOST_TASK_DEBUGREGS]))
-#ifdef CONFIG_MODE_TT
+#ifdef UML_CONFIG_MODE_TT
 #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
 #endif
 

+ 1 - 1
arch/um/include/sysdep-x86_64/thread.h

@@ -3,7 +3,7 @@
 
 #include <kern_constants.h>
 
-#ifdef CONFIG_MODE_TT
+#ifdef UML_CONFIG_MODE_TT
 #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
 #endif
 

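The UML headers above are compiled into userspace-side objects that cannot include kernel headers, so kernel configuration symbols reach them through a generated constants header (pulled in via kern_constants.h) under UML_-prefixed names; testing the bare CONFIG_ name there always evaluated false. Illustratively, the generated header carries lines of this shape (hypothetical content):

/* generated: mirrors the kernel's CONFIG_MODE_TT setting */
#define UML_CONFIG_MODE_TT 1
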
+ 1 - 1
arch/um/kernel/mem.c

@@ -252,7 +252,7 @@ void paging_init(void)
 #endif
 }
 
-struct page *arch_validate(struct page *page, int mask, int order)
+struct page *arch_validate(struct page *page, gfp_t mask, int order)
 {
 	unsigned long addr, zero = 0;
 	int i;

+ 1 - 1
arch/um/kernel/process_kern.c

@@ -80,7 +80,7 @@ void free_stack(unsigned long stack, int order)
 unsigned long alloc_stack(int order, int atomic)
 {
 	unsigned long page;
-	int flags = GFP_KERNEL;
+	gfp_t flags = GFP_KERNEL;
 
 	if (atomic)
 		flags = GFP_ATOMIC;

+ 2 - 2
arch/x86_64/kernel/pci-gart.c

@@ -187,7 +187,7 @@ static void flush_gart(struct device *dev)
 
 /* Allocate DMA memory on node near device */
 noinline
-static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
+static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 {
 	struct page *page;
 	int node;
@@ -204,7 +204,7 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
  */
 void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   unsigned gfp)
+		   gfp_t gfp)
 {
 	void *memory;
 	unsigned long dma_mask = 0;

+ 1 - 1
arch/x86_64/kernel/pci-nommu.c

@@ -24,7 +24,7 @@ EXPORT_SYMBOL(iommu_sac_force);
  */
 
 void *dma_alloc_coherent(struct device *hwdev, size_t size,
-			 dma_addr_t *dma_handle, unsigned gfp)
+			 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 	u64 mask;

+ 1 - 1
arch/xtensa/kernel/pci-dma.c

@@ -29,7 +29,7 @@
  */
 
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	void *ret;
 

+ 90 - 237
drivers/block/as-iosched.c

@@ -98,7 +98,6 @@ struct as_data {
 
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
 	return ioc;
 }
 
+static void as_put_io_context(struct as_rq *arq)
+{
+	struct as_io_context *aic;
+
+	if (unlikely(!arq->io_context))
+		return;
+
+	aic = arq->io_context->aic;
+
+	if (arq->is_sync == REQ_SYNC && aic) {
+		spin_lock(&aic->lock);
+		set_bit(AS_TASK_IORUNNING, &aic->state);
+		aic->last_end_request = jiffies;
+		spin_unlock(&aic->lock);
+	}
+
+	put_io_context(arq->io_context);
+}
+
 /*
  * the back merge hash support functions
  */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
 		__as_del_arq_hash(arq);
 }
 
-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
-	as_del_arq_hash(arq);
-
-	if (q->last_merge == arq->request)
-		q->last_merge = NULL;
-}
-
 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
 		BUG_ON(!arq->on_hash);
 
 		if (!rq_mergeable(__rq)) {
-			as_remove_merge_hints(ad->q, arq);
+			as_del_arq_hash(arq);
 			continue;
 		}
 
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
-	if (arq->state == AS_RQ_PRESCHED) {
-		WARN_ON(arq->io_context);
-		goto out;
-	}
-
-	if (arq->state == AS_RQ_MERGED)
-		goto out_ioc;
-
 	if (arq->state != AS_RQ_REMOVED) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 		goto out;
 	}
 
-	if (!blk_fs_request(rq))
-		goto out;
-
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		kblockd_schedule_work(&ad->antic_work);
 		ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 		}
 	}
 
-out_ioc:
-	if (!arq->io_context)
-		goto out;
-
-	if (arq->is_sync == REQ_SYNC) {
-		struct as_io_context *aic = arq->io_context->aic;
-		if (aic) {
-			spin_lock(&aic->lock);
-			set_bit(AS_TASK_IORUNNING, &aic->state);
-			aic->last_end_request = jiffies;
-			spin_unlock(&aic->lock);
-		}
-	}
-
-	put_io_context(arq->io_context);
+	as_put_io_context(arq);
 out:
 	arq->state = AS_RQ_POSTSCHED;
 }
@@ -1047,72 +1032,10 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
 	list_del_init(&arq->fifo);
-	as_remove_merge_hints(q, arq);
+	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }
 
-/*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct as_io_context *aic;
-
-	if (!arq) {
-		WARN_ON(1);
-		return;
-	}
-
-	WARN_ON(arq->state != AS_RQ_DISPATCHED);
-	WARN_ON(ON_RB(&arq->rb_node));
-	if (arq->io_context && arq->io_context->aic) {
-		aic = arq->io_context->aic;
-		if (aic) {
-			WARN_ON(!atomic_read(&aic->nr_dispatched));
-			atomic_dec(&aic->nr_dispatched);
-		}
-	}
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-
-	if (unlikely(arq->state == AS_RQ_NEW))
-		goto out;
-
-	if (ON_RB(&arq->rb_node)) {
-		if (arq->state != AS_RQ_QUEUED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		/*
-		 * We'll lose the aliased request(s) here. I don't think this
-		 * will ever happen, but if it does, hopefully someone will
-		 * report it.
-		 */
-		WARN_ON(!list_empty(&rq->queuelist));
-		as_remove_queued_request(q, rq);
-	} else {
-		if (arq->state != AS_RQ_DISPATCHED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		as_remove_dispatched_request(q, rq);
-	}
-out:
-	arq->state = AS_RQ_REMOVED;
-}
-
 /*
  * as_fifo_expired returns 0 if there are no expired reads on the fifo,
  * 1 otherwise.  It is ratelimited so that we only perform the check once per
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	struct list_head *insert;
 	const int data_dir = arq->is_sync;
 
 	BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	/*
 	 * take it off the sort and fifo list, add to dispatch queue
 	 */
-	insert = ad->dispatch->prev;
-
 	while (!list_empty(&rq->queuelist)) {
 		struct request *__rq = list_entry_rq(rq->queuelist.next);
 		struct as_rq *__arq = RQ_DATA(__rq);
 
-		list_move_tail(&__rq->queuelist, ad->dispatch);
+		list_del(&__rq->queuelist);
+
+		elv_dispatch_add_tail(ad->q, __rq);
 
 		if (__arq->io_context && __arq->io_context->aic)
 			atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	as_remove_queued_request(ad->q, rq);
 	WARN_ON(arq->state != AS_RQ_QUEUED);
 
-	list_add(&rq->queuelist, insert);
+	elv_dispatch_sort(ad->q, rq);
+
 	arq->state = AS_RQ_DISPATCHED;
 	if (arq->io_context && arq->io_context->aic)
 		atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
  * read/write expire, batch expire, etc, and moves it to the dispatch
  * queue. Returns 1 if a request was found, 0 otherwise.
  */
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
 {
+	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
 	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
 
+	if (unlikely(force)) {
+		/*
+		 * Forced dispatch: accounting is useless, so reset the
+		 * accounting state and drain the fifo_lists.  Note that
+		 * batch_data_dir is reset to REQ_SYNC to avoid corrupting
+		 * the write batch accounting, which occurs on the W->R
+		 * transition.
+		 */
+		int dispatched = 0;
+
+		ad->batch_data_dir = REQ_SYNC;
+		ad->changed_batch = 0;
+		ad->new_batch = 0;
+
+		while (ad->next_arq[REQ_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+		while (ad->next_arq[REQ_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+		return dispatched;
+	}
+
 	/* Signal that the write batch was uncontended, so we can't time it */
 	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
 		if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
 	return 1;
 }
 
-static struct request *as_next_request(request_queue_t *q)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct request *rq = NULL;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first
-	 */
-	if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
-		rq = list_entry_rq(ad->dispatch->next);
-
-	return rq;
-}
-
 /*
  * Add arq to a list behind alias
  */
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
 	/*
 	 * Don't want to have to handle merges.
 	 */
-	as_remove_merge_hints(ad->q, arq);
+	as_del_arq_hash(arq);
 }
 
 /*
  * add arq to rbtree and fifo
  */
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
 {
+	struct as_data *ad = q->elevator->elevator_data;
+	struct as_rq *arq = RQ_DATA(rq);
 	struct as_rq *alias;
 	int data_dir;
 
+	if (arq->state != AS_RQ_PRESCHED) {
+		printk("arq->state: %d\n", arq->state);
+		WARN_ON(1);
+	}
+	arq->state = AS_RQ_NEW;
+
 	if (rq_data_dir(arq->request) == READ
 			|| current->flags&PF_SYNCWRITE)
 		arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 		arq->expires = jiffies + ad->fifo_expire[data_dir];
 		list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
 
-		if (rq_mergeable(arq->request)) {
+		if (rq_mergeable(arq->request))
 			as_add_arq_hash(ad, arq);
-
-			if (!ad->q->last_merge)
-				ad->q->last_merge = arq->request;
-		}
 		as_update_arq(ad, arq); /* keep state machine up to date */
 
 	} else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->state = AS_RQ_QUEUED;
 }
 
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state == AS_RQ_REMOVED) {
-			arq->state = AS_RQ_DISPATCHED;
-			if (arq->io_context && arq->io_context->aic)
-				atomic_inc(&arq->io_context->aic->nr_dispatched);
-		}
-	} else
-		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
-	/* Stop anticipating - let this request get through */
-	as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
-	as_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
-	if (blk_fs_request(rq)) {
-		struct as_rq *arq = RQ_DATA(rq);
-		arq->state = AS_RQ_DISPATCHED;
-		ad->nr_dispatched++;
-	}
+	WARN_ON(arq->state != AS_RQ_DISPATCHED);
+	arq->state = AS_RQ_REMOVED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_dec(&arq->io_context->aic->nr_dispatched);
 }
 
-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state != AS_RQ_PRESCHED) {
-			printk("arq->state: %d\n", arq->state);
-			WARN_ON(1);
-		}
-		arq->state = AS_RQ_NEW;
-	}
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT)) {
-		WARN_ON(1);
-		where = ELEVATOR_INSERT_BACK;
-	}
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (ad->next_arq[REQ_SYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
-			while (ad->next_arq[REQ_ASYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
-			list_add_tail(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			as_add_request(ad, arq);
-			break;
-		default:
-			BUG();
-			return;
-	}
+	WARN_ON(arq->state != AS_RQ_REMOVED);
+	arq->state = AS_RQ_DISPATCHED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_inc(&arq->io_context->aic->nr_dispatched);
 }
 
 /*
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
-	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
-		|| !list_empty(&ad->fifo_list[REQ_SYNC])
-		|| !list_empty(ad->dispatch))
-			return 0;
-
-	return 1;
+	return list_empty(&ad->fifo_list[REQ_ASYNC])
+		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
 static struct request *
@@ -1607,15 +1474,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (rq_mergeable(__rq))
-		q->last_merge = __rq;
-out_insert:
 	if (ret) {
 		if (rq_mergeable(__rq))
 			as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 		 * behind the disk head. We currently don't bother adjusting.
 		 */
 	}
-
-	if (arq->on_hash)
-		q->last_merge = req;
 }
 
 static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
 	 * kill knowledge of next, this one is a goner
 	 */
 	as_remove_queued_request(q, next);
+	as_put_io_context(anext);
 
 	anext->state = AS_RQ_MERGED;
 }
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (as_next_request(q))
+	if (!as_queue_empty(q))
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 		return;
 	}
 
-	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+		     arq->state != AS_RQ_PRESCHED &&
+		     arq->state != AS_RQ_MERGED)) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 	}
@@ -1807,7 +1662,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, int gfp_mask)
+			  struct bio *bio, gfp_t gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
 	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->dispatch = &q->queue_head;
 	ad->fifo_expire[REQ_SYNC] = default_read_expire;
 	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
 		.elevator_merge_fn = 		as_merge,
 		.elevator_merged_fn =		as_merged_request,
 		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn = 	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
 		.elevator_deactivate_req_fn = 	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,

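The as-iosched rework above is one instance of the elevator API change running through this merge: the per-scheduler private dispatch list and the next_req/add_req/remove_req/requeue_req hooks give way to a single elevator_dispatch_fn that moves requests onto the generic q->queue_head via elv_dispatch_sort()/elv_dispatch_add_tail(), plus activate/deactivate hooks for driver-side accounting. A scheduler now plugs in roughly like this (the hook names are the real ones from this merge; the example_* functions and the exact elevator_ops layout are stand-ins):

static int  example_dispatch(request_queue_t *q, int force);
static void example_add_request(request_queue_t *q, struct request *rq);
static void example_activate(request_queue_t *q, struct request *rq);
static void example_deactivate(request_queue_t *q, struct request *rq);

static struct elevator_ops example_ops = {
	/* move >= 1 request to q->queue_head; force != 0 means
	 * drain everything the scheduler is holding */
	.elevator_dispatch_fn		= example_dispatch,
	/* take a new fs request into the scheduler's own lists */
	.elevator_add_req_fn		= example_add_request,
	/* driver took the request / handed it back: accounting only */
	.elevator_activate_req_fn	= example_activate,
	.elevator_deactivate_req_fn	= example_deactivate,
};
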
+ 85 - 287
drivers/block/cfq-iosched.c

@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
 	(node)->rb_left = NULL;		\
 } while (0)
 #define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
-#define ON_RB(node)		((node)->rb_color != RB_NONE)
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector
 
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
 #undef CFQ_CFQQ_FNS
 
 enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_in_flight = 0,
-	CFQ_CRQ_FLAG_in_driver,
-	CFQ_CRQ_FLAG_is_sync,
-	CFQ_CRQ_FLAG_requeued,
+	CFQ_CRQ_FLAG_is_sync = 0,
 };
 
 #define CFQ_CRQ_FNS(name)						\
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
 	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
 }
 
-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
 CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
 #undef CFQ_CRQ_FNS
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
 
 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
 	hlist_del_init(&crq->hash);
 }
 
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
-	cfq_del_crq_hash(crq);
-
-	if (q->last_merge == crq->request)
-		q->last_merge = NULL;
-}
-
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
 	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
 	return NULL;
 }
 
-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
-	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+	if (!cfqd->rq_in_driver && cfqd->busy_queues)
 		kblockd_schedule_work(&cfqd->unplug_work);
 }
 
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	return !cfq_pending_requests(cfqd);
+	return !cfqd->busy_queues;
 }
 
 /*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	if (crq2 == NULL)
 		return crq1;
 
-	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
-		return crq1;
-	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
-		return crq2;
-
 	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
 		return crq1;
 	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
 	struct rb_node *rbnext, *rbprev;
 
-	rbnext = NULL;
-	if (ON_RB(&last->rb_node))
-		rbnext = rb_next(&last->rb_node);
-	if (!rbnext) {
+	if (!(rbnext = rb_next(&last->rb_node))) {
 		rbnext = rb_first(&cfqq->sort_list);
 		if (rbnext == &last->rb_node)
 			rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
  * the pending list according to last request service
  */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
 
-	cfq_resort_rr_list(cfqq, requeue);
+	cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
 
-	if (ON_RB(&crq->rb_node)) {
-		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = cfq_crq_is_sync(crq);
+	BUG_ON(!cfqq->queued[sync]);
+	cfqq->queued[sync]--;
 
-		BUG_ON(!cfqq->queued[sync]);
-		cfqq->queued[sync]--;
+	cfq_update_next_crq(crq);
 
-		cfq_update_next_crq(crq);
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	RB_CLEAR_COLOR(&crq->rb_node);
 
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		RB_CLEAR_COLOR(&crq->rb_node);
-
-		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
-			cfq_del_cfqq_rr(cfqd, cfqq);
-	}
+	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+		cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
 static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	 * if that happens, put the alias on the dispatch list
 	 */
 	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
-		cfq_dispatch_sort(cfqd->queue, __alias);
+		cfq_dispatch_insert(cfqd->queue, __alias);
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
 	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+		cfq_add_cfqq_rr(cfqd, cfqq);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 static inline void
 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	if (ON_RB(&crq->rb_node)) {
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[cfq_crq_is_sync(crq)]--;
-	}
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	cfqq->queued[cfq_crq_is_sync(crq)]--;
 
 	cfq_add_crq_rb(crq);
 }
@@ -676,49 +643,28 @@ out:
 	return NULL;
 }
 
-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
-
-		if (cfq_crq_in_driver(crq)) {
-			cfq_clear_crq_in_driver(crq);
-			WARN_ON(!cfqd->rq_in_driver);
-			cfqd->rq_in_driver--;
-		}
-		if (cfq_crq_in_flight(crq)) {
-			const int sync = cfq_crq_is_sync(crq);
 
-			cfq_clear_crq_in_flight(crq);
-			WARN_ON(!cfqq->on_dispatch[sync]);
-			cfqq->on_dispatch[sync]--;
-		}
-		cfq_mark_crq_requeued(crq);
-	}
+	cfqd->rq_in_driver++;
 }
 
-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	cfq_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 }
 
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	if (crq) {
-		list_del_init(&rq->queuelist);
-		cfq_del_crq_rb(crq);
-		cfq_remove_merge_hints(q, crq);
-
-	}
+	list_del_init(&rq->queuelist);
+	cfq_del_crq_rb(crq);
+	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	*req = __rq;
 	return ret;
 }
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
 	cfq_del_crq_hash(crq);
 	cfq_add_crq_hash(cfqd, crq);
 
-	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
 		cfq_update_next_crq(crq);
 		cfq_reposition_crq_rb(cfqq, crq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
 	    time_before(next->start_time, rq->start_time))
 		list_move(&rq->queuelist, &next->queuelist);
 
-	cfq_remove_request(q, next);
+	cfq_remove_request(next);
 }
 
 static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 1;
 }
 
-/*
- * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
- * this function sector sorts the selected request to minimize seeks. we start
- * at cfqd->last_sector, not 0.
- */
-static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct list_head *head = &q->queue_head, *entry = head;
-	struct request *__rq;
-	sector_t last;
-
-	list_del(&crq->request->queuelist);
-
-	last = cfqd->last_sector;
-	list_for_each_entry_reverse(__rq, head, queuelist) {
-		struct cfq_rq *__crq = RQ_DATA(__rq);
-
-		if (blk_barrier_rq(__rq))
-			break;
-		if (!blk_fs_request(__rq))
-			break;
-		if (cfq_crq_requeued(__crq))
-			break;
-
-		if (__rq->sector <= crq->request->sector)
-			break;
-		if (__rq->sector > last && crq->request->sector < last) {
-			last = crq->request->sector + crq->request->nr_sectors;
-			break;
-		}
-		entry = &__rq->queuelist;
-	}
-
-	cfqd->last_sector = last;
 
 	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-
-	cfq_del_crq_rb(crq);
-	cfq_remove_merge_hints(q, crq);
-
-	cfq_mark_crq_in_flight(crq);
-	cfq_clear_crq_requeued(crq);
-
+	cfq_remove_request(crq->request);
 	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-	list_add_tail(&crq->request->queuelist, entry);
+	elv_dispatch_sort(q, crq->request);
 }
 
 /*
@@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * finally, insert request into driver dispatch list
 		 */
-		cfq_dispatch_sort(cfqd->queue, crq);
+		cfq_dispatch_insert(cfqd->queue, crq);
 
 		cfqd->dispatch_slice++;
 		dispatched++;
@@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 static int
-cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
+cfq_dispatch_requests(request_queue_t *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
@@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 
 	cfqq = cfq_select_queue(cfqd, force);
 	if (cfqq) {
+		int max_dispatch;
+
+		/*
+		 * if idle window is disabled, allow queue buildup
+		 */
+		if (!cfq_cfqq_idle_window(cfqq) &&
+		    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+			return 0;
+
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
 
-		if (cfq_class_idle(cfqq))
-			max_dispatch = 1;
+		if (!force) {
+			max_dispatch = cfqd->cfq_quantum;
+			if (cfq_class_idle(cfqq))
+				max_dispatch = 1;
+		} else
+			max_dispatch = INT_MAX;
 
 		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 	}
@@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 	return 0;
 }
 
-static inline void cfq_account_dispatch(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct cfq_data *cfqd = cfqq->cfqd;
-
-	if (unlikely(!blk_fs_request(crq->request)))
-		return;
-
-	/*
-	 * accounted bit is necessary since some drivers will call
-	 * elv_next_request() many times for the same request (eg ide)
-	 */
-	if (cfq_crq_in_driver(crq))
-		return;
-
-	cfq_mark_crq_in_driver(crq);
-	cfqd->rq_in_driver++;
-}
-
-static inline void
-cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
-	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now;
-
-	if (!cfq_crq_in_driver(crq))
-		return;
-
-	now = jiffies;
-
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
-
-	if (!cfq_class_idle(cfqq))
-		cfqd->last_end_request = now;
-
-	if (!cfq_cfqq_dispatched(cfqq)) {
-		if (cfq_cfqq_on_rr(cfqq)) {
-			cfqq->service_last = now;
-			cfq_resort_rr_list(cfqq, 0);
-		}
-		if (cfq_cfqq_expired(cfqq)) {
-			__cfq_slice_expired(cfqd, cfqq, 0);
-			cfq_schedule_dispatch(cfqd);
-		}
-	}
-
-	if (cfq_crq_is_sync(crq))
-		crq->io_context->last_end_request = now;
-}
-
-static struct request *cfq_next_request(request_queue_t *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct request *rq;
-
-	if (!list_empty(&q->queue_head)) {
-		struct cfq_rq *crq;
-dispatch:
-		rq = list_entry_rq(q->queue_head.next);
-
-		crq = RQ_DATA(rq);
-		if (crq) {
-			struct cfq_queue *cfqq = crq->cfq_queue;
-
-			/*
-			 * if idle window is disabled, allow queue buildup
-			 */
-			if (!cfq_crq_in_driver(crq) &&
-			    !cfq_cfqq_idle_window(cfqq) &&
-			    !blk_barrier_rq(rq) &&
-			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-				return NULL;
-
-			cfq_remove_merge_hints(q, crq);
-			cfq_account_dispatch(crq);
-		}
-
-		return rq;
-	}
-
-	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
-		goto dispatch;
-
-	return NULL;
-}
-
 /*
  * task holds one reference to the queue, dropped when task exits. each crq
  * in-flight on this queue also holds a reference, dropped when crq is freed.
@@ -1422,7 +1246,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic)
 }
 
 static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
+cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
@@ -1517,7 +1341,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 
 static struct cfq_queue *
 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
-	      int gfp_mask)
+	      gfp_t gfp_mask)
 {
 	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1578,7 +1402,7 @@ out:
  * cfqq, so we don't need to worry about it disappearing
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
@@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 }
 
-static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
+static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(rq);
 	struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		cfq_add_crq_hash(cfqd, crq);
 
-		if (!cfqd->queue->last_merge)
-			cfqd->queue->last_merge = rq;
-	}
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
-static void
-cfq_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (cfq_dispatch_requests(q, INT_MAX, 1))
-				;
-			list_add_tail(&rq->queuelist, &q->queue_head);
-			/*
-			 * If we were idling with pending requests on
-			 * inactive cfqqs, force dispatching will
-			 * remove the idle timer and the queue won't
-			 * be kicked by __make_request() afterward.
-			 * Kick it here.
-			 */
-			cfq_schedule_dispatch(cfqd);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			cfq_enqueue(cfqd, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
+	unsigned long now;
 
-	if (unlikely(!blk_fs_request(rq)))
-		return;
+	now = jiffies;
 
-	cfqq = crq->cfq_queue;
+	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqq->on_dispatch[sync]);
+	cfqd->rq_in_driver--;
+	cfqq->on_dispatch[sync]--;
 
-	if (cfq_crq_in_flight(crq)) {
-		const int sync = cfq_crq_is_sync(crq);
+	if (!cfq_class_idle(cfqq))
+		cfqd->last_end_request = now;
 
-		WARN_ON(!cfqq->on_dispatch[sync]);
-		cfqq->on_dispatch[sync]--;
+	if (!cfq_cfqq_dispatched(cfqq)) {
+		if (cfq_cfqq_on_rr(cfqq)) {
+			cfqq->service_last = now;
+			cfq_resort_rr_list(cfqq, 0);
+		}
+		if (cfq_cfqq_expired(cfqq)) {
+			__cfq_slice_expired(cfqd, cfqq, 0);
+			cfq_schedule_dispatch(cfqd);
+		}
 	}
 
-	cfq_account_completion(cfqq, crq);
+	if (cfq_crq_is_sync(crq))
+		crq->io_context->last_end_request = now;
 }
 
 static struct request *
@@ -2075,7 +1877,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  */
 static int
 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		int gfp_mask)
+		gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
-		cfq_clear_crq_in_flight(crq);
-		cfq_clear_crq_in_driver(crq);
-		cfq_clear_crq_requeued(crq);
 
 		if (rw == READ || process_sync(tsk))
 			cfq_mark_crq_is_sync(crq);
@@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 		 * only expire and reinvoke request handler, if there are
 		 * other queues with pending requests
 		 */
-		if (!cfq_pending_requests(cfqd)) {
+		if (!cfqd->busy_queues) {
 			cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
 			add_timer(&cfqd->idle_slice_timer);
 			goto out_cont;
@@ -2576,10 +2375,9 @@ static struct elevator_type iosched_cfq = {
 		.elevator_merge_fn = 		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
-		.elevator_next_req_fn =		cfq_next_request,
+		.elevator_dispatch_fn =		cfq_dispatch_requests,
 		.elevator_add_req_fn =		cfq_insert_request,
-		.elevator_remove_req_fn =	cfq_remove_request,
-		.elevator_requeue_req_fn =	cfq_requeue_request,
+		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,

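CFQ sheds the most state in this conversion: the per-request in_flight/in_driver/requeued flags disappear, and the only driver-side bookkeeping left is the cfqd->rq_in_driver counter maintained by the activate/deactivate hooks and the completion path. Purely as an illustration of the stages a request now passes through (CFQ itself keeps no such enum; the stages are implicit in which list the request sits on):

enum {
	CRQ_QUEUED,	/* in cfqq->sort_list and the cfqq fifo      */
	CRQ_DISPATCHED,	/* moved to q->queue_head by the dispatch_fn */
	CRQ_IN_DRIVER,	/* activate_req_fn ran: rq_in_driver++       */
	CRQ_DONE,	/* completed_req_fn ran: rq_in_driver--      */
};
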
+ 19 - 106
drivers/block/deadline-iosched.c

@@ -50,7 +50,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
@@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq)
 		__deadline_del_drq_hash(drq);
 }
 
-static void
-deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq)
-{
-	deadline_del_drq_hash(drq);
-
-	if (q->last_merge == drq->request)
-		q->last_merge = NULL;
-}
-
 static inline void
 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
 {
@@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
 			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
 	}
 
-	if (ON_RB(&drq->rb_node)) {
-		rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-		RB_CLEAR(&drq->rb_node);
-	}
+	BUG_ON(!ON_RB(&drq->rb_node));
+	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
+	RB_CLEAR(&drq->rb_node);
 }
 
 static struct request *
@@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
 /*
  * add drq to rbtree and fifo
  */
-static inline void
+static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
@@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		deadline_add_drq_hash(dd, drq);
-
-		if (!q->last_merge)
-			q->last_merge = rq;
-	}
 }
 
 /*
@@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
 	struct deadline_rq *drq = RQ_DATA(rq);
+	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (drq) {
-		struct deadline_data *dd = q->elevator->elevator_data;
-
-		list_del_init(&drq->fifo);
-		deadline_remove_merge_hints(q, drq);
-		deadline_del_drq_rb(dd, drq);
-	}
+	list_del_init(&drq->fifo);
+	deadline_del_drq_rb(dd, drq);
+	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -332,15 +314,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
@@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	if (ret)
 		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
@@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
 	request_queue_t *q = drq->request->q;
 
 	deadline_remove_request(q, drq->request);
-	list_add_tail(&drq->request->queuelist, dd->dispatch);
+	elv_dispatch_add_tail(q, drq->request);
 }
 
 /*
@@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
  * deadline_dispatch_requests selects the best request according to
  * read/write expire, fifo_batch, etc
  */
-static int deadline_dispatch_requests(struct deadline_data *dd)
+static int deadline_dispatch_requests(request_queue_t *q, int force)
 {
+	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
 	struct deadline_rq *drq;
@@ -597,65 +567,12 @@ dispatch_request:
 	return 1;
 }
 
-static struct request *deadline_next_request(request_queue_t *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct request *rq;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first one
-	 */
-	if (!list_empty(dd->dispatch)) {
-dispatch:
-		rq = list_entry_rq(dd->dispatch->next);
-		return rq;
-	}
-
-	if (deadline_dispatch_requests(dd))
-		goto dispatch;
-
-	return NULL;
-}
-
-static void
-deadline_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT))
-		where = ELEVATOR_INSERT_BACK;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (deadline_dispatch_requests(dd))
-				;
-			list_add_tail(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			deadline_add_request(q, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static int deadline_queue_empty(request_queue_t *q)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (!list_empty(&dd->fifo_list[WRITE])
-	    || !list_empty(&dd->fifo_list[READ])
-	    || !list_empty(dd->dispatch))
-		return 0;
-
-	return 1;
+	return list_empty(&dd->fifo_list[WRITE])
+		&& list_empty(&dd->fifo_list[READ]);
 }
 
 static struct request *
@@ -733,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
 	dd->sort_list[WRITE] = RB_ROOT;
-	dd->dispatch = &q->queue_head;
 	dd->fifo_expire[READ] = read_expire;
 	dd->fifo_expire[WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
@@ -748,15 +664,13 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
 
-	if (drq) {
-		mempool_free(drq, dd->drq_pool);
-		rq->elevator_private = NULL;
-	}
+	mempool_free(drq, dd->drq_pool);
+	rq->elevator_private = NULL;
 }
 
 static int
 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		     int gfp_mask)
+		     gfp_t gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;
@@ -917,9 +831,8 @@ static struct elevator_type iosched_deadline = {
 		.elevator_merge_fn = 		deadline_merge,
 		.elevator_merged_fn =		deadline_merged_request,
 		.elevator_merge_req_fn =	deadline_merged_requests,
-		.elevator_next_req_fn =		deadline_next_request,
-		.elevator_add_req_fn =		deadline_insert_request,
-		.elevator_remove_req_fn =	deadline_remove_request,
+		.elevator_dispatch_fn =		deadline_dispatch_requests,
+		.elevator_add_req_fn =		deadline_add_request,
 		.elevator_queue_empty_fn =	deadline_queue_empty,
 		.elevator_former_req_fn =	deadline_former_request,
 		.elevator_latter_req_fn =	deadline_latter_request,

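deadline's conversion is the simplest of the three schedulers: its dispatch list pointer (previously aliased to &q->queue_head) is gone, and deadline_move_to_dispatch() hands requests straight to elv_dispatch_add_tail(). Judging from the boundary logic in elv_dispatch_sort() in the elevator.c hunks below, that helper presumably amounts to little more than this sketch (rq_end_sector() is assumed to be (rq)->sector + (rq)->nr_sectors):

/* sketch: append rq and make its end the new dispatch boundary
 * consulted by elv_dispatch_sort() */
void elv_dispatch_add_tail(request_queue_t *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
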
+ 207 - 138
drivers/block/elevator.c

@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_try_merge);
 
-inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
-{
-	if (q->last_merge)
-		return elv_try_merge(q->last_merge, bio);
-
-	return ELEVATOR_NO_MERGE;
-}
-EXPORT_SYMBOL(elv_try_last_merge);
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e = NULL;
 	struct list_head *entry;
 
-	spin_lock_irq(&elv_list_lock);
 	list_for_each(entry, &elv_list) {
 		struct elevator_type *__e;
 
@@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name)
 			break;
 		}
 	}
-	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e)
 
 static struct elevator_type *elevator_get(const char *name)
 {
-	struct elevator_type *e = elevator_find(name);
+	struct elevator_type *e;
 
-	if (!e)
-		return NULL;
-	if (!try_module_get(e->elevator_owner))
-		return NULL;
+	spin_lock_irq(&elv_list_lock);
+
+	e = elevator_find(name);
+	if (e && !try_module_get(e->elevator_owner))
+		e = NULL;
+
+	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
 	eq->ops = &e->ops;
 	eq->elevator_type = e;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
 	q->elevator = eq;
 
 	if (eq->ops->elevator_init_fn)
@@ -153,11 +144,15 @@ static char chosen_elevator[16];
 
 static void elevator_setup_default(void)
 {
+	struct elevator_type *e;
+
 	/*
 	 * check if default is set and exists
 	 */
-	if (chosen_elevator[0] && elevator_find(chosen_elevator))
+	if (chosen_elevator[0] && (e = elevator_get(chosen_elevator))) {
+		elevator_put(e);
 		return;
+	}
 
 #if defined(CONFIG_IOSCHED_AS)
 	strcpy(chosen_elevator, "anticipatory");
@@ -186,6 +181,11 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_queue *eq;
 	int ret = 0;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	elevator_setup_default();
 
 	if (!name)
@@ -220,9 +220,52 @@ void elevator_exit(elevator_t *e)
 	kfree(e);
 }
 
+/*
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  rq is sort-inserted into the dispatch queue, stopping at
+ * barriers and already-started requests and honoring the current
+ * scheduling boundary (q->end_sector).  To be used by specific
+ * elevators.
+ */
+void elv_dispatch_sort(request_queue_t *q, struct request *rq)
+{
+	sector_t boundary;
+	struct list_head *entry;
+
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	boundary = q->end_sector;
+
+	list_for_each_prev(entry, &q->queue_head) {
+		struct request *pos = list_entry_rq(entry);
+
+		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+			break;
+		if (rq->sector >= boundary) {
+			if (pos->sector < boundary)
+				continue;
+		} else {
+			if (pos->sector >= boundary)
+				break;
+		}
+		if (rq->sector >= pos->sector)
+			break;
+	}
+
+	list_add(&rq->queuelist, entry);
+}
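+/*
+ * Example: with q->end_sector at 100 and a dispatch queue ordered
+ * [120, 180, 40, 60], a new request at sector 150 is sort-inserted
+ * between 120 and 180, while one at sector 50 lands between 40 and
+ * 60.  Requests at or past the boundary always drain ahead of those
+ * behind it, and the backward scan never passes a barrier or an
+ * already-started request.
+ */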
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	int ret;
+
+	if (q->last_merge) {
+		ret = elv_try_merge(q->last_merge, bio);
+		if (ret != ELEVATOR_NO_MERGE) {
+			*req = q->last_merge;
+			return ret;
+		}
+	}
 
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
@@ -236,6 +279,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
+
+	q->last_merge = rq;
 }
 
 void elv_merge_requests(request_queue_t *q, struct request *rq,
@@ -243,20 +288,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 {
 	elevator_t *e = q->elevator;
 
-	if (q->last_merge == next)
-		q->last_merge = NULL;
-
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
+
+	q->last_merge = rq;
 }
 
-/*
- * For careful internal use by the block layer. Essentially the same as
- * a requeue in that it tells the io scheduler that this request is not
- * active in the driver or hardware anymore, but we don't want the request
- * added back to the scheduler. Function is not exported.
- */
-void elv_deactivate_request(request_queue_t *q, struct request *rq)
+void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -264,19 +302,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
 	 * it already went through dequeue, we need to decrement the
 	 * in_flight count again
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
+		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
+			e->ops->elevator_deactivate_req_fn(q, rq);
+	}
 
 	rq->flags &= ~REQ_STARTED;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
-void elv_requeue_request(request_queue_t *q, struct request *rq)
-{
-	elv_deactivate_request(q, rq);
-
 	/*
 	 * if this is the flush, requeue the original instead and drop the flush
 	 */
@@ -285,31 +318,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 		rq = rq->end_io_data;
 	}
 
-	/*
-	 * the request is prepped and may have some resources allocated.
-	 * allowing unprepped requests to pass this one may cause resource
-	 * deadlock.  turn on softbarrier.
-	 */
-	rq->flags |= REQ_SOFTBARRIER;
-
-	/*
-	 * if iosched has an explicit requeue hook, then use that. otherwise
-	 * just put the request at the front of the queue
-	 */
-	if (q->elevator->ops->elevator_requeue_req_fn)
-		q->elevator->ops->elevator_requeue_req_fn(q, rq);
-	else
-		__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
-	/*
-	 * barriers implicitly indicate back insertion
-	 */
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
-	    where == ELEVATOR_INSERT_SORT)
+	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * barriers implicitly indicate back insertion
+		 */
+		if (where == ELEVATOR_INSERT_SORT)
+			where = ELEVATOR_INSERT_BACK;
+
+		/*
+		 * this request is scheduling boundary, update end_sector
+		 */
+		if (blk_fs_request(rq)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -317,23 +346,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 	rq->q = q;
 
-	if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
-		q->elevator->ops->elevator_add_req_fn(q, rq, where);
+	switch (where) {
+	case ELEVATOR_INSERT_FRONT:
+		rq->flags |= REQ_SOFTBARRIER;
 
-		if (blk_queue_plugged(q)) {
-			int nrq = q->rq.count[READ] + q->rq.count[WRITE]
-				  - q->in_flight;
+		list_add(&rq->queuelist, &q->queue_head);
+		break;
 
-			if (nrq >= q->unplug_thresh)
-				__generic_unplug_device(q);
-		}
-	} else
+	case ELEVATOR_INSERT_BACK:
+		rq->flags |= REQ_SOFTBARRIER;
+
+		while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+			;
+		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
-		 * if drain is set, store the request "locally". when the drain
-		 * is finished, the requests will be handed ordered to the io
-		 * scheduler
+		 * We kick the queue here for the following reasons:
+		 * - The elevator might have delayed requests by
+		 *   returning NULL earlier and be releasing them now.
+		 *   As the queue wasn't empty before this request,
+		 *   ll_rw_blk won't run the queue on return, resulting
+		 *   in a hang.
+		 * - Usually, back-inserted requests won't be merged
+		 *   with anything.  There's no point in delaying queue
+		 *   processing.
 		 */
-		list_add_tail(&rq->queuelist, &q->drain_list);
+		blk_remove_plug(q);
+		q->request_fn(q);
+		break;
+
+	case ELEVATOR_INSERT_SORT:
+		BUG_ON(!blk_fs_request(rq));
+		rq->flags |= REQ_SORTED;
+		q->elevator->ops->elevator_add_req_fn(q, rq);
+		if (q->last_merge == NULL && rq_mergeable(rq))
+			q->last_merge = rq;
+		break;
+
+	default:
+		printk(KERN_ERR "%s: bad insertion point %d\n",
+		       __FUNCTION__, where);
+		BUG();
+	}
+
+	if (blk_queue_plugged(q)) {
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+			- q->in_flight;
+
+		if (nrq >= q->unplug_thresh)
+			__generic_unplug_device(q);
+	}
 }
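+/*
+ * Note that a barrier request submitted with ELEVATOR_INSERT_SORT is
+ * silently promoted to back insertion above: the elevator is drained,
+ * the request is appended as the new scheduling boundary
+ * (q->end_sector, q->boundary_rq) and the queue is kicked immediately
+ * rather than waiting for an unplug.
+ */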
 
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -348,13 +408,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
-	struct request *rq = q->elevator->ops->elevator_next_req_fn(q);
+	struct request *rq;
+
+	if (unlikely(list_empty(&q->queue_head) &&
+		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
+		return NULL;
+
+	rq = list_entry_rq(q->queue_head.next);
 
 	/*
 	 * if this is a barrier write and the device has to issue a
 	 * flush sequence to support it, check how far we are
 	 */
-	if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) {
+	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
 		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
 
 		if (q->ordered == QUEUE_ORDERED_FLUSH &&
@@ -371,15 +437,30 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * just mark as started even if we don't start it, a request
-		 * that has been delayed should not be passed by new incoming
-		 * requests
-		 */
-		rq->flags |= REQ_STARTED;
+		if (!(rq->flags & REQ_STARTED)) {
+			elevator_t *e = q->elevator;
 
-		if (rq == q->last_merge)
-			q->last_merge = NULL;
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing).  Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq) &&
+			    e->ops->elevator_activate_req_fn)
+				e->ops->elevator_activate_req_fn(q, rq);
+
+			/*
+			 * Just mark it as started even if we don't
+			 * start it; a request that has been delayed
+			 * should not be passed by new incoming requests.
+			 */
+			rq->flags |= REQ_STARTED;
+		}
+
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
 
 		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
@@ -391,9 +472,9 @@ struct request *elv_next_request(request_queue_t *q)
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  turn on softbarrier.
+			 * avoid resource deadlock.  REQ_STARTED will
+			 * prevent other fs requests from passing this one.
 			 */
-			rq->flags |= REQ_SOFTBARRIER;
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
@@ -416,42 +497,32 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
-void elv_remove_request(request_queue_t *q, struct request *rq)
+void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	BUG_ON(list_empty(&rq->queuelist));
+
+	list_del_init(&rq->queuelist);
 
 	/*
 	 * the time frame between a request being removed from the lists
 	 * and it being freed is accounted as io that is in progress at
-	 * the driver side. note that we only account requests that the
-	 * driver has seen (REQ_STARTED set), to avoid false accounting
-	 * for request-request merges
+	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
 		q->in_flight++;
-
-	/*
-	 * the main clearing point for q->last_merge is on retrieval of
-	 * request by driver (it calls elv_next_request()), but it _can_
-	 * also happen here if a request is added to the queue but later
-	 * deleted without ever being given to driver (merged with another
-	 * request).
-	 */
-	if (rq == q->last_merge)
-		q->last_merge = NULL;
-
-	if (e->ops->elevator_remove_req_fn)
-		e->ops->elevator_remove_req_fn(q, rq);
 }
 
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
 
+	if (!list_empty(&q->queue_head))
+		return 0;
+
 	if (e->ops->elevator_queue_empty_fn)
 		return e->ops->elevator_queue_empty_fn(q);
 
-	return list_empty(&q->queue_head);
+	return 1;
 }
 
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -487,7 +558,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 }
 
 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    int gfp_mask)
+		    gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
@@ -523,11 +594,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	/*
 	 * request is released from the driver, io must be done
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
-
-	if (e->ops->elevator_completed_req_fn)
-		e->ops->elevator_completed_req_fn(q, rq);
+		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+			e->ops->elevator_completed_req_fn(q, rq);
+	}
 }
 
 int elv_register_queue(struct request_queue *q)
@@ -555,10 +626,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+	spin_lock_irq(&elv_list_lock);
 	if (elevator_find(e->elevator_name))
 		BUG();
-
-	spin_lock_irq(&elv_list_lock);
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock_irq(&elv_list_lock);
 
@@ -582,25 +652,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
  * need for the new one. this way we have a chance of going back to the old
- * one, if the new one fails init for some reason. we also do an intermediate
- * switch to noop to ensure safety with stack-allocated requests, since they
- * don't originate from the block layer allocator. noop is safe here, because
- * it never needs to touch the elevator itself for completion events. DRAIN
- * flags will make sure we don't touch it for additions either.
+ * one, if the new one fails init for some reason.
  */
 static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
-	elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	struct elevator_type *noop_elevator = NULL;
-	elevator_t *old_elevator;
+	elevator_t *old_elevator, *e;
 
+	/*
+	 * Allocate new elevator
+	 */
+	e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
 	if (!e)
 		goto error;
 
 	/*
-	 * first step, drain requests from the block freelist
+	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
-	blk_wait_queue_drained(q, 0);
+	spin_lock_irq(q->queue_lock);
+
+	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+		;
+
+	while (q->rq.elvpriv) {
+		spin_unlock_irq(q->queue_lock);
+		msleep(10);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
 
 	/*
 	 * unregister old elevator data
@@ -608,18 +689,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	elv_unregister_queue(q);
 	old_elevator = q->elevator;
 
-	/*
- 	 * next step, switch to noop since it uses no private rq structures
-	 * and doesn't allocate any memory for anything. then wait for any
-	 * non-fs requests in-flight
- 	 */
-	noop_elevator = elevator_get("noop");
-	spin_lock_irq(q->queue_lock);
-	elevator_attach(q, noop_elevator, e);
-	spin_unlock_irq(q->queue_lock);
-
-	blk_wait_queue_drained(q, 1);
-
 	/*
 	 * attach and start new elevator
 	 */
@@ -630,11 +699,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		goto fail_register;
 
 	/*
-	 * finally exit old elevator and start queue again
+	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	blk_finish_queue_drain(q);
-	elevator_put(noop_elevator);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 	return;
 
 fail_register:
@@ -643,13 +711,13 @@ fail_register:
 	 * one again (along with re-adding the sysfs dir)
 	 */
 	elevator_exit(e);
+	e = NULL;
 fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	blk_finish_queue_drain(q);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	kfree(e);
 error:
-	if (noop_elevator)
-		elevator_put(noop_elevator);
 	elevator_put(new_e);
 	printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
 }
@@ -701,11 +769,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
 EXPORT_SYMBOL(elv_add_request);
 EXPORT_SYMBOL(__elv_add_request);
 EXPORT_SYMBOL(elv_requeue_request);
 EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_remove_request);
+EXPORT_SYMBOL(elv_dequeue_request);
 EXPORT_SYMBOL(elv_queue_empty);
 EXPORT_SYMBOL(elv_completed_request);
 EXPORT_SYMBOL(elevator_exit);
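
The elevator_switch() drain above leans on request-list accounting
added in the ll_rw_blk.c hunks that follow: while QUEUE_FLAG_ELVSWITCH
is set, get_request() stops tagging new requests with REQ_ELVPRIV, so
they bypass the io scheduler entirely (a non-private
ELEVATOR_INSERT_SORT is demoted to a back insertion), and the
msleep(10) loop simply polls until every elevator-private request
already in flight has been freed and q->rq.elvpriv drops back to zero.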

+ 55 - 138
drivers/block/ll_rw_blk.c

@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	blk_queue_activity_fn(q, NULL, NULL);
-
-	INIT_LIST_HEAD(&q->drain_list);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -353,6 +351,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_PREFLUSH;
 
 	if (!flush_rq->errors)
@@ -369,6 +369,8 @@ static void blk_post_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_POSTFLUSH;
 
 	q->end_flush_fn(q, flush_rq);
@@ -408,8 +410,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
 	if (!list_empty(&rq->queuelist))
 		blkdev_dequeue_request(rq);
 
-	elv_deactivate_request(q, rq);
-
 	flush_rq->end_io_data = rq;
 	flush_rq->end_io = blk_pre_flush_end_io;
 
@@ -1040,6 +1040,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags);
 static char *rq_flags[] = {
 	"REQ_RW",
 	"REQ_FAILFAST",
+	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
 	"REQ_CMD",
@@ -1047,6 +1048,7 @@ static char *rq_flags[] = {
 	"REQ_STARTED",
 	"REQ_DONTPREP",
 	"REQ_QUEUED",
+	"REQ_ELVPRIV",
 	"REQ_PC",
 	"REQ_BLOCK_PC",
 	"REQ_SENSE",
@@ -1637,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
 
 	rl->count[READ] = rl->count[WRITE] = 0;
 	rl->starved[READ] = rl->starved[WRITE] = 0;
+	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
-	init_waitqueue_head(&rl->drain);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep, q->node);
@@ -1652,13 +1654,13 @@ static int blk_init_free_list(request_queue_t *q)
 
 static int __make_request(request_queue_t *, struct bio *);
 
-request_queue_t *blk_alloc_queue(int gfp_mask)
+request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id)
+request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	request_queue_t *q;
 
@@ -1782,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	elv_put_request(q, rq);
+	if (rq->flags & REQ_ELVPRIV)
+		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
+		  int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1800,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, bio, gfp_mask))
-		return rq;
+	if (priv) {
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		rq->flags |= REQ_ELVPRIV;
+	}
 
-	mempool_free(rq, q->rq.rq_pool);
-	return NULL;
+	return rq;
 }
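+/*
+ * Note that only requests allocated with priv set carry io scheduler
+ * private data, and blk_free_request() above calls elv_put_request()
+ * solely when REQ_ELVPRIV is present; requests allocated while an
+ * elevator switch is in progress therefore never touch the elevator.
+ */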
 
 /*
@@ -1860,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(request_queue_t *q, int rw)
+static void freed_request(request_queue_t *q, int rw, int priv)
 {
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
+	if (priv)
+		rl->elvpriv--;
 
 	__freed_request(q, rw);
 
 	if (unlikely(rl->starved[rw ^ 1]))
 		__freed_request(q, rw ^ 1);
-
-	if (!rl->count[READ] && !rl->count[WRITE]) {
-		smp_mb();
-		if (unlikely(waitqueue_active(&rl->drain)))
-			wake_up(&rl->drain);
-	}
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1885,14 +1889,12 @@ static void freed_request(request_queue_t *q, int rw)
  * Returns !NULL on success, with queue_lock *not held*.
  */
 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
-				   int gfp_mask)
+				   gfp_t gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = current_io_context(GFP_ATOMIC);
-
-	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
-		goto out;
+	int priv;
 
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
@@ -1937,9 +1939,14 @@ get_rq:
 	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
+
+	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	if (priv)
+		rl->elvpriv++;
+
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1949,7 +1956,7 @@ get_rq:
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 
 		/*
 		 * in the very unlikely event that allocation failed and no
@@ -2019,7 +2026,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
+struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 {
 	struct request *rq;
 
@@ -2251,7 +2258,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  * @gfp_mask:	memory allocation flags
  */
 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
-		    unsigned int len, unsigned int gfp_mask)
+		    unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
@@ -2433,13 +2440,15 @@ void disk_round_stats(struct gendisk *disk)
 {
 	unsigned long now = jiffies;
 
-	__disk_stat_add(disk, time_in_queue,
-			disk->in_flight * (now - disk->stamp));
-	disk->stamp = now;
+	if (now == disk->stamp)
+		return;
 
-	if (disk->in_flight)
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
-	disk->stamp_idle = now;
+	if (disk->in_flight) {
+		__disk_stat_add(disk, time_in_queue,
+				disk->in_flight * (now - disk->stamp));
+		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
+	}
+	disk->stamp = now;
 }
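+/*
+ * Worked example: if three requests have been in flight since
+ * disk->stamp was last updated 10 jiffies ago, this adds 3 * 10 = 30
+ * to time_in_queue and 10 to io_ticks.  The early return keeps
+ * repeated calls within one jiffy from double-counting, and an idle
+ * disk (in_flight == 0) accumulates neither statistic.
+ */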
 
 /*
@@ -2454,6 +2463,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	elv_completed_request(q, req);
+
 	req->rq_status = RQ_INACTIVE;
 	req->rl = NULL;
 
@@ -2463,26 +2474,25 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
-
-		elv_completed_request(q, req);
+		int priv = req->flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
 		blk_free_request(q, req);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 	}
 }
 
 void blk_put_request(struct request *req)
 {
+	unsigned long flags;
+	request_queue_t *q = req->q;
+
 	/*
-	 * if req->rl isn't set, this request didnt originate from the
-	 * block layer, so it's safe to just disregard it
+	 * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
+	 * following if (q) test.
 	 */
-	if (req->rl) {
-		unsigned long flags;
-		request_queue_t *q = req->q;
-
+	if (q) {
 		spin_lock_irqsave(q->queue_lock, flags);
 		__blk_put_request(q, req);
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2797,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
 	}
 }
 
-void blk_finish_queue_drain(request_queue_t *q)
-{
-	struct request_list *rl = &q->rq;
-	struct request *rq;
-	int requeued = 0;
-
-	spin_lock_irq(q->queue_lock);
-	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (!list_empty(&q->drain_list)) {
-		rq = list_entry_rq(q->drain_list.next);
-
-		list_del_init(&rq->queuelist);
-		elv_requeue_request(q, rq);
-		requeued++;
-	}
-
-	if (requeued)
-		q->request_fn(q);
-
-	spin_unlock_irq(q->queue_lock);
-
-	wake_up(&rl->wait[0]);
-	wake_up(&rl->wait[1]);
-	wake_up(&rl->drain);
-}
-
-static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
-{
-	int wait = rl->count[READ] + rl->count[WRITE];
-
-	if (dispatch)
-		wait += !list_empty(&q->queue_head);
-
-	return wait;
-}
-
-/*
- * We rely on the fact that only requests allocated through blk_alloc_request()
- * have io scheduler private data structures associated with them. Any other
- * type of request (allocated on stack or through kmalloc()) should not go
- * to the io scheduler core, but be attached to the queue head instead.
- */
-void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
-{
-	struct request_list *rl = &q->rq;
-	DEFINE_WAIT(wait);
-
-	spin_lock_irq(q->queue_lock);
-	set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (wait_drain(q, rl, wait_dispatch)) {
-		prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
-
-		if (wait_drain(q, rl, wait_dispatch)) {
-			__generic_unplug_device(q);
-			spin_unlock_irq(q->queue_lock);
-			io_schedule();
-			spin_lock_irq(q->queue_lock);
-		}
-
-		finish_wait(&rl->drain, &wait);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-}
-
-/*
- * block waiting for the io scheduler being started again.
- */
-static inline void block_wait_queue_running(request_queue_t *q)
-{
-	DEFINE_WAIT(wait);
-
-	while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
-		struct request_list *rl = &q->rq;
-
-		prepare_to_wait_exclusive(&rl->drain, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * re-check the condition. avoids using prepare_to_wait()
-		 * in the fast path (queue is running)
-		 */
-		if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
-			io_schedule();
-
-		finish_wait(&rl->drain, &wait);
-	}
-}
-
 static void handle_bad_sector(struct bio *bio)
 {
 	char b[BDEVNAME_SIZE];
@@ -2983,8 +2902,6 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
-		block_wait_queue_running(q);
-
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
@@ -3393,7 +3310,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(int gfp_flags)
+struct io_context *current_io_context(gfp_t gfp_flags)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3424,7 +3341,7 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(int gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags)
 {
 	struct io_context *ret;
 	ret = current_io_context(gfp_flags);

+ 1 - 1
drivers/block/loop.c

@@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 {
 	struct file *filp = lo->lo_backing_file;
-	int gfp = lo->old_gfp_mask;
+	gfp_t gfp = lo->old_gfp_mask;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
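
This one-liner is typical of the gfp_t sweep that runs through the rest
of this merge: allocation-flag variables and parameters move from plain
int (or unsigned int) to the gfp_t typedef so that sparse annotation
checking can flag code that mixes allocation flags with ordinary
integers.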

+ 5 - 43
drivers/block/noop-iosched.c

@@ -7,57 +7,19 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-/*
- * See if we can find a request that this buffer can be coalesced with.
- */
-static int elevator_noop_merge(request_queue_t *q, struct request **req,
-			       struct bio *bio)
-{
-	int ret;
-
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE)
-		*req = q->last_merge;
-
-	return ret;
-}
-
-static void elevator_noop_merge_requests(request_queue_t *q, struct request *req,
-					 struct request *next)
-{
-	list_del_init(&next->queuelist);
-}
-
-static void elevator_noop_add_request(request_queue_t *q, struct request *rq,
-				      int where)
+static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
 {
-	if (where == ELEVATOR_INSERT_FRONT)
-		list_add(&rq->queuelist, &q->queue_head);
-	else
-		list_add_tail(&rq->queuelist, &q->queue_head);
-
-	/*
-	 * new merges must not precede this barrier
-	 */
-	if (rq->flags & REQ_HARDBARRIER)
-		q->last_merge = NULL;
-	else if (!q->last_merge)
-		q->last_merge = rq;
+	elv_dispatch_add_tail(q, rq);
 }
 
-static struct request *elevator_noop_next_request(request_queue_t *q)
+static int elevator_noop_dispatch(request_queue_t *q, int force)
 {
-	if (!list_empty(&q->queue_head))
-		return list_entry_rq(q->queue_head.next);
-
-	return NULL;
+	return 0;
 }
 
 static struct elevator_type elevator_noop = {
 	.ops = {
-		.elevator_merge_fn		= elevator_noop_merge,
-		.elevator_merge_req_fn		= elevator_noop_merge_requests,
-		.elevator_next_req_fn		= elevator_noop_next_request,
+		.elevator_dispatch_fn		= elevator_noop_dispatch,
 		.elevator_add_req_fn		= elevator_noop_add_request,
 	},
 	.elevator_name = "noop",
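
With the queue-head handling hoisted into the elevator core, noop needs
no private state at all: its add hook tail-inserts straight onto the
dispatch queue via elv_dispatch_add_tail(), and its dispatch hook
truthfully reports that there is never anything further to move.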

+ 1 - 1
drivers/block/rd.c

@@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp)
 		struct block_device *bdev = inode->i_bdev;
 		struct address_space *mapping;
 		unsigned bsize;
-		int gfp_mask;
+		gfp_t gfp_mask;
 
 		inode = igrab(bdev->bd_inode);
 		rd_bdev[unit] = bdev;

+ 2 - 1
drivers/char/drm/drm_vm.c

@@ -148,7 +148,8 @@ static __inline__ struct page *drm_do_vm_shm_nopage(struct vm_area_struct *vma,
 
 	offset	 = address - vma->vm_start;
 	i = (unsigned long)map->handle + offset;
-	page = vmalloc_to_page((void *)i);
+	page = (map->type == _DRM_CONSISTENT) ?
+		virt_to_page((void *)i) : vmalloc_to_page((void *)i);
 	if (!page)
 		return NOPAGE_OOM;
 	get_page(page);

+ 1 - 1
drivers/char/drm/mga_drv.h

@@ -227,7 +227,7 @@ static inline u32 _MGA_READ(u32 *addr)
 #define MGA_EMIT_STATE( dev_priv, dirty )				\
 do {									\
 	if ( (dirty) & ~MGA_UPLOAD_CLIPRECTS ) {			\
-		if ( dev_priv->chipset == MGA_CARD_TYPE_G400 ) {	\
+		if ( dev_priv->chipset >= MGA_CARD_TYPE_G400 ) {	\
 			mga_g400_emit_state( dev_priv );		\
 		} else {						\
 			mga_g200_emit_state( dev_priv );		\

+ 1 - 1
drivers/char/drm/mga_state.c

@@ -53,7 +53,7 @@ static void mga_emit_clip_rect( drm_mga_private_t *dev_priv,
 
 	/* Force reset of DWGCTL on G400 (eliminates clip disable bit).
 	 */
-	if (dev_priv->chipset == MGA_CARD_TYPE_G400) {
+	if (dev_priv->chipset >= MGA_CARD_TYPE_G400) {
 		DMA_BLOCK(MGA_DWGCTL, ctx->dwgctl,
 			  MGA_LEN + MGA_EXEC, 0x80000000,
 			  MGA_DWGCTL, ctx->dwgctl,

+ 6 - 5
drivers/char/drm/radeon_cp.c

@@ -1133,10 +1133,10 @@ static void radeon_cp_init_ring_buffer( drm_device_t *dev,
 		ring_start = (dev_priv->cp_ring->offset
 			      - dev->agp->base
 			      + dev_priv->gart_vm_start);
-       } else
+	} else
 #endif
 		ring_start = (dev_priv->cp_ring->offset
-			      - dev->sg->handle
+			      - (unsigned long)dev->sg->virtual
 			      + dev_priv->gart_vm_start);
 
 	RADEON_WRITE( RADEON_CP_RB_BASE, ring_start );
@@ -1164,7 +1164,8 @@ static void radeon_cp_init_ring_buffer( drm_device_t *dev,
 		drm_sg_mem_t *entry = dev->sg;
 		unsigned long tmp_ofs, page_ofs;
 
-		tmp_ofs = dev_priv->ring_rptr->offset - dev->sg->handle;
+		tmp_ofs = dev_priv->ring_rptr->offset -
+				(unsigned long)dev->sg->virtual;
 		page_ofs = tmp_ofs >> PAGE_SHIFT;
 
 		RADEON_WRITE( RADEON_CP_RB_RPTR_ADDR,
@@ -1491,8 +1492,8 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
 	else
 #endif
 		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
-						- dev->sg->handle
-						+ dev_priv->gart_vm_start);
+					- (unsigned long)dev->sg->virtual
+					+ dev_priv->gart_vm_start);
 
 	DRM_DEBUG( "dev_priv->gart_size %d\n",
 		   dev_priv->gart_size );

+ 1 - 1
drivers/char/n_tty.c

@@ -62,7 +62,7 @@
 
 static inline unsigned char *alloc_buf(void)
 {
-	unsigned int prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 
 	if (PAGE_SIZE != N_TTY_BUF_SIZE)
 		return kmalloc(N_TTY_BUF_SIZE, prio);

+ 3 - 3
drivers/cpufreq/cpufreq_conservative.c

@@ -315,9 +315,9 @@ static void dbs_check_cpu(int cpu)
 	policy = this_dbs_info->cur_policy;
 
 	if ( init_flag == 0 ) {
-		for ( /* NULL */; init_flag < NR_CPUS; init_flag++ ) {
-			dbs_info = &per_cpu(cpu_dbs_info, init_flag);
-			requested_freq[cpu] = dbs_info->cur_policy->cur;
+		for_each_online_cpu(j) {
+			dbs_info = &per_cpu(cpu_dbs_info, j);
+			requested_freq[j] = dbs_info->cur_policy->cur;
 		}
 		init_flag = 1;
 	}

+ 1 - 1
drivers/ieee1394/eth1394.c

@@ -1630,7 +1630,7 @@ static void ether1394_complete_cb(void *__ptask)
 /* Transmit a packet (called by kernel) */
 static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
 {
-	int kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+	gfp_t kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 	struct eth1394hdr *eth;
 	struct eth1394_priv *priv = netdev_priv(dev);
 	int proto;

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_cmd.c

@@ -524,7 +524,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
 }
 
 struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
-					  unsigned int gfp_mask)
+					  gfp_t gfp_mask)
 {
 	struct mthca_mailbox *mailbox;
 

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_cmd.h

@@ -248,7 +248,7 @@ void mthca_cmd_event(struct mthca_dev *dev, u16 token,
 		     u8  status, u64 out_param);
 
 struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
-					  unsigned int gfp_mask);
+					  gfp_t gfp_mask);
 void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
 
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);

+ 11 - 10
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -396,20 +396,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs
 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
 
 	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
-	if (ecr) {
-		writel(ecr, dev->eq_regs.tavor.ecr_base +
-		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+	if (!ecr)
+		return IRQ_NONE;
 
-		for (i = 0; i < MTHCA_NUM_EQ; ++i)
-			if (ecr & dev->eq_table.eq[i].eqn_mask &&
-			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+	writel(ecr, dev->eq_regs.tavor.ecr_base +
+	       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+
+	for (i = 0; i < MTHCA_NUM_EQ; ++i)
+		if (ecr & dev->eq_table.eq[i].eqn_mask) {
+			if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
 				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
 						dev->eq_table.eq[i].cons_index);
-				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
-			}
-	}
+			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+		}
 
-	return IRQ_RETVAL(ecr);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
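
The behavioural change buried in this restructuring: the consumer index
is still only written back when mthca_eq_int() reports work, but the
request-notification doorbell (tavor_eq_req_not) is now rung for every
EQ whose ECR bit was set, where previously an EQ whose bit was set but
whose mthca_eq_int() returned zero was never re-armed.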

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_memfree.c

@@ -82,7 +82,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
 }
 
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  unsigned int gfp_mask)
+				  gfp_t gfp_mask)
 {
 	struct mthca_icm *icm;
 	struct mthca_icm_chunk *chunk = NULL;

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_memfree.h

@@ -77,7 +77,7 @@ struct mthca_icm_iter {
 struct mthca_dev;
 
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  unsigned int gfp_mask);
+				  gfp_t gfp_mask);
 void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
 
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,

+ 1 - 1
drivers/md/bitmap.c

@@ -91,7 +91,7 @@ int bitmap_active(struct bitmap *bitmap)
 
 #define WRITE_POOL_SIZE 256
 /* mempool for queueing pending writes on the bitmap file */
-static void *write_pool_alloc(unsigned int gfp_flags, void *data)
+static void *write_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	return kmalloc(sizeof(struct page_list), gfp_flags);
 }

+ 1 - 1
drivers/md/dm-crypt.c

@@ -331,7 +331,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
 {
 	struct bio *bio;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
+	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
 	unsigned int i;
 
 	/*

+ 6 - 4
drivers/md/md.c

@@ -3568,7 +3568,8 @@ static void md_do_sync(mddev_t *mddev)
 		mddev->curr_resync = 2;
 
 	try_again:
-		if (signal_pending(current)) {
+		if (signal_pending(current) ||
+		    kthread_should_stop()) {
 			flush_signals(current);
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto skip;
@@ -3590,8 +3591,9 @@ static void md_do_sync(mddev_t *mddev)
 					 */
 					continue;
 				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-				if (!signal_pending(current)
-				    && mddev2->curr_resync >= mddev->curr_resync) {
+				if (!signal_pending(current) &&
+				    !kthread_should_stop() &&
+				    mddev2->curr_resync >= mddev->curr_resync) {
 					printk(KERN_INFO "md: delaying resync of %s"
 					       " until %s has finished resync (they"
 					       " share one or more physical units)\n",
@@ -3697,7 +3699,7 @@ static void md_do_sync(mddev_t *mddev)
 		}
 
 
-		if (signal_pending(current)) {
+		if (signal_pending(current) || kthread_should_stop()) {
 			/*
 			 * got a signal, exit.
 			 */

+ 0 - 1
drivers/media/video/Kconfig

@@ -262,7 +262,6 @@ config VIDEO_SAA7134_DVB
 	depends on VIDEO_SAA7134 && DVB_CORE
 	select VIDEO_BUF_DVB
 	select DVB_MT352
-	select DVB_CX22702
 	select DVB_TDA1004X
 	---help---
 	  This adds support for DVB cards based on the

+ 8 - 4
drivers/message/fusion/mptsas.c

@@ -257,8 +257,8 @@ static void mptsas_print_device_pg0(SasDevicePage0_t *pg0)
 	printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address));
 	printk("Target ID=0x%X\n", pg0->TargetID);
 	printk("Bus=0x%X\n", pg0->Bus);
-	printk("PhyNum=0x%X\n", pg0->PhyNum);
-	printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus));
+	printk("Parent Phy Num=0x%X\n", pg0->PhyNum);
+	printk("Access Status=0x%X\n", le16_to_cpu(pg0->AccessStatus));
 	printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo));
 	printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags));
 	printk("Physical Port=0x%X\n", pg0->PhysicalPort);
@@ -270,7 +270,7 @@ static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1)
 	printk("---- SAS EXPANDER PAGE 1 ------------\n");
 
 	printk("Physical Port=0x%X\n", pg1->PhysicalPort);
-	printk("PHY Identifier=0x%X\n", pg1->Phy);
+	printk("PHY Identifier=0x%X\n", pg1->PhyIdentifier);
 	printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate);
 	printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate);
 	printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate);
@@ -604,7 +604,7 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
 	mptsas_print_expander_pg1(buffer);
 
 	/* save config data */
-	phy_info->phy_id = buffer->Phy;
+	phy_info->phy_id = buffer->PhyIdentifier;
 	phy_info->port_id = buffer->PhysicalPort;
 	phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate;
 	phy_info->programmed_link_rate = buffer->ProgrammedLinkRate;
@@ -825,6 +825,8 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index)
 		mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify,
 			(MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE <<
 			 MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle);
+		port_info->phy_info[i].identify.phy_id =
+		    port_info->phy_info[i].phy_id;
 		handle = port_info->phy_info[i].identify.handle;
 
 		if (port_info->phy_info[i].attached.handle) {
@@ -881,6 +883,8 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index)
 				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
 				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
 				port_info->phy_info[i].identify.handle);
+			port_info->phy_info[i].identify.phy_id =
+			    port_info->phy_info[i].phy_id;
 		}
 
 		if (port_info->phy_info[i].attached.handle) {

+ 3 - 2
drivers/net/8139cp.c

@@ -1027,8 +1027,7 @@ static void cp_reset_hw (struct cp_private *cp)
 		if (!(cpr8(Cmd) & CmdReset))
 			return;
 
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(10);
+		schedule_timeout_uninterruptible(10);
 	}
 
 	printk(KERN_ERR "%s: hardware reset timeout\n", cp->dev->name);
@@ -1575,6 +1574,7 @@ static struct ethtool_ops cp_ethtool_ops = {
 	.set_wol		= cp_set_wol,
 	.get_strings		= cp_get_strings,
 	.get_ethtool_stats	= cp_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int cp_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
@@ -1773,6 +1773,7 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < 3; i++)
 		((u16 *) (dev->dev_addr))[i] =
 		    le16_to_cpu (read_eeprom (regs, i + 7, addr_len));
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
 	dev->open = cp_open;
 	dev->stop = cp_close;

+ 4 - 1
drivers/net/8139too.c

@@ -552,7 +552,8 @@ const static struct {
 
 	{ "RTL-8100B/8139D",
 	  HW_REVID(1, 1, 1, 0, 1, 0, 1),
-	  HasLWake,
+	  HasHltClk /* XXX undocumented? */
+	| HasLWake,
 	},
 
 	{ "RTL-8101",
@@ -970,6 +971,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 	for (i = 0; i < 3; i++)
 		((u16 *) (dev->dev_addr))[i] =
 		    le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len));
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
 	/* The Rtl8139-specific entries in the device structure. */
 	dev->open = rtl8139_open;
@@ -2465,6 +2467,7 @@ static struct ethtool_ops rtl8139_ethtool_ops = {
 	.get_strings		= rtl8139_get_strings,
 	.get_stats_count	= rtl8139_get_stats_count,
 	.get_ethtool_stats	= rtl8139_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)

+ 23 - 0
drivers/net/Kconfig

@@ -475,6 +475,14 @@ config SGI_IOC3_ETH_HW_TX_CSUM
 	  the moment only acceleration of IPv4 is supported.  This option
 	  enables offloading for checksums on transmit.  If unsure, say Y.
 
+config MIPS_SIM_NET
+	tristate "MIPS simulator Network device (EXPERIMENTAL)"
+	depends on NETDEVICES && MIPS_SIM && EXPERIMENTAL
+	help
+	  The MIPSNET device is a simple Ethernet network device which is
+	  emulated by the MIPS Simulator.
+	  If you are not using the MIPS simulator or are unsure, say N.
+
 config SGI_O2MACE_ETH
 	tristate "SGI O2 MACE Fast Ethernet support"
 	depends on NET_ETHERNET && SGI_IP32=y
@@ -2083,6 +2091,7 @@ config SPIDER_NET
 config GIANFAR
 	tristate "Gianfar Ethernet"
 	depends on 85xx || 83xx
+	select PHYLIB
 	help
 	  This driver supports the Gigabit TSEC on the MPC85xx 
 	  family of chips, and the FEC on the 8540
@@ -2243,6 +2252,20 @@ config ISERIES_VETH
 	tristate "iSeries Virtual Ethernet driver support"
 	depends on PPC_ISERIES
 
+config RIONET
+	tristate "RapidIO Ethernet over messaging driver support"
+	depends on NETDEVICES && RAPIDIO
+
+config RIONET_TX_SIZE
+	int "Number of outbound queue entries"
+	depends on RIONET
+	default "128"
+
+config RIONET_RX_SIZE
+	int "Number of inbound queue entries"
+	depends on RIONET
+	default "128"
+
 config FDDI
 	bool "FDDI driver support"
 	depends on (PCI || EISA)

+ 3 - 1
drivers/net/Makefile

@@ -13,7 +13,7 @@ obj-$(CONFIG_CHELSIO_T1) += chelsio/
 obj-$(CONFIG_BONDING) += bonding/
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
 
-gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_phy.o
+gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_mii.o
 
 #
 # link order important here
@@ -64,6 +64,7 @@ obj-$(CONFIG_SKFP) += skfp/
 obj-$(CONFIG_VIA_RHINE) += via-rhine.o
 obj-$(CONFIG_VIA_VELOCITY) += via-velocity.o
 obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o
+obj-$(CONFIG_RIONET) += rionet.o
 
 #
 # end link order section
@@ -166,6 +167,7 @@ obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o
 obj-$(CONFIG_MIPS_GT96100ETH) += gt96100eth.o
 obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o
+obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o
 obj-$(CONFIG_SGI_IOC3_ETH) += ioc3-eth.o
 obj-$(CONFIG_DECLANCE) += declance.o
 obj-$(CONFIG_ATARILANCE) += atarilance.o

+ 5 - 8
drivers/net/au1000_eth.c

@@ -151,13 +151,6 @@ struct au1000_private *au_macs[NUM_ETH_INTERFACES];
 	SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
 	SUPPORTED_Autoneg
 
-static char *phy_link[] = 
-{	"unknown", 
-	"10Base2", "10BaseT", 
-	"AUI",
-	"100BaseT", "100BaseTX", "100BaseFX"
-};
-
 int bcm_5201_init(struct net_device *dev, int phy_addr)
 {
 	s16 data;
@@ -785,6 +778,7 @@ static struct mii_chip_info {
 	{"Broadcom BCM5201 10/100 BaseT PHY",0x0040,0x6212, &bcm_5201_ops,0},
 	{"Broadcom BCM5221 10/100 BaseT PHY",0x0040,0x61e4, &bcm_5201_ops,0},
 	{"Broadcom BCM5222 10/100 BaseT PHY",0x0040,0x6322, &bcm_5201_ops,1},
+	{"NS DP83847 PHY", 0x2000, 0x5c30, &bcm_5201_ops ,0},
 	{"AMD 79C901 HomePNA PHY",0x0000,0x35c8, &am79c901_ops,0},
 	{"AMD 79C874 10/100 BaseT PHY",0x0022,0x561b, &am79c874_ops,0},
 	{"LSI 80227 10/100 BaseT PHY",0x0016,0xf840, &lsi_80227_ops,0},
@@ -1045,7 +1039,7 @@ found:
 #endif
 
 	if (aup->mii->chip_info == NULL) {
-		printk(KERN_ERR "%s: Au1x No MII transceivers found!\n",
+		printk(KERN_ERR "%s: Au1x No known MII transceivers found!\n",
 				dev->name);
 		return -1;
 	}
@@ -1546,6 +1540,9 @@ au1000_probe(u32 ioaddr, int irq, int port_num)
 		printk(KERN_ERR "%s: out of memory\n", dev->name);
 		goto err_out;
 	}
+	aup->mii->next = NULL;
+	aup->mii->chip_info = NULL;
+	aup->mii->status = 0;
 	aup->mii->mii_control_reg = 0;
 	aup->mii->mii_data_reg = 0;
 

+ 128 - 8
drivers/net/b44.c

@@ -106,6 +106,29 @@ static int b44_poll(struct net_device *dev, int *budget);
 static void b44_poll_controller(struct net_device *dev);
 #endif
 
+static int dma_desc_align_mask;
+static int dma_desc_sync_size;
+
+static inline void b44_sync_dma_desc_for_device(struct pci_dev *pdev,
+                                                dma_addr_t dma_base,
+                                                unsigned long offset,
+                                                enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_device(&pdev->dev, dma_base,
+	                                 offset & dma_desc_align_mask,
+	                                 dma_desc_sync_size, dir);
+}
+
+static inline void b44_sync_dma_desc_for_cpu(struct pci_dev *pdev,
+                                             dma_addr_t dma_base,
+                                             unsigned long offset,
+                                             enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_cpu(&pdev->dev, dma_base,
+	                              offset & dma_desc_align_mask,
+	                              dma_desc_sync_size, dir);
+}
+
 static inline unsigned long br32(const struct b44 *bp, unsigned long reg)
 {
 	return readl(bp->regs + reg);
@@ -668,6 +691,11 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 	dp->ctrl = cpu_to_le32(ctrl);
 	dp->addr = cpu_to_le32((u32) mapping + bp->rx_offset + bp->dma_offset);
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma,
+		                             dest_idx * sizeof(dp),
+		                             DMA_BIDIRECTIONAL);
+
 	return RX_PKT_BUF_SZ;
 }
 
@@ -692,6 +720,11 @@ static void b44_recycle_rx(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 	pci_unmap_addr_set(dest_map, mapping,
 			   pci_unmap_addr(src_map, mapping));
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_cpu(bp->pdev, bp->rx_ring_dma,
+		                          src_idx * sizeof(src_desc),
+		                          DMA_BIDIRECTIONAL);
+
 	ctrl = src_desc->ctrl;
 	if (dest_idx == (B44_RX_RING_SIZE - 1))
 		ctrl |= cpu_to_le32(DESC_CTRL_EOT);
@@ -700,8 +733,14 @@ static void b44_recycle_rx(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 
 	dest_desc->ctrl = ctrl;
 	dest_desc->addr = src_desc->addr;
+
 	src_map->skb = NULL;
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma,
+		                             dest_idx * sizeof(dest_desc),
+		                             DMA_BIDIRECTIONAL);
+
 	pci_dma_sync_single_for_device(bp->pdev, src_desc->addr,
 				       RX_PKT_BUF_SZ,
 				       PCI_DMA_FROMDEVICE);
@@ -959,6 +998,11 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	bp->tx_ring[entry].ctrl = cpu_to_le32(ctrl);
 	bp->tx_ring[entry].addr = cpu_to_le32((u32) mapping+bp->dma_offset);
 
+	if (bp->flags & B44_FLAG_TX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->tx_ring_dma,
+		                             entry * sizeof(bp->tx_ring[0]),
+		                             DMA_TO_DEVICE);
+
 	entry = NEXT_TX(entry);
 
 	bp->tx_prod = entry;
@@ -1064,6 +1108,16 @@ static void b44_init_rings(struct b44 *bp)
 	memset(bp->rx_ring, 0, B44_RX_RING_BYTES);
 	memset(bp->tx_ring, 0, B44_TX_RING_BYTES);
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		dma_sync_single_for_device(&bp->pdev->dev, bp->rx_ring_dma,
+		                           DMA_TABLE_BYTES,
+		                           PCI_DMA_BIDIRECTIONAL);
+
+	if (bp->flags & B44_FLAG_TX_RING_HACK)
+		dma_sync_single_for_device(&bp->pdev->dev, bp->tx_ring_dma,
+		                           DMA_TABLE_BYTES,
+		                           PCI_DMA_TODEVICE);
+
 	for (i = 0; i < bp->rx_pending; i++) {
 		if (b44_alloc_rx_skb(bp, -1, i) < 0)
 			break;
@@ -1085,14 +1139,28 @@ static void b44_free_consistent(struct b44 *bp)
 		bp->tx_buffers = NULL;
 	}
 	if (bp->rx_ring) {
-		pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
-				    bp->rx_ring, bp->rx_ring_dma);
+		if (bp->flags & B44_FLAG_RX_RING_HACK) {
+			dma_unmap_single(&bp->pdev->dev, bp->rx_ring_dma,
+				         DMA_TABLE_BYTES,
+				         DMA_BIDIRECTIONAL);
+			kfree(bp->rx_ring);
+		} else
+			pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
+					    bp->rx_ring, bp->rx_ring_dma);
 		bp->rx_ring = NULL;
+		bp->flags &= ~B44_FLAG_RX_RING_HACK;
 	}
 	if (bp->tx_ring) {
-		pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
-				    bp->tx_ring, bp->tx_ring_dma);
+		if (bp->flags & B44_FLAG_TX_RING_HACK) {
+			dma_unmap_single(&bp->pdev->dev, bp->tx_ring_dma,
+				         DMA_TABLE_BYTES,
+				         DMA_TO_DEVICE);
+			kfree(bp->tx_ring);
+		} else
+			pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
+					    bp->tx_ring, bp->tx_ring_dma);
 		bp->tx_ring = NULL;
+		bp->flags &= ~B44_FLAG_TX_RING_HACK;
 	}
 }
 
@@ -1118,12 +1186,56 @@ static int b44_alloc_consistent(struct b44 *bp)
 
 	size = DMA_TABLE_BYTES;
 	bp->rx_ring = pci_alloc_consistent(bp->pdev, size, &bp->rx_ring_dma);
-	if (!bp->rx_ring)
-		goto out_err;
+	if (!bp->rx_ring) {
+		/* Allocation may have failed due to pci_alloc_consistent
+		   insisting on use of GFP_DMA, which is more restrictive
+		   than necessary...  */
+		struct dma_desc *rx_ring;
+		dma_addr_t rx_ring_dma;
+
+		if (!(rx_ring = (struct dma_desc *)kmalloc(size, GFP_KERNEL)))
+			goto out_err;
+
+		memset(rx_ring, 0, size);
+		rx_ring_dma = dma_map_single(&bp->pdev->dev, rx_ring,
+		                             DMA_TABLE_BYTES,
+		                             DMA_BIDIRECTIONAL);
+
+		if (rx_ring_dma + size > B44_DMA_MASK) {
+			kfree(rx_ring);
+			goto out_err;
+		}
+
+		bp->rx_ring = rx_ring;
+		bp->rx_ring_dma = rx_ring_dma;
+		bp->flags |= B44_FLAG_RX_RING_HACK;
+	}
 
 	bp->tx_ring = pci_alloc_consistent(bp->pdev, size, &bp->tx_ring_dma);
-	if (!bp->tx_ring)
-		goto out_err;
+	if (!bp->tx_ring) {
+		/* Allocation may have failed due to pci_alloc_consistent
+		   insisting on use of GFP_DMA, which is more restrictive
+		   than necessary...  */
+		struct dma_desc *tx_ring;
+		dma_addr_t tx_ring_dma;
+
+		if (!(tx_ring = (struct dma_desc *)kmalloc(size, GFP_KERNEL)))
+			goto out_err;
+
+		memset(tx_ring, 0, size);
+		tx_ring_dma = dma_map_single(&bp->pdev->dev, tx_ring,
+		                             DMA_TABLE_BYTES,
+		                             DMA_TO_DEVICE);
+
+		if (tx_ring_dma + size > B44_DMA_MASK) {
+			kfree(tx_ring);
+			goto out_err;
+		}
+
+		bp->tx_ring = tx_ring;
+		bp->tx_ring_dma = tx_ring_dma;
+		bp->flags |= B44_FLAG_TX_RING_HACK;
+	}
 
 	return 0;
 
@@ -1676,6 +1788,7 @@ static struct ethtool_ops b44_ethtool_ops = {
 	.set_pauseparam		= b44_set_pauseparam,
 	.get_msglevel		= b44_get_msglevel,
 	.set_msglevel		= b44_set_msglevel,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -1718,6 +1831,7 @@ static int __devinit b44_get_invariants(struct b44 *bp)
 	bp->dev->dev_addr[3] = eeprom[80];
 	bp->dev->dev_addr[4] = eeprom[83];
 	bp->dev->dev_addr[5] = eeprom[82];
+	memcpy(bp->dev->perm_addr, bp->dev->dev_addr, bp->dev->addr_len);
 
 	bp->phy_addr = eeprom[90] & 0x1f;
 
@@ -1971,6 +2085,12 @@ static struct pci_driver b44_driver = {
 
 static int __init b44_init(void)
 {
+	unsigned int dma_desc_align_size = dma_get_cache_alignment();
+
+	/* Set up parameters for syncing RX/TX DMA descriptors */
+	dma_desc_align_mask = ~(dma_desc_align_size - 1);
+	dma_desc_sync_size = max(dma_desc_align_size, sizeof(struct dma_desc));
+
 	return pci_module_init(&b44_driver);
 }
 

+ 2 - 0
drivers/net/b44.h

@@ -400,6 +400,8 @@ struct b44 {
 #define B44_FLAG_ADV_100HALF	0x04000000
 #define B44_FLAG_ADV_100FULL	0x08000000
 #define B44_FLAG_INTERNAL_PHY	0x10000000
+#define B44_FLAG_RX_RING_HACK	0x20000000
+#define B44_FLAG_TX_RING_HACK	0x40000000
 
 	u32			rx_offset;
 

+ 55 - 2
drivers/net/bonding/bond_main.c

@@ -4241,6 +4241,43 @@ out:
 	return 0;
 }
 
+static void bond_activebackup_xmit_copy(struct sk_buff *skb,
+                                        struct bonding *bond,
+                                        struct slave *slave)
+{
+	struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+	struct ethhdr *eth_data;
+	u8 *hwaddr;
+	int res;
+
+	if (!skb2) {
+		printk(KERN_ERR DRV_NAME ": Error: "
+		       "bond_activebackup_xmit_copy(): skb_copy() failed\n");
+		return;
+	}
+
+	skb2->mac.raw = (unsigned char *)skb2->data;
+	eth_data = eth_hdr(skb2);
+
+	/* Pick an appropriate source MAC address
+	 *	-- use slave's perm MAC addr, unless used by bond
+	 *	-- otherwise, borrow active slave's perm MAC addr
+	 *	   since that will not be used
+	 */
+	hwaddr = slave->perm_hwaddr;
+	if (!memcmp(eth_data->h_source, hwaddr, ETH_ALEN))
+		hwaddr = bond->curr_active_slave->perm_hwaddr;
+
+	/* Set source MAC address appropriately */
+	memcpy(eth_data->h_source, hwaddr, ETH_ALEN);
+
+	res = bond_dev_queue_xmit(bond, skb2, slave->dev);
+	if (res)
+		dev_kfree_skb(skb2);
+
+	return;
+}
+
 /*
  * in active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
@@ -4257,10 +4294,26 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 		goto out;
 	}
 
-	if (bond->curr_active_slave) { /* one usable interface */
-		res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
+	if (!bond->curr_active_slave)
+		goto out;
+
+	/* Xmit IGMP frames on all slaves to ensure rapid fail-over
+	   for multicast traffic on snooping switches */
+	if (skb->protocol == __constant_htons(ETH_P_IP) &&
+	    skb->nh.iph->protocol == IPPROTO_IGMP) {
+		struct slave *slave, *active_slave;
+		int i;
+
+		active_slave = bond->curr_active_slave;
+		bond_for_each_slave_from_to(bond, slave, i, active_slave->next,
+		                            active_slave->prev)
+			if (IS_UP(slave->dev) &&
+			    (slave->link == BOND_LINK_UP))
+				bond_activebackup_xmit_copy(skb, bond, slave);
 	}
 
+	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
+
 out:
 	if (res) {
 		/* no suitable interface, frame not sent */
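
The net effect: an outgoing IGMP frame is duplicated onto every healthy
backup slave, each copy rewritten to a permanent MAC address the switch
will not see in use elsewhere, so snooping switches keep multicast
group state alive on every port the bond might fail over to, while the
original frame still leaves through the active slave as before.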

+ 2 - 2
drivers/net/cassini.c

@@ -489,7 +489,7 @@ static int cas_page_free(struct cas *cp, cas_page_t *page)
 /* local page allocation routines for the receive buffers. jumbo pages
  * require at least 8K contiguous and 8K aligned buffers.
  */
-static cas_page_t *cas_page_alloc(struct cas *cp, const int flags)
+static cas_page_t *cas_page_alloc(struct cas *cp, const gfp_t flags)
 {
 	cas_page_t *page;
 
@@ -561,7 +561,7 @@ static void cas_spare_free(struct cas *cp)
 }
 
 /* replenish spares if needed */
-static void cas_spare_recover(struct cas *cp, const int flags)
+static void cas_spare_recover(struct cas *cp, const gfp_t flags)
 {
 	struct list_head list, *elem, *tmp;
 	int needed, i;

+ 16 - 21
drivers/net/declance.c

@@ -5,7 +5,7 @@
  *
  *      adopted from sunlance.c by Richard van den Berg
  *
- *      Copyright (C) 2002, 2003  Maciej W. Rozycki
+ *      Copyright (C) 2002, 2003, 2005  Maciej W. Rozycki
  *
  *      additional sources:
  *      - PMAD-AA TURBOchannel Ethernet Module Functional Specification,
@@ -57,13 +57,15 @@
 #include <linux/string.h>
 
 #include <asm/addrspace.h>
+#include <asm/system.h>
+
 #include <asm/dec/interrupts.h>
 #include <asm/dec/ioasic.h>
 #include <asm/dec/ioasic_addrs.h>
 #include <asm/dec/kn01.h>
 #include <asm/dec/machtype.h>
+#include <asm/dec/system.h>
 #include <asm/dec/tc.h>
-#include <asm/system.h>
 
 static char version[] __devinitdata =
 "declance.c: v0.009 by Linux MIPS DECstation task force\n";
@@ -79,10 +81,6 @@ MODULE_LICENSE("GPL");
 #define PMAD_LANCE 2
 #define PMAX_LANCE 3
 
-#ifndef CONFIG_TC
-unsigned long system_base;
-unsigned long dmaptr;
-#endif
 
 #define LE_CSR0 0
 #define LE_CSR1 1
@@ -237,7 +235,7 @@ struct lance_init_block {
 /*
  * This works *only* for the ring descriptors
  */
-#define LANCE_ADDR(x) (PHYSADDR(x) >> 1)
+#define LANCE_ADDR(x) (CPHYSADDR(x) >> 1)
 
 struct lance_private {
 	struct net_device *next;
@@ -697,12 +695,13 @@ out:
 	spin_unlock(&lp->lock);
 }
 
-static void lance_dma_merr_int(const int irq, void *dev_id,
-				struct pt_regs *regs)
+static irqreturn_t lance_dma_merr_int(const int irq, void *dev_id,
+				      struct pt_regs *regs)
 {
 	struct net_device *dev = (struct net_device *) dev_id;
 
 	printk("%s: DMA error\n", dev->name);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t
@@ -1026,10 +1025,6 @@ static int __init dec_lance_init(const int type, const int slot)
 	unsigned long esar_base;
 	unsigned char *esar;
 
-#ifndef CONFIG_TC
-	system_base = KN01_LANCE_BASE;
-#endif
-
 	if (dec_lance_debug && version_printed++ == 0)
 		printk(version);
 
@@ -1062,16 +1057,16 @@ static int __init dec_lance_init(const int type, const int slot)
 	switch (type) {
 #ifdef CONFIG_TC
 	case ASIC_LANCE:
-		dev->base_addr = system_base + IOASIC_LANCE;
+		dev->base_addr = CKSEG1ADDR(dec_kn_slot_base + IOASIC_LANCE);
 
 		/* buffer space for the on-board LANCE shared memory */
 		/*
 		 * FIXME: ugly hack!
 		 */
-		dev->mem_start = KSEG1ADDR(0x00020000);
+		dev->mem_start = CKSEG1ADDR(0x00020000);
 		dev->mem_end = dev->mem_start + 0x00020000;
 		dev->irq = dec_interrupt[DEC_IRQ_LANCE];
-		esar_base = system_base + IOASIC_ESAR;
+		esar_base = CKSEG1ADDR(dec_kn_slot_base + IOASIC_ESAR);
 
 		/* Workaround crash with booting KN04 2.1k from Disk */
 		memset((void *)dev->mem_start, 0,
@@ -1101,14 +1096,14 @@ static int __init dec_lance_init(const int type, const int slot)
 		/* Setup I/O ASIC LANCE DMA.  */
 		lp->dma_irq = dec_interrupt[DEC_IRQ_LANCE_MERR];
 		ioasic_write(IO_REG_LANCE_DMA_P,
-			     PHYSADDR(dev->mem_start) << 3);
+			     CPHYSADDR(dev->mem_start) << 3);
 
 		break;
 
 	case PMAD_LANCE:
 		claim_tc_card(slot);
 
-		dev->mem_start = get_tc_base_addr(slot);
+		dev->mem_start = CKSEG1ADDR(get_tc_base_addr(slot));
 		dev->base_addr = dev->mem_start + 0x100000;
 		dev->irq = get_tc_irq_nr(slot);
 		esar_base = dev->mem_start + 0x1c0002;
@@ -1137,9 +1132,9 @@ static int __init dec_lance_init(const int type, const int slot)
 
 	case PMAX_LANCE:
 		dev->irq = dec_interrupt[DEC_IRQ_LANCE];
-		dev->base_addr = KN01_LANCE_BASE;
-		dev->mem_start = KN01_LANCE_BASE + 0x01000000;
-		esar_base = KN01_RTC_BASE + 1;
+		dev->base_addr = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE);
+		dev->mem_start = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE_MEM);
+		esar_base = CKSEG1ADDR(KN01_SLOT_BASE + KN01_ESAR + 1);
 		lp->dma_irq = -1;
 
 		/*
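
The lance_dma_merr_int change above follows the 2.6 interrupt API: handlers return irqreturn_t so the core can distinguish handled from spurious interrupts and disable a stuck line. A minimal sketch of the expected shape, using a made-up device structure:

    #include <linux/interrupt.h>

    struct mydev {
            void __iomem *regs;
    };

    /* 2.6-era handler signature: report whether this device raised
     * the interrupt so the core can account for unhandled IRQs. */
    static irqreturn_t mydev_interrupt(int irq, void *dev_id,
                                       struct pt_regs *regs)
    {
            struct mydev *dev = dev_id;

            if (!dev)
                    return IRQ_NONE;        /* not ours */
            /* ...acknowledge and service the hardware here... */
            return IRQ_HANDLED;
    }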

+ 3 - 1
drivers/net/e100.c

@@ -2201,6 +2201,7 @@ static struct ethtool_ops e100_ethtool_ops = {
 	.phys_id		= e100_phys_id,
 	.get_stats_count	= e100_get_stats_count,
 	.get_ethtool_stats	= e100_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
@@ -2351,7 +2352,8 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	e100_phy_init(nic);
 
 	memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
-	if(!is_valid_ether_addr(netdev->dev_addr)) {
+	memcpy(netdev->perm_addr, nic->eeprom, ETH_ALEN);
+	if(!is_valid_ether_addr(netdev->perm_addr)) {
 		DPRINTK(PROBE, ERR, "Invalid MAC address from "
 			"EEPROM, aborting.\n");
 		err = -EAGAIN;
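
netdev->perm_addr records the factory MAC so the ethtool permanent-address query keeps working even after an administrator overrides dev_addr; ethtool_op_get_perm_addr is the stock accessor wired in above. A sketch of the probe-time pattern, assuming eeprom_mac[] has already been read from the part:

    #include <linux/errno.h>
    #include <linux/etherdevice.h>
    #include <linux/netdevice.h>
    #include <linux/string.h>

    static int wire_up_mac(struct net_device *netdev,
                           const u8 eeprom_mac[ETH_ALEN])
    {
            memcpy(netdev->dev_addr,  eeprom_mac, ETH_ALEN);
            memcpy(netdev->perm_addr, eeprom_mac, ETH_ALEN);

            /* Validate the permanent copy: dev_addr may legitimately
             * change later, perm_addr never does. */
            if (!is_valid_ether_addr(netdev->perm_addr))
                    return -EIO;
            return 0;
    }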

+ 60 - 14
drivers/net/e1000/e1000.h

@@ -72,6 +72,10 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
+#ifdef CONFIG_E1000_MQ
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#endif
 
 #define BAR_0		0
 #define BAR_1		1
@@ -165,10 +169,33 @@ struct e1000_buffer {
 	uint16_t next_to_watch;
 };
 
-struct e1000_ps_page { struct page *ps_page[MAX_PS_BUFFERS]; };
-struct e1000_ps_page_dma { uint64_t ps_page_dma[MAX_PS_BUFFERS]; };
+struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; };
+struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; };
+
+struct e1000_tx_ring {
+	/* pointer to the descriptor ring memory */
+	void *desc;
+	/* physical address of the descriptor ring */
+	dma_addr_t dma;
+	/* length of descriptor ring in bytes */
+	unsigned int size;
+	/* number of descriptors in the ring */
+	unsigned int count;
+	/* next descriptor to associate a buffer with */
+	unsigned int next_to_use;
+	/* next descriptor to check for DD status bit */
+	unsigned int next_to_clean;
+	/* array of buffer information structs */
+	struct e1000_buffer *buffer_info;
+
+	struct e1000_buffer previous_buffer_info;
+	spinlock_t tx_lock;
+	uint16_t tdh;
+	uint16_t tdt;
+	uint64_t pkt;
+};
 
-struct e1000_desc_ring {
+struct e1000_rx_ring {
 	/* pointer to the descriptor ring memory */
 	void *desc;
 	/* physical address of the descriptor ring */
@@ -186,6 +213,10 @@ struct e1000_desc_ring {
 	/* arrays of page information for packet split */
 	struct e1000_ps_page *ps_page;
 	struct e1000_ps_page_dma *ps_page_dma;
+
+	uint16_t rdh;
+	uint16_t rdt;
+	uint64_t pkt;
 };
 
 #define E1000_DESC_UNUSED(R) \
@@ -227,9 +258,10 @@ struct e1000_adapter {
 	unsigned long led_status;
 
 	/* TX */
-	struct e1000_desc_ring tx_ring;
-	struct e1000_buffer previous_buffer_info;
-	spinlock_t tx_lock;
+	struct e1000_tx_ring *tx_ring;      /* One per active queue */
+#ifdef CONFIG_E1000_MQ
+	struct e1000_tx_ring **cpu_tx_ring; /* per-cpu */
+#endif
 	uint32_t txd_cmd;
 	uint32_t tx_int_delay;
 	uint32_t tx_abs_int_delay;
@@ -246,19 +278,33 @@ struct e1000_adapter {
 
 	/* RX */
 #ifdef CONFIG_E1000_NAPI
-	boolean_t (*clean_rx) (struct e1000_adapter *adapter, int *work_done,
-			  int work_to_do);
+	boolean_t (*clean_rx) (struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring,
+			       int *work_done, int work_to_do);
 #else
-	boolean_t (*clean_rx) (struct e1000_adapter *adapter);
+	boolean_t (*clean_rx) (struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring);
 #endif
-	void (*alloc_rx_buf) (struct e1000_adapter *adapter);
-	struct e1000_desc_ring rx_ring;
+	void (*alloc_rx_buf) (struct e1000_adapter *adapter,
+			      struct e1000_rx_ring *rx_ring);
+	struct e1000_rx_ring *rx_ring;      /* One per active queue */
+#ifdef CONFIG_E1000_NAPI
+	struct net_device *polling_netdev;  /* One per active queue */
+#endif
+#ifdef CONFIG_E1000_MQ
+	struct net_device **cpu_netdev;     /* per-cpu */
+	struct call_async_data_struct rx_sched_call_data;
+	int cpu_for_queue[4];
+#endif
+	int num_queues;
+
 	uint64_t hw_csum_err;
 	uint64_t hw_csum_good;
+	uint64_t rx_hdr_split;
 	uint32_t rx_int_delay;
 	uint32_t rx_abs_int_delay;
 	boolean_t rx_csum;
-	boolean_t rx_ps;
+	unsigned int rx_ps_pages;
 	uint32_t gorcl;
 	uint64_t gorcl_old;
 	uint16_t rx_ps_bsize0;
@@ -278,8 +324,8 @@ struct e1000_adapter {
 	struct e1000_phy_stats phy_stats;
 
 	uint32_t test_icr;
-	struct e1000_desc_ring test_tx_ring;
-	struct e1000_desc_ring test_rx_ring;
+	struct e1000_tx_ring test_tx_ring;
+	struct e1000_rx_ring test_rx_ring;
 
 
 	int msg_enable;
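
Splitting e1000_desc_ring into separate Tx and Rx types lets each side carry only its own bookkeeping (tx_lock and previous_buffer_info move into the Tx ring; the packet-split page arrays stay with Rx), and the per-adapter ring becomes an array indexed by queue. A sketch of how per-queue state is then reached (the loop body is illustrative):

    /* Walk every active queue pair; num_queues and both ring arrays
     * are sized at probe time, one entry per queue. */
    static void reset_ring_indices(struct e1000_adapter *adapter)
    {
            int i;

            for (i = 0; i < adapter->num_queues; i++) {
                    struct e1000_tx_ring *txr = &adapter->tx_ring[i];
                    struct e1000_rx_ring *rxr = &adapter->rx_ring[i];

                    txr->next_to_use = txr->next_to_clean = 0;
                    rxr->next_to_use = rxr->next_to_clean = 0;
            }
    }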

+ 67 - 28
drivers/net/e1000/e1000_ethtool.c

@@ -39,10 +39,10 @@ extern int e1000_up(struct e1000_adapter *adapter);
 extern void e1000_down(struct e1000_adapter *adapter);
 extern void e1000_reset(struct e1000_adapter *adapter);
 extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx);
-extern int e1000_setup_rx_resources(struct e1000_adapter *adapter);
-extern int e1000_setup_tx_resources(struct e1000_adapter *adapter);
-extern void e1000_free_rx_resources(struct e1000_adapter *adapter);
-extern void e1000_free_tx_resources(struct e1000_adapter *adapter);
+extern int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
+extern int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
+extern void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
+extern void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
 extern void e1000_update_stats(struct e1000_adapter *adapter);
 
 struct e1000_stats {
@@ -91,7 +91,8 @@ static const struct e1000_stats e1000_gstrings_stats[] = {
 	{ "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) },
 	{ "rx_long_byte_count", E1000_STAT(stats.gorcl) },
 	{ "rx_csum_offload_good", E1000_STAT(hw_csum_good) },
-	{ "rx_csum_offload_errors", E1000_STAT(hw_csum_err) }
+	{ "rx_csum_offload_errors", E1000_STAT(hw_csum_err) },
+	{ "rx_header_split", E1000_STAT(rx_hdr_split) },
 };
 #define E1000_STATS_LEN	\
 	sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats)
@@ -546,8 +547,10 @@ e1000_set_eeprom(struct net_device *netdev,
 	ret_val = e1000_write_eeprom(hw, first_word,
 				     last_word - first_word + 1, eeprom_buff);
 
-	/* Update the checksum over the first part of the EEPROM if needed */
-	if((ret_val == 0) && first_word <= EEPROM_CHECKSUM_REG)
+	/* Update the checksum over the first part of the EEPROM if needed
+	 * and flush shadow RAM for 82573 controllers */
+	if((ret_val == 0) && ((first_word <= EEPROM_CHECKSUM_REG) ||
+				(hw->mac_type == e1000_82573)))
 		e1000_update_eeprom_checksum(hw);
 
 	kfree(eeprom_buff);
@@ -576,8 +579,8 @@ e1000_get_ringparam(struct net_device *netdev,
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	e1000_mac_type mac_type = adapter->hw.mac_type;
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+	struct e1000_tx_ring *txdr = adapter->tx_ring;
+	struct e1000_rx_ring *rxdr = adapter->rx_ring;
 
 	ring->rx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_RXD :
 		E1000_MAX_82544_RXD;
@@ -597,20 +600,40 @@ e1000_set_ringparam(struct net_device *netdev,
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	e1000_mac_type mac_type = adapter->hw.mac_type;
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->rx_ring;
-	struct e1000_desc_ring tx_old, tx_new, rx_old, rx_new;
-	int err;
+	struct e1000_tx_ring *txdr, *tx_old, *tx_new;
+	struct e1000_rx_ring *rxdr, *rx_old, *rx_new;
+	int i, err, tx_ring_size, rx_ring_size;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	tx_ring_size = sizeof(struct e1000_tx_ring) * adapter->num_queues;
+	rx_ring_size = sizeof(struct e1000_rx_ring) * adapter->num_queues;
+
+	if (netif_running(adapter->netdev))
+		e1000_down(adapter);
 
 	tx_old = adapter->tx_ring;
 	rx_old = adapter->rx_ring;
 
-	if((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
-		return -EINVAL;
-
-	if(netif_running(adapter->netdev))
-		e1000_down(adapter);
-
+	adapter->tx_ring = kmalloc(tx_ring_size, GFP_KERNEL);
+	if (!adapter->tx_ring) {
+		err = -ENOMEM;
+		goto err_setup_rx;
+	}
+	memset(adapter->tx_ring, 0, tx_ring_size);
+
+	adapter->rx_ring = kmalloc(rx_ring_size, GFP_KERNEL);
+	if (!adapter->rx_ring) {
+		kfree(adapter->tx_ring);
+		err = -ENOMEM;
+		goto err_setup_rx;
+	}
+	memset(adapter->rx_ring, 0, rx_ring_size);
+
+	txdr = adapter->tx_ring;
+	rxdr = adapter->rx_ring;
+
 	rxdr->count = max(ring->rx_pending,(uint32_t)E1000_MIN_RXD);
 	rxdr->count = min(rxdr->count,(uint32_t)(mac_type < e1000_82544 ?
 		E1000_MAX_RXD : E1000_MAX_82544_RXD));
@@ -621,11 +644,16 @@ e1000_set_ringparam(struct net_device *netdev,
 		E1000_MAX_TXD : E1000_MAX_82544_TXD));
 	E1000_ROUNDUP(txdr->count, REQ_TX_DESCRIPTOR_MULTIPLE); 
 
+	for (i = 0; i < adapter->num_queues; i++) {
+		txdr[i].count = txdr->count;
+		rxdr[i].count = rxdr->count;
+	}
+
 	if(netif_running(adapter->netdev)) {
 		/* Try to get new resources before deleting old */
-		if((err = e1000_setup_rx_resources(adapter)))
+		if ((err = e1000_setup_all_rx_resources(adapter)))
 			goto err_setup_rx;
-		if((err = e1000_setup_tx_resources(adapter)))
+		if ((err = e1000_setup_all_tx_resources(adapter)))
 			goto err_setup_tx;
 
 		/* save the new, restore the old in order to free it,
@@ -635,8 +663,10 @@ e1000_set_ringparam(struct net_device *netdev,
 		tx_new = adapter->tx_ring;
 		adapter->rx_ring = rx_old;
 		adapter->tx_ring = tx_old;
-		e1000_free_rx_resources(adapter);
-		e1000_free_tx_resources(adapter);
+		e1000_free_all_rx_resources(adapter);
+		e1000_free_all_tx_resources(adapter);
+		kfree(tx_old);
+		kfree(rx_old);
 		adapter->rx_ring = rx_new;
 		adapter->tx_ring = tx_new;
 		if((err = e1000_up(adapter)))
@@ -645,7 +675,7 @@ e1000_set_ringparam(struct net_device *netdev,
 
 	return 0;
 err_setup_tx:
-	e1000_free_rx_resources(adapter);
+	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
 	adapter->rx_ring = rx_old;
 	adapter->tx_ring = tx_old;
@@ -696,6 +726,11 @@ e1000_reg_test(struct e1000_adapter *adapter, uint64_t *data)
 	 * Some bits that get toggled are ignored.
 	 */
         switch (adapter->hw.mac_type) {
+	/* there are several bits on newer hardware that are r/w */
+	case e1000_82571:
+	case e1000_82572:
+		toggle = 0x7FFFF3FF;
+		break;
 	case e1000_82573:
 		toggle = 0x7FFFF033;
 		break;
@@ -898,8 +933,8 @@ e1000_intr_test(struct e1000_adapter *adapter, uint64_t *data)
 static void
 e1000_free_desc_rings(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int i;
 
@@ -941,8 +976,8 @@ e1000_free_desc_rings(struct e1000_adapter *adapter)
 static int
 e1000_setup_desc_rings(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	uint32_t rctl;
 	int size, i, ret_val;
@@ -1245,6 +1280,8 @@ e1000_set_phy_loopback(struct e1000_adapter *adapter)
 	case e1000_82541_rev_2:
 	case e1000_82547:
 	case e1000_82547_rev_2:
+	case e1000_82571:
+	case e1000_82572:
 	case e1000_82573:
 		return e1000_integrated_phy_loopback(adapter);
 		break;
@@ -1340,8 +1377,8 @@ e1000_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size)
 static int
 e1000_run_loopback_test(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int i, j, k, l, lc, good_cnt, ret_val=0;
 	unsigned long time;
@@ -1509,6 +1546,7 @@ e1000_diag_test(struct net_device *netdev,
 		data[2] = 0;
 		data[3] = 0;
 	}
+	msleep_interruptible(4 * 1000);
 }
 
 static void
@@ -1625,7 +1663,7 @@ e1000_phys_id(struct net_device *netdev, uint32_t data)
 	if(!data || data > (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ))
 		data = (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ);
 
-	if(adapter->hw.mac_type < e1000_82573) {
+	if(adapter->hw.mac_type < e1000_82571) {
 		if(!adapter->blink_timer.function) {
 			init_timer(&adapter->blink_timer);
 			adapter->blink_timer.function = e1000_led_blink_callback;
@@ -1739,6 +1777,7 @@ struct ethtool_ops e1000_ethtool_ops = {
 	.phys_id                = e1000_phys_id,
 	.get_stats_count        = e1000_get_stats_count,
 	.get_ethtool_stats      = e1000_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 void e1000_set_ethtool_ops(struct net_device *netdev)
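
e1000_set_ringparam follows a transactional pattern worth noting: allocate the replacement rings first, and only free the old ones once setup succeeds, so a failed resize leaves the interface on its previous, still-valid rings. The same idiom in plain, runnable C (the 16-byte descriptor size is illustrative):

    #include <stdlib.h>

    struct ring { void *desc; unsigned int count; };

    /* Build the replacement first; on failure the caller's ring is
     * untouched and remains usable. */
    static int ring_resize(struct ring *r, unsigned int new_count)
    {
            void *new_desc = calloc(new_count, 16);

            if (!new_desc)
                    return -1;      /* old ring still intact */
            free(r->desc);
            r->desc = new_desc;
            r->count = new_count;
            return 0;
    }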

+ 187 - 33
drivers/net/e1000/e1000_hw.c

@@ -83,14 +83,14 @@ uint16_t e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] =
 
 static const
 uint16_t e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] =
-    { 8, 13, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
-      22, 24, 27, 30, 32, 35, 37, 40, 42, 44, 47, 49, 51, 54, 56, 58,
-      32, 35, 38, 41, 44, 47, 50, 53, 55, 58, 61, 63, 66, 69, 71, 74,
-      43, 47, 51, 54, 58, 61, 64, 67, 71, 74, 77, 80, 82, 85, 88, 90,
-      57, 62, 66, 70, 74, 77, 81, 85, 88, 91, 94, 97, 100, 103, 106, 108,
-      73, 78, 82, 87, 91, 95, 98, 102, 105, 109, 112, 114, 117, 119, 122, 124,
-      91, 96, 101, 105, 109, 113, 116, 119, 122, 125, 127, 128, 128, 128, 128, 128,
-      108, 113, 117, 121, 124, 127, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
+    { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21,
+      0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41,
+      6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61,
+      21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82,
+      40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104,
+      60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121,
+      83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124,
+      104, 109, 114, 118, 121, 124};
 
 
 /******************************************************************************
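
The igp_2 table above maps a 7-bit AGC gain reading to an approximate cable length in meters, and the usable range shrinks to 113 entries (see IGP02E1000_AGC_LENGTH_TABLE_SIZE in e1000_hw.h below). A runnable illustration of the index arithmetic used later in e1000_get_cable_length, with the shift and mask the driver defines for bits 15:9:

    #include <stdint.h>
    #include <stdio.h>

    #define AGC_LENGTH_SHIFT 9      /* IGP02E1000_AGC_LENGTH_SHIFT */
    #define AGC_LENGTH_MASK  0x7F   /* IGP02E1000_AGC_LENGTH_MASK */

    /* Bits 15:9 of the PHY AGC register combine coarse and fine
     * gain; the result indexes the cable length table. */
    static unsigned int agc_index(uint16_t phy_data)
    {
            return (phy_data >> AGC_LENGTH_SHIFT) & AGC_LENGTH_MASK;
    }

    int main(void)
    {
            printf("index for 0x2a00: %u\n", agc_index(0x2a00)); /* 21 */
            return 0;
    }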
@@ -286,7 +286,6 @@ e1000_set_mac_type(struct e1000_hw *hw)
     case E1000_DEV_ID_82546GB_FIBER:
     case E1000_DEV_ID_82546GB_SERDES:
     case E1000_DEV_ID_82546GB_PCIE:
-    case E1000_DEV_ID_82546GB_QUAD_COPPER:
         hw->mac_type = e1000_82546_rev_3;
         break;
     case E1000_DEV_ID_82541EI:
@@ -305,8 +304,19 @@ e1000_set_mac_type(struct e1000_hw *hw)
     case E1000_DEV_ID_82547GI:
         hw->mac_type = e1000_82547_rev_2;
         break;
+    case E1000_DEV_ID_82571EB_COPPER:
+    case E1000_DEV_ID_82571EB_FIBER:
+    case E1000_DEV_ID_82571EB_SERDES:
+        hw->mac_type = e1000_82571;
+        break;
+    case E1000_DEV_ID_82572EI_COPPER:
+    case E1000_DEV_ID_82572EI_FIBER:
+    case E1000_DEV_ID_82572EI_SERDES:
+        hw->mac_type = e1000_82572;
+        break;
     case E1000_DEV_ID_82573E:
     case E1000_DEV_ID_82573E_IAMT:
+    case E1000_DEV_ID_82573L:
         hw->mac_type = e1000_82573;
         break;
     default:
@@ -315,6 +325,8 @@ e1000_set_mac_type(struct e1000_hw *hw)
     }
 
     switch(hw->mac_type) {
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         hw->eeprom_semaphore_present = TRUE;
         /* fall through */
@@ -351,6 +363,8 @@ e1000_set_media_type(struct e1000_hw *hw)
     switch (hw->device_id) {
     case E1000_DEV_ID_82545GM_SERDES:
     case E1000_DEV_ID_82546GB_SERDES:
+    case E1000_DEV_ID_82571EB_SERDES:
+    case E1000_DEV_ID_82572EI_SERDES:
         hw->media_type = e1000_media_type_internal_serdes;
         break;
     default:
@@ -523,6 +537,8 @@ e1000_reset_hw(struct e1000_hw *hw)
             E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
             E1000_WRITE_FLUSH(hw);
             /* fall through */
+        case e1000_82571:
+        case e1000_82572:
             ret_val = e1000_get_auto_rd_done(hw);
             if(ret_val)
                 /* We don't want to continue accessing MAC registers. */
@@ -683,6 +699,9 @@ e1000_init_hw(struct e1000_hw *hw)
         switch (hw->mac_type) {
         default:
             break;
+        case e1000_82571:
+        case e1000_82572:
+            ctrl |= (1 << 22);    /* fall through */
         case e1000_82573:
             ctrl |= E1000_TXDCTL_COUNT_DESC;
             break;
@@ -694,6 +713,26 @@ e1000_init_hw(struct e1000_hw *hw)
         e1000_enable_tx_pkt_filtering(hw); 
     }
 
+    switch (hw->mac_type) {
+    default:
+        break;
+    case e1000_82571:
+    case e1000_82572:
+        ctrl = E1000_READ_REG(hw, TXDCTL1);
+        ctrl &= ~E1000_TXDCTL_WTHRESH;
+        ctrl |= E1000_TXDCTL_COUNT_DESC | E1000_TXDCTL_FULL_TX_DESC_WB;
+        ctrl |= (1 << 22);
+        E1000_WRITE_REG(hw, TXDCTL1, ctrl);
+        break;
+    }
+
+    if (hw->mac_type == e1000_82573) {
+        uint32_t gcr = E1000_READ_REG(hw, GCR);
+        gcr |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX;
+        E1000_WRITE_REG(hw, GCR, gcr);
+    }
 
     /* Clear all of the statistics registers (clear on read).  It is
      * important that we do this after we have tried to establish link
@@ -878,6 +917,14 @@ e1000_setup_fiber_serdes_link(struct e1000_hw *hw)
 
     DEBUGFUNC("e1000_setup_fiber_serdes_link");
 
+    /* On 82571 and 82572 Fiber connections, SerDes loopback mode persists
+     * until explicitly turned off or a power cycle is performed.  A read to
+     * the register does not indicate its status.  Therefore, we ensure
+     * loopback mode is disabled during initialization.
+     */
+    if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572)
+        E1000_WRITE_REG(hw, SCTL, E1000_DISABLE_SERDES_LOOPBACK);
+
     /* On adapters with a MAC newer than 82544, SW Defineable pin 1 will be
      * set when the optics detect a signal. On older adapters, it will be
      * cleared when there is a signal.  This applies to fiber media only.
@@ -2943,6 +2990,8 @@ e1000_phy_reset(struct e1000_hw *hw)
 
     switch (hw->mac_type) {
     case e1000_82541_rev_2:
+    case e1000_82571:
+    case e1000_82572:
         ret_val = e1000_phy_hw_reset(hw);
         if(ret_val)
             return ret_val;
@@ -2981,6 +3030,16 @@ e1000_detect_gig_phy(struct e1000_hw *hw)
 
     DEBUGFUNC("e1000_detect_gig_phy");
 
+    /* The 82571 firmware may still be configuring the PHY.  In this
+     * case, we cannot access the PHY until the configuration is done.  So
+     * we explicitly set the PHY values. */
+    if(hw->mac_type == e1000_82571 ||
+       hw->mac_type == e1000_82572) {
+        hw->phy_id = IGP01E1000_I_PHY_ID;
+        hw->phy_type = e1000_phy_igp_2;
+        return E1000_SUCCESS;
+    }
+
     /* Read the PHY ID Registers to identify which PHY is onboard. */
     ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high);
     if(ret_val)
@@ -3334,6 +3393,21 @@ e1000_init_eeprom_params(struct e1000_hw *hw)
         eeprom->use_eerd = FALSE;
         eeprom->use_eewr = FALSE;
         break;
+    case e1000_82571:
+    case e1000_82572:
+        eeprom->type = e1000_eeprom_spi;
+        eeprom->opcode_bits = 8;
+        eeprom->delay_usec = 1;
+        if (eecd & E1000_EECD_ADDR_BITS) {
+            eeprom->page_size = 32;
+            eeprom->address_bits = 16;
+        } else {
+            eeprom->page_size = 8;
+            eeprom->address_bits = 8;
+        }
+        eeprom->use_eerd = FALSE;
+        eeprom->use_eewr = FALSE;
+        break;
     case e1000_82573:
         eeprom->type = e1000_eeprom_spi;
         eeprom->opcode_bits = 8;
@@ -3543,25 +3617,26 @@ e1000_acquire_eeprom(struct e1000_hw *hw)
     eecd = E1000_READ_REG(hw, EECD);
 
     if (hw->mac_type != e1000_82573) {
-    /* Request EEPROM Access */
-    if(hw->mac_type > e1000_82544) {
-        eecd |= E1000_EECD_REQ;
-        E1000_WRITE_REG(hw, EECD, eecd);
-        eecd = E1000_READ_REG(hw, EECD);
-        while((!(eecd & E1000_EECD_GNT)) &&
-              (i < E1000_EEPROM_GRANT_ATTEMPTS)) {
-            i++;
-            udelay(5);
-            eecd = E1000_READ_REG(hw, EECD);
-        }
-        if(!(eecd & E1000_EECD_GNT)) {
-            eecd &= ~E1000_EECD_REQ;
+        /* Request EEPROM Access */
+        if(hw->mac_type > e1000_82544) {
+            eecd |= E1000_EECD_REQ;
             E1000_WRITE_REG(hw, EECD, eecd);
-            DEBUGOUT("Could not acquire EEPROM grant\n");
-            return -E1000_ERR_EEPROM;
+            eecd = E1000_READ_REG(hw, EECD);
+            while((!(eecd & E1000_EECD_GNT)) &&
+                  (i < E1000_EEPROM_GRANT_ATTEMPTS)) {
+                i++;
+                udelay(5);
+                eecd = E1000_READ_REG(hw, EECD);
+            }
+            if(!(eecd & E1000_EECD_GNT)) {
+                eecd &= ~E1000_EECD_REQ;
+                E1000_WRITE_REG(hw, EECD, eecd);
+                DEBUGOUT("Could not acquire EEPROM grant\n");
+                e1000_put_hw_eeprom_semaphore(hw);
+                return -E1000_ERR_EEPROM;
+            }
         }
     }
-    }
 
     /* Setup EEPROM for Read/Write */
 
@@ -4064,7 +4139,7 @@ e1000_write_eeprom(struct e1000_hw *hw,
         return -E1000_ERR_EEPROM;
     }
 
-    /* 82573 reads only through eerd */
+    /* 82573 writes only through eewr */
     if(eeprom->use_eewr == TRUE)
         return e1000_write_eeprom_eewr(hw, offset, words, data);
 
@@ -4353,9 +4428,16 @@ e1000_read_mac_addr(struct e1000_hw * hw)
         hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF);
         hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8);
     }
-    if(((hw->mac_type == e1000_82546) || (hw->mac_type == e1000_82546_rev_3)) &&
-       (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1))
+    switch (hw->mac_type) {
+    default:
+        break;
+    case e1000_82546:
+    case e1000_82546_rev_3:
+    case e1000_82571:
+        if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)
             hw->perm_mac_addr[5] ^= 0x01;
+        break;
+    }
 
     for(i = 0; i < NODE_ADDRESS_SIZE; i++)
         hw->mac_addr[i] = hw->perm_mac_addr[i];
@@ -4385,6 +4467,12 @@ e1000_init_rx_addrs(struct e1000_hw *hw)
     e1000_rar_set(hw, hw->mac_addr, 0);
 
     rar_num = E1000_RAR_ENTRIES;
+
+    /* Reserve a spot for the Locally Administered Address to work around
+     * an 82571 issue in which a reset on one port will reload the MAC on
+     * the other port. */
+    if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE))
+        rar_num -= 1;
     /* Zero out the other 15 receive addresses. */
     DEBUGOUT("Clearing RAR[1-15]\n");
     for(i = 1; i < rar_num; i++) {
@@ -4427,6 +4515,12 @@ e1000_mc_addr_list_update(struct e1000_hw *hw,
     /* Clear RAR[1-15] */
     DEBUGOUT(" Clearing RAR[1-15]\n");
     num_rar_entry = E1000_RAR_ENTRIES;
+    /* Reserve a spot for the Locally Administered Address to work around
+     * an 82571 issue in which a reset on one port will reload the MAC on
+     * the other port. */
+    if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE))
+        num_rar_entry -= 1;
+
     for(i = rar_used_count; i < num_rar_entry; i++) {
         E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
         E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
@@ -4984,7 +5078,6 @@ e1000_clear_hw_cntrs(struct e1000_hw *hw)
     temp = E1000_READ_REG(hw, ICTXQEC);
     temp = E1000_READ_REG(hw, ICTXQMTC);
     temp = E1000_READ_REG(hw, ICRXDMTC);
-
 }
 
 /******************************************************************************
@@ -5151,6 +5244,8 @@ e1000_get_bus_info(struct e1000_hw *hw)
         hw->bus_speed = e1000_bus_speed_unknown;
         hw->bus_width = e1000_bus_width_unknown;
         break;
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         hw->bus_type = e1000_bus_type_pci_express;
         hw->bus_speed = e1000_bus_speed_2500;
@@ -5250,6 +5345,7 @@ e1000_get_cable_length(struct e1000_hw *hw,
     int32_t ret_val;
     uint16_t agc_value = 0;
     uint16_t cur_agc, min_agc = IGP01E1000_AGC_LENGTH_TABLE_SIZE;
+    uint16_t max_agc = 0;
     uint16_t i, phy_data;
     uint16_t cable_length;
 
@@ -5338,6 +5434,40 @@ e1000_get_cable_length(struct e1000_hw *hw,
                        IGP01E1000_AGC_RANGE) : 0;
         *max_length = e1000_igp_cable_length_table[agc_value] +
                       IGP01E1000_AGC_RANGE;
+    } else if (hw->phy_type == e1000_phy_igp_2) {
+        uint16_t agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] =
+                                                         {IGP02E1000_PHY_AGC_A,
+                                                          IGP02E1000_PHY_AGC_B,
+                                                          IGP02E1000_PHY_AGC_C,
+                                                          IGP02E1000_PHY_AGC_D};
+        /* Seed min_agc for this table: the initializer above uses the
+         * 128-entry igp_1 table size, which would index past the end
+         * of the 113-entry igp_2 table. */
+        min_agc = IGP02E1000_AGC_LENGTH_TABLE_SIZE - 1;
+
+        /* Read the AGC registers for all channels */
+        for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+            ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data);
+            if (ret_val)
+                return ret_val;
+
+            /* Getting bits 15:9, which represent the combination of coarse and
+             * fine gain values.  The result is a number that can be put into
+             * the lookup table to obtain the approximate cable length. */
+            cur_agc = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+                      IGP02E1000_AGC_LENGTH_MASK;
+
+            /* Remove min & max AGC values from calculation. */
+            if (e1000_igp_2_cable_length_table[min_agc] > e1000_igp_2_cable_length_table[cur_agc])
+                min_agc = cur_agc;
+            if (e1000_igp_2_cable_length_table[max_agc] < e1000_igp_2_cable_length_table[cur_agc])
+                max_agc = cur_agc;
+
+            agc_value += e1000_igp_2_cable_length_table[cur_agc];
+        }
+
+        agc_value -= (e1000_igp_2_cable_length_table[min_agc] + e1000_igp_2_cable_length_table[max_agc]);
+        agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+        /* Calculate cable length with the error range of +/- 15 meters. */
+        *min_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+                       (agc_value - IGP02E1000_AGC_RANGE) : 0;
+        *max_length = agc_value + IGP02E1000_AGC_RANGE;
     }
 
     return E1000_SUCCESS;
@@ -6465,6 +6595,8 @@ e1000_get_auto_rd_done(struct e1000_hw *hw)
     default:
         msec_delay(5);
         break;
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         while(timeout) {
             if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) break;
@@ -6494,10 +6626,31 @@ e1000_get_auto_rd_done(struct e1000_hw *hw)
 int32_t
 e1000_get_phy_cfg_done(struct e1000_hw *hw)
 {
+    int32_t timeout = PHY_CFG_TIMEOUT;
+    uint32_t cfg_mask = E1000_EEPROM_CFG_DONE;
+
     DEBUGFUNC("e1000_get_phy_cfg_done");
 
-    /* Simply wait for 10ms */
-    msec_delay(10);
+    switch (hw->mac_type) {
+    default:
+        msec_delay(10);
+        break;
+    case e1000_82571:
+    case e1000_82572:
+        while (timeout) {
+            if (E1000_READ_REG(hw, EEMNGCTL) & cfg_mask)
+                break;
+            else
+                msec_delay(1);
+            timeout--;
+        }
+
+        if (!timeout) {
+            DEBUGOUT("MNG configuration cycle has not completed.\n");
+            return -E1000_ERR_RESET;
+        }
+        break;
+    }
 
     return E1000_SUCCESS;
 }
@@ -6569,8 +6722,7 @@ e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw)
         return;
 
     swsm = E1000_READ_REG(hw, SWSM);
-    /* Release both semaphores. */
-    swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+    swsm &= ~(E1000_SWSM_SWESMBI);
     E1000_WRITE_REG(hw, SWSM, swsm);
 }
 
@@ -6606,6 +6758,8 @@ e1000_arc_subsystem_valid(struct e1000_hw *hw)
      * if this is the case.  We read FWSM to determine the manageability mode.
      */
     switch (hw->mac_type) {
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         fwsm = E1000_READ_REG(hw, FWSM);
         if((fwsm & E1000_FWSM_MODE_MASK) != 0)
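
The igp_2 branch of e1000_get_cable_length added above averages the four channel estimates after discarding the shortest and the longest, then widens the result by +/- IGP02E1000_AGC_RANGE. The same trimmed mean in plain, runnable C:

    #include <stdio.h>

    #define CHANNELS  4
    #define AGC_RANGE 15    /* +/- error band in meters */

    int main(void)
    {
            unsigned int len[CHANNELS] = { 40, 45, 51, 44 };
            unsigned int sum = 0, min = len[0], max = len[0];
            unsigned int i, mean, lo, hi;

            for (i = 0; i < CHANNELS; i++) {
                    sum += len[i];
                    if (len[i] < min) min = len[i];
                    if (len[i] > max) max = len[i];
            }
            mean = (sum - min - max) / (CHANNELS - 2);  /* drop extremes */
            lo = mean > AGC_RANGE ? mean - AGC_RANGE : 0;
            hi = mean + AGC_RANGE;
            printf("cable length %u..%u m\n", lo, hi);  /* 29..59 here */
            return 0;
    }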

+ 84 - 12
drivers/net/e1000/e1000_hw.h

@@ -57,6 +57,8 @@ typedef enum {
     e1000_82541_rev_2,
     e1000_82547,
     e1000_82547_rev_2,
+    e1000_82571,
+    e1000_82572,
     e1000_82573,
     e1000_num_macs
 } e1000_mac_type;
@@ -478,10 +480,16 @@ uint8_t e1000_arc_subsystem_valid(struct e1000_hw *hw);
 #define E1000_DEV_ID_82546GB_SERDES      0x107B
 #define E1000_DEV_ID_82546GB_PCIE        0x108A
 #define E1000_DEV_ID_82547EI             0x1019
+#define E1000_DEV_ID_82571EB_COPPER      0x105E
+#define E1000_DEV_ID_82571EB_FIBER       0x105F
+#define E1000_DEV_ID_82571EB_SERDES      0x1060
+#define E1000_DEV_ID_82572EI_COPPER      0x107D
+#define E1000_DEV_ID_82572EI_FIBER       0x107E
+#define E1000_DEV_ID_82572EI_SERDES      0x107F
 #define E1000_DEV_ID_82573E              0x108B
 #define E1000_DEV_ID_82573E_IAMT         0x108C
+#define E1000_DEV_ID_82573L              0x109A
 
-#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099
 
 #define NODE_ADDRESS_SIZE 6
 #define ETH_LENGTH_OF_ADDRESS 6
@@ -833,6 +841,8 @@ struct e1000_ffvt_entry {
 #define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
 #define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
 
+#define E1000_DISABLE_SERDES_LOOPBACK   0x0400
+
 /* Register Set. (82543, 82544)
  *
  * Registers are defined to be 32 bits and  should be accessed as 32 bit values.
@@ -853,6 +863,7 @@ struct e1000_ffvt_entry {
 #define E1000_CTRL_EXT 0x00018  /* Extended Device Control - RW */
 #define E1000_FLA      0x0001C  /* Flash Access - RW */
 #define E1000_MDIC     0x00020  /* MDI Control - RW */
+#define E1000_SCTL     0x00024  /* SerDes Control - RW */
 #define E1000_FCAL     0x00028  /* Flow Control Address Low - RW */
 #define E1000_FCAH     0x0002C  /* Flow Control Address High -RW */
 #define E1000_FCT      0x00030  /* Flow Control Type - RW */
@@ -864,6 +875,12 @@ struct e1000_ffvt_entry {
 #define E1000_IMC      0x000D8  /* Interrupt Mask Clear - WO */
 #define E1000_IAM      0x000E0  /* Interrupt Acknowledge Auto Mask */
 #define E1000_RCTL     0x00100  /* RX Control - RW */
+#define E1000_RDTR1    0x02820  /* RX Delay Timer (1) - RW */
+#define E1000_RDBAL1   0x02900  /* RX Descriptor Base Address Low (1) - RW */
+#define E1000_RDBAH1   0x02904  /* RX Descriptor Base Address High (1) - RW */
+#define E1000_RDLEN1   0x02908  /* RX Descriptor Length (1) - RW */
+#define E1000_RDH1     0x02910  /* RX Descriptor Head (1) - RW */
+#define E1000_RDT1     0x02918  /* RX Descriptor Tail (1) - RW */
 #define E1000_FCTTV    0x00170  /* Flow Control Transmit Timer Value - RW */
 #define E1000_TXCW     0x00178  /* TX Configuration Word - RW */
 #define E1000_RXCW     0x00180  /* RX Configuration Word - RO */
@@ -895,6 +912,12 @@ struct e1000_ffvt_entry {
 #define E1000_RDH      0x02810  /* RX Descriptor Head - RW */
 #define E1000_RDT      0x02818  /* RX Descriptor Tail - RW */
 #define E1000_RDTR     0x02820  /* RX Delay Timer - RW */
+#define E1000_RDBAL0   E1000_RDBAL /* RX Desc Base Address Low (0) - RW */
+#define E1000_RDBAH0   E1000_RDBAH /* RX Desc Base Address High (0) - RW */
+#define E1000_RDLEN0   E1000_RDLEN /* RX Desc Length (0) - RW */
+#define E1000_RDH0     E1000_RDH   /* RX Desc Head (0) - RW */
+#define E1000_RDT0     E1000_RDT   /* RX Desc Tail (0) - RW */
+#define E1000_RDTR0    E1000_RDTR  /* RX Delay Timer (0) - RW */
 #define E1000_RXDCTL   0x02828  /* RX Descriptor Control - RW */
 #define E1000_RADV     0x0282C  /* RX Interrupt Absolute Delay Timer - RW */
 #define E1000_RSRPD    0x02C00  /* RX Small Packet Detect - RW */
@@ -980,15 +1003,15 @@ struct e1000_ffvt_entry {
 #define E1000_BPTC     0x040F4  /* Broadcast Packets TX Count - R/clr */
 #define E1000_TSCTC    0x040F8  /* TCP Segmentation Context TX - R/clr */
 #define E1000_TSCTFC   0x040FC  /* TCP Segmentation Context TX Fail - R/clr */
-#define E1000_IAC       0x4100  /* Interrupt Assertion Count */
-#define E1000_ICRXPTC   0x4104  /* Interrupt Cause Rx Packet Timer Expire Count */
-#define E1000_ICRXATC   0x4108  /* Interrupt Cause Rx Absolute Timer Expire Count */
-#define E1000_ICTXPTC   0x410C  /* Interrupt Cause Tx Packet Timer Expire Count */
-#define E1000_ICTXATC   0x4110  /* Interrupt Cause Tx Absolute Timer Expire Count */
-#define E1000_ICTXQEC   0x4118  /* Interrupt Cause Tx Queue Empty Count */
-#define E1000_ICTXQMTC  0x411C  /* Interrupt Cause Tx Queue Minimum Threshold Count */
-#define E1000_ICRXDMTC  0x4120  /* Interrupt Cause Rx Descriptor Minimum Threshold Count */
-#define E1000_ICRXOC    0x4124  /* Interrupt Cause Receiver Overrun Count */
+#define E1000_IAC      0x04100  /* Interrupt Assertion Count */
+#define E1000_ICRXPTC  0x04104  /* Interrupt Cause Rx Packet Timer Expire Count */
+#define E1000_ICRXATC  0x04108  /* Interrupt Cause Rx Absolute Timer Expire Count */
+#define E1000_ICTXPTC  0x0410C  /* Interrupt Cause Tx Packet Timer Expire Count */
+#define E1000_ICTXATC  0x04110  /* Interrupt Cause Tx Absolute Timer Expire Count */
+#define E1000_ICTXQEC  0x04118  /* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC 0x0411C  /* Interrupt Cause Tx Queue Minimum Threshold Count */
+#define E1000_ICRXDMTC 0x04120  /* Interrupt Cause Rx Descriptor Minimum Threshold Count */
+#define E1000_ICRXOC   0x04124  /* Interrupt Cause Receiver Overrun Count */
 #define E1000_RXCSUM   0x05000  /* RX Checksum Control - RW */
 #define E1000_RFCTL    0x05008  /* Receive Filter Control*/
 #define E1000_MTA      0x05200  /* Multicast Table Array - RW Array */
@@ -1018,6 +1041,14 @@ struct e1000_ffvt_entry {
 #define E1000_FWSM      0x05B54 /* FW Semaphore */
 #define E1000_FFLT_DBG  0x05F04 /* Debug Register */
 #define E1000_HICR      0x08F00 /* Host Interface Control */
+
+/* RSS registers */
+#define E1000_CPUVEC    0x02C10 /* CPU Vector Register - RW */
+#define E1000_MRQC      0x05818 /* Multiple Receive Control - RW */
+#define E1000_RETA      0x05C00 /* Redirection Table - RW Array */
+#define E1000_RSSRK     0x05C80 /* RSS Random Key - RW Array */
+#define E1000_RSSIM     0x05864 /* RSS Interrupt Mask */
+#define E1000_RSSIR     0x05868 /* RSS Interrupt Request */
 /* Register Set (82542)
  *
  * Some of the 82542 registers are located at different offsets than they are
@@ -1032,6 +1063,7 @@ struct e1000_ffvt_entry {
 #define E1000_82542_CTRL_EXT E1000_CTRL_EXT
 #define E1000_82542_FLA      E1000_FLA
 #define E1000_82542_MDIC     E1000_MDIC
+#define E1000_82542_SCTL     E1000_SCTL
 #define E1000_82542_FCAL     E1000_FCAL
 #define E1000_82542_FCAH     E1000_FCAH
 #define E1000_82542_FCT      E1000_FCT
@@ -1049,6 +1081,18 @@ struct e1000_ffvt_entry {
 #define E1000_82542_RDLEN    0x00118
 #define E1000_82542_RDH      0x00120
 #define E1000_82542_RDT      0x00128
+#define E1000_82542_RDTR0    E1000_82542_RDTR
+#define E1000_82542_RDBAL0   E1000_82542_RDBAL
+#define E1000_82542_RDBAH0   E1000_82542_RDBAH
+#define E1000_82542_RDLEN0   E1000_82542_RDLEN
+#define E1000_82542_RDH0     E1000_82542_RDH
+#define E1000_82542_RDT0     E1000_82542_RDT
+#define E1000_82542_RDTR1    0x00130
+#define E1000_82542_RDBAL1   0x00138
+#define E1000_82542_RDBAH1   0x0013C
+#define E1000_82542_RDLEN1   0x00140
+#define E1000_82542_RDH1     0x00148
+#define E1000_82542_RDT1     0x00150
 #define E1000_82542_FCRTH    0x00160
 #define E1000_82542_FCRTL    0x00168
 #define E1000_82542_FCTTV    E1000_FCTTV
@@ -1197,6 +1241,13 @@ struct e1000_ffvt_entry {
 #define E1000_82542_ICRXOC      E1000_ICRXOC
 #define E1000_82542_HICR        E1000_HICR
 
+#define E1000_82542_CPUVEC      E1000_CPUVEC
+#define E1000_82542_MRQC        E1000_MRQC
+#define E1000_82542_RETA        E1000_RETA
+#define E1000_82542_RSSRK       E1000_RSSRK
+#define E1000_82542_RSSIM       E1000_RSSIM
+#define E1000_82542_RSSIR       E1000_RSSIR
+
 /* Statistics counters collected by the MAC */
 struct e1000_hw_stats {
     uint64_t crcerrs;
@@ -1336,6 +1387,7 @@ struct e1000_hw {
     boolean_t serdes_link_down;
     boolean_t tbi_compatibility_en;
     boolean_t tbi_compatibility_on;
+    boolean_t laa_is_present;
     boolean_t phy_reset_disable;
     boolean_t fc_send_xon;
     boolean_t fc_strict_ieee;
@@ -1374,6 +1426,7 @@ struct e1000_hw {
 #define E1000_CTRL_BEM32    0x00000400  /* Big Endian 32 mode */
 #define E1000_CTRL_FRCSPD   0x00000800  /* Force Speed */
 #define E1000_CTRL_FRCDPX   0x00001000  /* Force Duplex */
+#define E1000_CTRL_D_UD_EN  0x00002000  /* Dock/Undock enable */
 #define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */
 #define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
 #define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
@@ -1491,6 +1544,8 @@ struct e1000_hw {
 #define E1000_CTRL_EXT_WR_WMARK_320   0x01000000
 #define E1000_CTRL_EXT_WR_WMARK_384   0x02000000
 #define E1000_CTRL_EXT_WR_WMARK_448   0x03000000
+#define E1000_CTRL_EXT_CANC           0x04000000  /* Interrupt delay cancellation */
+#define E1000_CTRL_EXT_DRV_LOAD       0x10000000  /* Driver loaded bit for FW */
 #define E1000_CTRL_EXT_IAME           0x08000000  /* Interrupt acknowledge Auto-mask */
 #define E1000_CTRL_EXT_INT_TIMER_CLR  0x20000000  /* Clear Interrupt timers after IMS clear */
 
@@ -1524,6 +1579,7 @@ struct e1000_hw {
 #define E1000_LEDCTL_LED2_BLINK           0x00800000
 #define E1000_LEDCTL_LED3_MODE_MASK       0x0F000000
 #define E1000_LEDCTL_LED3_MODE_SHIFT      24
+#define E1000_LEDCTL_LED3_BLINK_RATE      0x20000000
 #define E1000_LEDCTL_LED3_IVRT            0x40000000
 #define E1000_LEDCTL_LED3_BLINK           0x80000000
 
@@ -1784,6 +1840,16 @@ struct e1000_hw {
 #define E1000_RXCSUM_IPPCSE    0x00001000   /* IP payload checksum enable */
 #define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
 
+/* Multiple Receive Queue Control */
+#define E1000_MRQC_ENABLE_MASK              0x00000003
+#define E1000_MRQC_ENABLE_RSS_2Q            0x00000001
+#define E1000_MRQC_ENABLE_RSS_INT           0x00000004
+#define E1000_MRQC_RSS_FIELD_MASK           0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP       0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4           0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP       0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6_EX        0x00080000
+#define E1000_MRQC_RSS_FIELD_IPV6           0x00100000
 
 /* Definitions for power management and wakeup registers */
 /* Wake Up Control */
@@ -1928,6 +1994,7 @@ struct e1000_host_command_info {
 #define E1000_MDALIGN          4096
 
 #define E1000_GCR_BEM32                 0x00400000
+#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000
 /* Function Active and Power State to MNG */
 #define E1000_FACTPS_FUNC0_POWER_STATE_MASK         0x00000003
 #define E1000_FACTPS_LAN0_VALID                     0x00000004
@@ -1980,6 +2047,7 @@ struct e1000_host_command_info {
 /* EEPROM Word Offsets */
 #define EEPROM_COMPAT                 0x0003
 #define EEPROM_ID_LED_SETTINGS        0x0004
+#define EEPROM_VERSION                0x0005
 #define EEPROM_SERDES_AMPLITUDE       0x0006 /* For SERDES output amplitude adjustment. */
 #define EEPROM_PHY_CLASS_WORD         0x0007
 #define EEPROM_INIT_CONTROL1_REG      0x000A
@@ -1990,6 +2058,8 @@ struct e1000_host_command_info {
 #define EEPROM_FLASH_VERSION          0x0032
 #define EEPROM_CHECKSUM_REG           0x003F
 
+#define E1000_EEPROM_CFG_DONE         0x00040000   /* MNG config cycle done */
+
 /* Word definitions for ID LED Settings */
 #define ID_LED_RESERVED_0000 0x0000
 #define ID_LED_RESERVED_FFFF 0xFFFF
@@ -2108,6 +2178,8 @@ struct e1000_host_command_info {
 #define E1000_PBA_22K 0x0016
 #define E1000_PBA_24K 0x0018
 #define E1000_PBA_30K 0x001E
+#define E1000_PBA_32K 0x0020
+#define E1000_PBA_38K 0x0026
 #define E1000_PBA_40K 0x0028
 #define E1000_PBA_48K 0x0030    /* 48KB, default RX allocation */
 
@@ -2592,11 +2664,11 @@ struct e1000_host_command_info {
 
 /* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */
 #define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128
-#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 128
+#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 113
 
 /* The precision error of the cable length is +/- 10 (igp) or 15 (igp_2) meters */
 #define IGP01E1000_AGC_RANGE    10
-#define IGP02E1000_AGC_RANGE    10
+#define IGP02E1000_AGC_RANGE    15
 
 /* IGP01E1000 PCS Initialization register */
 /* bits 3:6 in the PCS registers store the channel polarity */
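
The new MRQC bits drive receive-side scaling on the 82571: the low bits select the queue mode and the upper field mask chooses which header tuples feed the hash. A hedged sketch of composing the register value (just the bit arithmetic; programming the RETA redirection table and RSSRK key is omitted):

    #include <stdint.h>

    #define MRQC_ENABLE_RSS_2Q      0x00000001
    #define MRQC_RSS_FIELD_IPV4_TCP 0x00010000
    #define MRQC_RSS_FIELD_IPV4     0x00020000

    /* Two-queue RSS, hashing on IPv4 and IPv4/TCP headers. */
    static uint32_t mrqc_value(void)
    {
            return MRQC_ENABLE_RSS_2Q |
                   MRQC_RSS_FIELD_IPV4_TCP |
                   MRQC_RSS_FIELD_IPV4;
    }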

+ 822 - 256
drivers/net/e1000/e1000_main.c

@@ -43,7 +43,7 @@ char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
 #else
 #define DRIVERNAPI "-NAPI"
 #endif
-#define DRV_VERSION		"6.0.60-k2"DRIVERNAPI
+#define DRV_VERSION "6.1.16-k2"DRIVERNAPI
 char e1000_driver_version[] = DRV_VERSION;
 char e1000_copyright[] = "Copyright (c) 1999-2005 Intel Corporation.";
 
@@ -80,6 +80,9 @@ static struct pci_device_id e1000_pci_tbl[] = {
 	INTEL_E1000_ETHERNET_DEVICE(0x1026),
 	INTEL_E1000_ETHERNET_DEVICE(0x1027),
 	INTEL_E1000_ETHERNET_DEVICE(0x1028),
+	INTEL_E1000_ETHERNET_DEVICE(0x105E),
+	INTEL_E1000_ETHERNET_DEVICE(0x105F),
+	INTEL_E1000_ETHERNET_DEVICE(0x1060),
 	INTEL_E1000_ETHERNET_DEVICE(0x1075),
 	INTEL_E1000_ETHERNET_DEVICE(0x1076),
 	INTEL_E1000_ETHERNET_DEVICE(0x1077),
@@ -88,10 +91,13 @@ static struct pci_device_id e1000_pci_tbl[] = {
 	INTEL_E1000_ETHERNET_DEVICE(0x107A),
 	INTEL_E1000_ETHERNET_DEVICE(0x107B),
 	INTEL_E1000_ETHERNET_DEVICE(0x107C),
+	INTEL_E1000_ETHERNET_DEVICE(0x107D),
+	INTEL_E1000_ETHERNET_DEVICE(0x107E),
+	INTEL_E1000_ETHERNET_DEVICE(0x107F),
 	INTEL_E1000_ETHERNET_DEVICE(0x108A),
 	INTEL_E1000_ETHERNET_DEVICE(0x108B),
 	INTEL_E1000_ETHERNET_DEVICE(0x108C),
-	INTEL_E1000_ETHERNET_DEVICE(0x1099),
+	INTEL_E1000_ETHERNET_DEVICE(0x109A),
 	/* required last entry */
 	{0,}
 };
@@ -102,10 +108,18 @@ int e1000_up(struct e1000_adapter *adapter);
 void e1000_down(struct e1000_adapter *adapter);
 void e1000_reset(struct e1000_adapter *adapter);
 int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx);
-int e1000_setup_tx_resources(struct e1000_adapter *adapter);
-int e1000_setup_rx_resources(struct e1000_adapter *adapter);
-void e1000_free_tx_resources(struct e1000_adapter *adapter);
-void e1000_free_rx_resources(struct e1000_adapter *adapter);
+int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
+int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
+int e1000_setup_tx_resources(struct e1000_adapter *adapter,
+                             struct e1000_tx_ring *txdr);
+int e1000_setup_rx_resources(struct e1000_adapter *adapter,
+                             struct e1000_rx_ring *rxdr);
+void e1000_free_tx_resources(struct e1000_adapter *adapter,
+                             struct e1000_tx_ring *tx_ring);
+void e1000_free_rx_resources(struct e1000_adapter *adapter,
+                             struct e1000_rx_ring *rx_ring);
 void e1000_update_stats(struct e1000_adapter *adapter);
 
 /* Local Function Prototypes */
@@ -114,14 +128,22 @@ static int e1000_init_module(void);
 static void e1000_exit_module(void);
 static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void __devexit e1000_remove(struct pci_dev *pdev);
+static int e1000_alloc_queues(struct e1000_adapter *adapter);
+#ifdef CONFIG_E1000_MQ
+static void e1000_setup_queue_mapping(struct e1000_adapter *adapter);
+#endif
 static int e1000_sw_init(struct e1000_adapter *adapter);
 static int e1000_open(struct net_device *netdev);
 static int e1000_close(struct net_device *netdev);
 static void e1000_configure_tx(struct e1000_adapter *adapter);
 static void e1000_configure_rx(struct e1000_adapter *adapter);
 static void e1000_setup_rctl(struct e1000_adapter *adapter);
-static void e1000_clean_tx_ring(struct e1000_adapter *adapter);
-static void e1000_clean_rx_ring(struct e1000_adapter *adapter);
+static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter);
+static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter);
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
+                                struct e1000_tx_ring *tx_ring);
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
+                                struct e1000_rx_ring *rx_ring);
 static void e1000_set_multi(struct net_device *netdev);
 static void e1000_update_phy_info(unsigned long data);
 static void e1000_watchdog(unsigned long data);
@@ -132,19 +154,26 @@ static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
 static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
 static int e1000_set_mac(struct net_device *netdev, void *p);
 static irqreturn_t e1000_intr(int irq, void *data, struct pt_regs *regs);
-static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter);
+static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter,
+                                    struct e1000_tx_ring *tx_ring);
 #ifdef CONFIG_E1000_NAPI
-static int e1000_clean(struct net_device *netdev, int *budget);
+static int e1000_clean(struct net_device *poll_dev, int *budget);
 static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter,
+                                    struct e1000_rx_ring *rx_ring,
                                     int *work_done, int work_to_do);
 static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
+                                       struct e1000_rx_ring *rx_ring,
                                        int *work_done, int work_to_do);
 #else
-static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter);
-static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter);
+static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter,
+                                    struct e1000_rx_ring *rx_ring);
+static boolean_t e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
+                                       struct e1000_rx_ring *rx_ring);
 #endif
-static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
-static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter);
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
+                                   struct e1000_rx_ring *rx_ring);
+static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
+                                      struct e1000_rx_ring *rx_ring);
 static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
 			   int cmd);
@@ -172,6 +201,11 @@ static int e1000_resume(struct pci_dev *pdev);
 static void e1000_netpoll (struct net_device *netdev);
 #endif
 
+#ifdef CONFIG_E1000_MQ
+/* for multiple Rx queues */
+void e1000_rx_schedule(void *data);
+#endif
+
 /* Exported from other modules */
 
 extern void e1000_check_options(struct e1000_adapter *adapter);
@@ -289,7 +323,7 @@ int
 e1000_up(struct e1000_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	int err;
+	int i, err;
 
 	/* hardware has been reset, we need to reload some things */
 
@@ -308,7 +342,8 @@ e1000_up(struct e1000_adapter *adapter)
 	e1000_configure_tx(adapter);
 	e1000_setup_rctl(adapter);
 	e1000_configure_rx(adapter);
-	adapter->alloc_rx_buf(adapter);
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->alloc_rx_buf(adapter, &adapter->rx_ring[i]);
 
 #ifdef CONFIG_PCI_MSI
 	if(adapter->hw.mac_type > e1000_82547_rev_2) {
@@ -344,6 +379,9 @@ e1000_down(struct e1000_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 
 	e1000_irq_disable(adapter);
+#ifdef CONFIG_E1000_MQ
+	while (atomic_read(&adapter->rx_sched_call_data.count) != 0);
+#endif
 	free_irq(adapter->pdev->irq, netdev);
 #ifdef CONFIG_PCI_MSI
 	if(adapter->hw.mac_type > e1000_82547_rev_2 &&
@@ -363,11 +401,10 @@ e1000_down(struct e1000_adapter *adapter)
 	netif_stop_queue(netdev);
 
 	e1000_reset(adapter);
-	e1000_clean_tx_ring(adapter);
-	e1000_clean_rx_ring(adapter);
+	e1000_clean_all_tx_rings(adapter);
+	e1000_clean_all_rx_rings(adapter);
 
-	/* If WoL is not enabled
-	 * and management mode is not IAMT
+	/* If WoL is not enabled and management mode is not IAMT
 	 * Power down the PHY so no link is implied when interface is down */
 	if(!adapter->wol && adapter->hw.mac_type >= e1000_82540 &&
 	   adapter->hw.media_type == e1000_media_type_copper &&
@@ -398,6 +435,10 @@ e1000_reset(struct e1000_adapter *adapter)
 	case e1000_82547_rev_2:
 		pba = E1000_PBA_30K;
 		break;
+	case e1000_82571:
+	case e1000_82572:
+		pba = E1000_PBA_38K;
+		break;
 	case e1000_82573:
 		pba = E1000_PBA_12K;
 		break;
@@ -475,6 +516,7 @@ e1000_probe(struct pci_dev *pdev,
 	struct net_device *netdev;
 	struct e1000_adapter *adapter;
 	unsigned long mmio_start, mmio_len;
+	uint32_t ctrl_ext;
 	uint32_t swsm;
 
 	static int cards_found = 0;
@@ -614,8 +656,9 @@ e1000_probe(struct pci_dev *pdev,
 	if(e1000_read_mac_addr(&adapter->hw))
 		DPRINTK(PROBE, ERR, "EEPROM Read Error\n");
 	memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
+	memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len);
 
-	if(!is_valid_ether_addr(netdev->dev_addr)) {
+	if(!is_valid_ether_addr(netdev->perm_addr)) {
 		DPRINTK(PROBE, ERR, "Invalid MAC Address\n");
 		err = -EIO;
 		goto err_eeprom;
@@ -687,6 +730,12 @@ e1000_probe(struct pci_dev *pdev,
 
 	/* Let firmware know the driver has taken over */
 	switch(adapter->hw.mac_type) {
+	case e1000_82571:
+	case e1000_82572:
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
+				ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
@@ -731,7 +780,11 @@ e1000_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	uint32_t ctrl_ext;
 	uint32_t manc, swsm;
+#ifdef CONFIG_E1000_NAPI
+	int i;
+#endif
 
 	flush_scheduled_work();
 
@@ -745,6 +798,12 @@ e1000_remove(struct pci_dev *pdev)
 	}
 
 	switch(adapter->hw.mac_type) {
+	case e1000_82571:
+	case e1000_82572:
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
+				ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
@@ -756,13 +815,27 @@ e1000_remove(struct pci_dev *pdev)
 	}
 
 	unregister_netdev(netdev);
+#ifdef CONFIG_E1000_NAPI
+	for (i = 0; i < adapter->num_queues; i++)
+		__dev_put(&adapter->polling_netdev[i]);
+#endif
 
 	if(!e1000_check_phy_reset_block(&adapter->hw))
 		e1000_phy_hw_reset(&adapter->hw);
 
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+#ifdef CONFIG_E1000_NAPI
+	kfree(adapter->polling_netdev);
+#endif
+
 	iounmap(adapter->hw.hw_addr);
 	pci_release_regions(pdev);
 
+#ifdef CONFIG_E1000_MQ
+	free_percpu(adapter->cpu_netdev);
+	free_percpu(adapter->cpu_tx_ring);
+#endif
 	free_netdev(netdev);
 
 	pci_disable_device(pdev);
@@ -783,6 +856,9 @@ e1000_sw_init(struct e1000_adapter *adapter)
 	struct e1000_hw *hw = &adapter->hw;
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
+#ifdef CONFIG_E1000_NAPI
+	int i;
+#endif
 
 	/* PCI config space info */
 
@@ -840,13 +916,122 @@ e1000_sw_init(struct e1000_adapter *adapter)
 		hw->master_slave = E1000_MASTER_SLAVE;
 	}
 
+#ifdef CONFIG_E1000_MQ
+	/* Number of supported queues */
+	switch (hw->mac_type) {
+	case e1000_82571:
+	case e1000_82572:
+		adapter->num_queues = 2;
+		break;
+	default:
+		adapter->num_queues = 1;
+		break;
+	}
+	adapter->num_queues = min(adapter->num_queues, num_online_cpus());
+#else
+	adapter->num_queues = 1;
+#endif
+
+	if (e1000_alloc_queues(adapter)) {
+		DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+#ifdef CONFIG_E1000_NAPI
+	for (i = 0; i < adapter->num_queues; i++) {
+		adapter->polling_netdev[i].priv = adapter;
+		adapter->polling_netdev[i].poll = &e1000_clean;
+		adapter->polling_netdev[i].weight = 64;
+		dev_hold(&adapter->polling_netdev[i]);
+		set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state);
+	}
+#endif
+
+#ifdef CONFIG_E1000_MQ
+	e1000_setup_queue_mapping(adapter);
+#endif
+
 	atomic_set(&adapter->irq_sem, 1);
 	spin_lock_init(&adapter->stats_lock);
-	spin_lock_init(&adapter->tx_lock);
 
 	return 0;
 }
 
+/**
+ * e1000_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one ring per queue at run-time since we don't know the
+ * number of queues at compile-time.  The polling_netdev array is
+ * intended for Multiqueue, but should work fine with a single queue.
+ **/
+
+static int __devinit
+e1000_alloc_queues(struct e1000_adapter *adapter)
+{
+	int size;
+
+	size = sizeof(struct e1000_tx_ring) * adapter->num_queues;
+	adapter->tx_ring = kmalloc(size, GFP_KERNEL);
+	if (!adapter->tx_ring)
+		return -ENOMEM;
+	memset(adapter->tx_ring, 0, size);
+
+	size = sizeof(struct e1000_rx_ring) * adapter->num_queues;
+	adapter->rx_ring = kmalloc(size, GFP_KERNEL);
+	if (!adapter->rx_ring) {
+		kfree(adapter->tx_ring);
+		return -ENOMEM;
+	}
+	memset(adapter->rx_ring, 0, size);
+
+#ifdef CONFIG_E1000_NAPI
+	size = sizeof(struct net_device) * adapter->num_queues;
+	adapter->polling_netdev = kmalloc(size, GFP_KERNEL);
+	if (!adapter->polling_netdev) {
+		kfree(adapter->tx_ring);
+		kfree(adapter->rx_ring);
+		return -ENOMEM;
+	}
+	memset(adapter->polling_netdev, 0, size);
+#endif
+
+	return E1000_SUCCESS;
+}
+
+#ifdef CONFIG_E1000_MQ
+static void __devinit
+e1000_setup_queue_mapping(struct e1000_adapter *adapter)
+{
+	int i, cpu;
+
+	adapter->rx_sched_call_data.func = e1000_rx_schedule;
+	adapter->rx_sched_call_data.info = adapter->netdev;
+	cpus_clear(adapter->rx_sched_call_data.cpumask);
+
+	adapter->cpu_netdev = alloc_percpu(struct net_device *);
+	adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *);
+
+	lock_cpu_hotplug();
+	i = 0;
+	for_each_online_cpu(cpu) {
+		*per_cpu_ptr(adapter->cpu_tx_ring, cpu) = &adapter->tx_ring[i % adapter->num_queues];
+		/* This is incomplete because we'd like to assign separate
+		 * physical cpus to these netdev polling structures and
+		 * avoid saturating a subset of cpus.
+		 */
+		if (i < adapter->num_queues) {
+			*per_cpu_ptr(adapter->cpu_netdev, cpu) = &adapter->polling_netdev[i];
+			adapter->cpu_for_queue[i] = cpu;
+		} else
+			*per_cpu_ptr(adapter->cpu_netdev, cpu) = NULL;
+
+		i++;
+	}
+	unlock_cpu_hotplug();
+}
+#endif
+
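
The mapping above stripes Tx rings across every online CPU round-robin, while only the first num_queues CPUs receive a polling netdev (the rest get NULL). A minimal user-space C sketch of the same assignment policy — plain arrays stand in for the per-cpu variables, and the CPU/queue counts here are made up:

#include <stdio.h>

#define NUM_CPUS   4	/* stands in for for_each_online_cpu() */
#define NUM_QUEUES 2	/* adapter->num_queues */

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NUM_CPUS; cpu++) {
		/* every cpu gets a Tx ring, striped round-robin */
		int tx_ring = cpu % NUM_QUEUES;
		/* only the first NUM_QUEUES cpus own a polling netdev */
		int poll = (cpu < NUM_QUEUES) ? cpu : -1;

		printf("cpu %d: tx_ring[%d], polling_netdev %d\n",
		       cpu, tx_ring, poll);
	}
	return 0;
}

With 4 CPUs and 2 queues this prints tx_ring 0,1,0,1 and polling_netdev 0,1,-1,-1, matching the i % num_queues and i < num_queues tests in the driver loop.
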
 /**
  * e1000_open - Called when a network interface is made active
  * @netdev: network interface device structure
@@ -868,12 +1053,12 @@ e1000_open(struct net_device *netdev)
 
 	/* allocate transmit descriptors */
 
-	if((err = e1000_setup_tx_resources(adapter)))
+	if ((err = e1000_setup_all_tx_resources(adapter)))
 		goto err_setup_tx;
 
 	/* allocate receive descriptors */
 
-	if((err = e1000_setup_rx_resources(adapter)))
+	if ((err = e1000_setup_all_rx_resources(adapter)))
 		goto err_setup_rx;
 
 	if((err = e1000_up(adapter)))
@@ -887,9 +1072,9 @@ e1000_open(struct net_device *netdev)
 	return E1000_SUCCESS;
 
 err_up:
-	e1000_free_rx_resources(adapter);
+	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
-	e1000_free_tx_resources(adapter);
+	e1000_free_all_tx_resources(adapter);
 err_setup_tx:
 	e1000_reset(adapter);
 
@@ -915,8 +1100,8 @@ e1000_close(struct net_device *netdev)
 
 	e1000_down(adapter);
 
-	e1000_free_tx_resources(adapter);
-	e1000_free_rx_resources(adapter);
+	e1000_free_all_tx_resources(adapter);
+	e1000_free_all_rx_resources(adapter);
 
 	if((adapter->hw.mng_cookie.status &
 			  E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
@@ -951,14 +1136,15 @@ e1000_check_64k_bound(struct e1000_adapter *adapter,
 /**
  * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
  * @adapter: board private structure
+ * @txdr:    tx descriptor ring (for a specific queue) to setup
  *
  * Return 0 on success, negative on failure
  **/
 
 int
-e1000_setup_tx_resources(struct e1000_adapter *adapter)
+e1000_setup_tx_resources(struct e1000_adapter *adapter,
+                         struct e1000_tx_ring *txdr)
 {
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int size;
 
@@ -970,6 +1156,7 @@ e1000_setup_tx_resources(struct e1000_adapter *adapter)
 		return -ENOMEM;
 	}
 	memset(txdr->buffer_info, 0, size);
+	memset(&txdr->previous_buffer_info, 0, sizeof(struct e1000_buffer));
 
 	/* round up to nearest 4K */
 
@@ -1018,10 +1205,40 @@ setup_tx_desc_die:
 
 	txdr->next_to_use = 0;
 	txdr->next_to_clean = 0;
+	spin_lock_init(&txdr->tx_lock);
 
 	return 0;
 }
 
+/**
+ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources
+ * 				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings has been populated (while the rest have not).  It is
+ * the caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+
+int
+e1000_setup_all_tx_resources(struct e1000_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);
+		if (err) {
+			DPRINTK(PROBE, ERR,
+				"Allocation for Tx Queue %u failed\n", i);
+			break;
+		}
+	}
+
+	return err;
+}
+
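
Per that comment, a failed setup can leave earlier rings allocated; the patch handles this in e1000_open's error path by calling the _all_ free variants unconditionally. A user-space sketch of the same unwind pattern, with malloc/free standing in for the ring setup and teardown helpers:

#include <stdlib.h>
#include <stdio.h>

#define NUM_QUEUES 2

/* malloc/free stand in for e1000_setup_tx_resources() and
 * e1000_free_tx_resources() on a single ring */
static void *ring[NUM_QUEUES];

static void free_all_rings(void)
{
	int i;

	/* freeing a ring that never got set up is harmless: free(NULL) */
	for (i = 0; i < NUM_QUEUES; i++) {
		free(ring[i]);
		ring[i] = NULL;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < NUM_QUEUES; i++) {
		ring[i] = malloc(64);
		if (!ring[i]) {
			/* caller's duty: unwind whatever was populated */
			free_all_rings();
			return EXIT_FAILURE;
		}
	}
	printf("all %d rings set up\n", NUM_QUEUES);
	free_all_rings();
	return 0;
}
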
 /**
  * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
  * @adapter: board private structure
@@ -1032,23 +1249,43 @@ setup_tx_desc_die:
 static void
 e1000_configure_tx(struct e1000_adapter *adapter)
 {
-	uint64_t tdba = adapter->tx_ring.dma;
-	uint32_t tdlen = adapter->tx_ring.count * sizeof(struct e1000_tx_desc);
-	uint32_t tctl, tipg;
-
-	E1000_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL));
-	E1000_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32));
-
-	E1000_WRITE_REG(&adapter->hw, TDLEN, tdlen);
+	uint64_t tdba;
+	struct e1000_hw *hw = &adapter->hw;
+	uint32_t tdlen, tctl, tipg, tarc;
 
 	/* Setup the HW Tx Head and Tail descriptor pointers */
 
-	E1000_WRITE_REG(&adapter->hw, TDH, 0);
-	E1000_WRITE_REG(&adapter->hw, TDT, 0);
+	switch (adapter->num_queues) {
+	case 2:
+		tdba = adapter->tx_ring[1].dma;
+		tdlen = adapter->tx_ring[1].count *
+			sizeof(struct e1000_tx_desc);
+		E1000_WRITE_REG(hw, TDBAL1, (tdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, TDBAH1, (tdba >> 32));
+		E1000_WRITE_REG(hw, TDLEN1, tdlen);
+		E1000_WRITE_REG(hw, TDH1, 0);
+		E1000_WRITE_REG(hw, TDT1, 0);
+		adapter->tx_ring[1].tdh = E1000_TDH1;
+		adapter->tx_ring[1].tdt = E1000_TDT1;
+		/* Fall Through */
+	case 1:
+	default:
+		tdba = adapter->tx_ring[0].dma;
+		tdlen = adapter->tx_ring[0].count *
+			sizeof(struct e1000_tx_desc);
+		E1000_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, TDBAH, (tdba >> 32));
+		E1000_WRITE_REG(hw, TDLEN, tdlen);
+		E1000_WRITE_REG(hw, TDH, 0);
+		E1000_WRITE_REG(hw, TDT, 0);
+		adapter->tx_ring[0].tdh = E1000_TDH;
+		adapter->tx_ring[0].tdt = E1000_TDT;
+		break;
+	}
 
 	/* Set the default values for the Tx Inter Packet Gap timer */
 
-	switch (adapter->hw.mac_type) {
+	switch (hw->mac_type) {
 	case e1000_82542_rev2_0:
 	case e1000_82542_rev2_1:
 		tipg = DEFAULT_82542_TIPG_IPGT;
@@ -1056,67 +1293,81 @@ e1000_configure_tx(struct e1000_adapter *adapter)
 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
 		break;
 	default:
-		if(adapter->hw.media_type == e1000_media_type_fiber ||
-		   adapter->hw.media_type == e1000_media_type_internal_serdes)
+		if (hw->media_type == e1000_media_type_fiber ||
+		    hw->media_type == e1000_media_type_internal_serdes)
 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
 		else
 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
 	}
-	E1000_WRITE_REG(&adapter->hw, TIPG, tipg);
+	E1000_WRITE_REG(hw, TIPG, tipg);
 
 	/* Set the Tx Interrupt Delay register */
 
-	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay);
-	if(adapter->hw.mac_type >= e1000_82540)
-		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay);
+	E1000_WRITE_REG(hw, TIDV, adapter->tx_int_delay);
+	if (hw->mac_type >= e1000_82540)
+		E1000_WRITE_REG(hw, TADV, adapter->tx_abs_int_delay);
 
 	/* Program the Transmit Control Register */
 
-	tctl = E1000_READ_REG(&adapter->hw, TCTL);
+	tctl = E1000_READ_REG(hw, TCTL);
 
 	tctl &= ~E1000_TCTL_CT;
-	tctl |= E1000_TCTL_EN | E1000_TCTL_PSP |
+	tctl |= E1000_TCTL_EN | E1000_TCTL_PSP | E1000_TCTL_RTLC |
 		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
 
-	E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
+	E1000_WRITE_REG(hw, TCTL, tctl);
 
-	e1000_config_collision_dist(&adapter->hw);
+	if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) {
+		tarc = E1000_READ_REG(hw, TARC0);
+		tarc |= ((1 << 25) | (1 << 21));
+		E1000_WRITE_REG(hw, TARC0, tarc);
+		tarc = E1000_READ_REG(hw, TARC1);
+		tarc |= (1 << 25);
+		if (tctl & E1000_TCTL_MULR)
+			tarc &= ~(1 << 28);
+		else
+			tarc |= (1 << 28);
+		E1000_WRITE_REG(hw, TARC1, tarc);
+	}
+
+	e1000_config_collision_dist(hw);
 
 	/* Setup Transmit Descriptor Settings for eop descriptor */
 	adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP |
 		E1000_TXD_CMD_IFCS;
 
-	if(adapter->hw.mac_type < e1000_82543)
+	if (hw->mac_type < e1000_82543)
 		adapter->txd_cmd |= E1000_TXD_CMD_RPS;
 	else
 		adapter->txd_cmd |= E1000_TXD_CMD_RS;
 
 	/* Cache if we're 82544 running in PCI-X because we'll
 	 * need this to apply a workaround later in the send path. */
-	if(adapter->hw.mac_type == e1000_82544 &&
-	   adapter->hw.bus_type == e1000_bus_type_pcix)
+	if (hw->mac_type == e1000_82544 &&
+	    hw->bus_type == e1000_bus_type_pcix)
 		adapter->pcix_82544 = 1;
 }
 
 /**
  * e1000_setup_rx_resources - allocate Rx resources (Descriptors)
  * @adapter: board private structure
+ * @rxdr:    rx descriptor ring (for a specific queue) to setup
  *
  * Returns 0 on success, negative on failure
  **/
 
 int
-e1000_setup_rx_resources(struct e1000_adapter *adapter)
+e1000_setup_rx_resources(struct e1000_adapter *adapter,
+                         struct e1000_rx_ring *rxdr)
 {
-	struct e1000_desc_ring *rxdr = &adapter->rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int size, desc_len;
 
 	size = sizeof(struct e1000_buffer) * rxdr->count;
 	rxdr->buffer_info = vmalloc(size);
-	if(!rxdr->buffer_info) {
+	if (!rxdr->buffer_info) {
 		DPRINTK(PROBE, ERR,
 		"Unable to allocate memory for the receive descriptor ring\n");
 		return -ENOMEM;
@@ -1156,13 +1407,13 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter)
 
 	rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
 
-	if(!rxdr->desc) {
+	if (!rxdr->desc) {
+		DPRINTK(PROBE, ERR,
+		"Unable to allocate memory for the receive descriptor ring\n");
 setup_rx_desc_die:
 		vfree(rxdr->buffer_info);
 		kfree(rxdr->ps_page);
 		kfree(rxdr->ps_page_dma);
-		DPRINTK(PROBE, ERR,
-		"Unable to allocate memory for the receive descriptor ring\n");
 		return -ENOMEM;
 	}
 
@@ -1174,9 +1425,12 @@ setup_rx_desc_die:
 				     "at %p\n", rxdr->size, rxdr->desc);
 		/* Try again, without freeing the previous */
 		rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma);
-		if(!rxdr->desc) {
 		/* Failed allocation, critical failure */
+		if (!rxdr->desc) {
 			pci_free_consistent(pdev, rxdr->size, olddesc, olddma);
+			DPRINTK(PROBE, ERR,
+				"Unable to allocate memory "
+				"for the receive descriptor ring\n");
 			goto setup_rx_desc_die;
 		}
 
@@ -1188,10 +1442,7 @@ setup_rx_desc_die:
 			DPRINTK(PROBE, ERR,
 				"Unable to allocate aligned memory "
 				"for the receive descriptor ring\n");
-			vfree(rxdr->buffer_info);
-			kfree(rxdr->ps_page);
-			kfree(rxdr->ps_page_dma);
-			return -ENOMEM;
+			goto setup_rx_desc_die;
 		} else {
 			/* Free old allocation, new allocation was successful */
 			pci_free_consistent(pdev, rxdr->size, olddesc, olddma);
@@ -1205,16 +1456,49 @@ setup_rx_desc_die:
 	return 0;
 }
 
+/**
+ * e1000_setup_all_rx_resources - wrapper to allocate Rx resources
+ * 				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings has been populated (while the rest have not).  It is
+ * the caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+
+int
+e1000_setup_all_rx_resources(struct e1000_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]);
+		if (err) {
+			DPRINTK(PROBE, ERR,
+				"Allocation for Rx Queue %u failed\n", i);
+			break;
+		}
+	}
+
+	return err;
+}
+
 /**
  * e1000_setup_rctl - configure the receive control registers
  * @adapter: Board private structure
  **/
-
+#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \
+			(((S) & (PAGE_SIZE - 1)) ? 1 : 0))
 static void
 e1000_setup_rctl(struct e1000_adapter *adapter)
 {
 	uint32_t rctl, rfctl;
 	uint32_t psrctl = 0;
+#ifdef CONFIG_E1000_PACKET_SPLIT
+	uint32_t pages = 0;
+#endif
 
 	rctl = E1000_READ_REG(&adapter->hw, RCTL);
 
@@ -1235,7 +1519,7 @@ e1000_setup_rctl(struct e1000_adapter *adapter)
 		rctl |= E1000_RCTL_LPE;
 
 	/* Setup buffer sizes */
-	if(adapter->hw.mac_type == e1000_82573) {
+	if(adapter->hw.mac_type >= e1000_82571) {
 		/* We can now specify buffers in 1K increments.
 		 * BSIZE and BSEX are ignored in this case. */
 		rctl |= adapter->rx_buffer_len << 0x11;
@@ -1268,11 +1552,14 @@ e1000_setup_rctl(struct e1000_adapter *adapter)
 	 * followed by the page buffers.  Therefore, skb->data is
 	 * sized to hold the largest protocol header.
 	 */
-	adapter->rx_ps = (adapter->hw.mac_type > e1000_82547_rev_2) 
-			  && (adapter->netdev->mtu 
-	                      < ((3 * PAGE_SIZE) + adapter->rx_ps_bsize0));
+	pages = PAGE_USE_COUNT(adapter->netdev->mtu);
+	if ((adapter->hw.mac_type > e1000_82547_rev_2) && (pages <= 3) &&
+	    PAGE_SIZE <= 16384)
+		adapter->rx_ps_pages = pages;
+	else
+		adapter->rx_ps_pages = 0;
 #endif
-	if(adapter->rx_ps) {
+	if (adapter->rx_ps_pages) {
 		/* Configure extra packet-split registers */
 		rfctl = E1000_READ_REG(&adapter->hw, RFCTL);
 		rfctl |= E1000_RFCTL_EXTEN;
@@ -1284,12 +1571,19 @@ e1000_setup_rctl(struct e1000_adapter *adapter)
 		
 		psrctl |= adapter->rx_ps_bsize0 >>
 			E1000_PSRCTL_BSIZE0_SHIFT;
-		psrctl |= PAGE_SIZE >>
-			E1000_PSRCTL_BSIZE1_SHIFT;
-		psrctl |= PAGE_SIZE <<
-			E1000_PSRCTL_BSIZE2_SHIFT;
-		psrctl |= PAGE_SIZE <<
-			E1000_PSRCTL_BSIZE3_SHIFT;
+
+		switch (adapter->rx_ps_pages) {
+		case 3:
+			psrctl |= PAGE_SIZE <<
+				E1000_PSRCTL_BSIZE3_SHIFT;
+		case 2:
+			psrctl |= PAGE_SIZE <<
+				E1000_PSRCTL_BSIZE2_SHIFT;
+		case 1:
+			psrctl |= PAGE_SIZE >>
+				E1000_PSRCTL_BSIZE1_SHIFT;
+			break;
+		}
 
 		E1000_WRITE_REG(&adapter->hw, PSRCTL, psrctl);
 	}
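
PAGE_USE_COUNT is a ceiling division of the MTU by the page size, and the code above keeps packet split enabled only while the MTU fits in at most three pages (with PAGE_SIZE capped at 16 KiB). A quick user-space check of the macro, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12			/* assume 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \
			(((S) & (PAGE_SIZE - 1)) ? 1 : 0))

int main(void)
{
	unsigned int mtu[] = { 1500, 4096, 9000, 16384 };
	int i;

	for (i = 0; i < 4; i++) {
		unsigned long pages = PAGE_USE_COUNT(mtu[i]);
		/* the driver sets rx_ps_pages only when pages <= 3 */
		printf("mtu %5u -> %lu page(s), packet split %s\n",
		       mtu[i], pages, pages <= 3 ? "on" : "off");
	}
	return 0;
}

A 9000-byte MTU needs three 4 KiB pages, so packet split stays on; anything past 12 KiB (or any page size above 16 KiB) falls back to rx_ps_pages = 0.
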
@@ -1307,91 +1601,181 @@ e1000_setup_rctl(struct e1000_adapter *adapter)
 static void
 e1000_configure_rx(struct e1000_adapter *adapter)
 {
-	uint64_t rdba = adapter->rx_ring.dma;
-	uint32_t rdlen, rctl, rxcsum;
+	uint64_t rdba;
+	struct e1000_hw *hw = &adapter->hw;
+	uint32_t rdlen, rctl, rxcsum, ctrl_ext;
+#ifdef CONFIG_E1000_MQ
+	uint32_t reta, mrqc;
+	int i;
+#endif
 
-	if(adapter->rx_ps) {
-		rdlen = adapter->rx_ring.count *
+	if (adapter->rx_ps_pages) {
+		rdlen = adapter->rx_ring[0].count *
 			sizeof(union e1000_rx_desc_packet_split);
 		adapter->clean_rx = e1000_clean_rx_irq_ps;
 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
 	} else {
-		rdlen = adapter->rx_ring.count * sizeof(struct e1000_rx_desc);
+		rdlen = adapter->rx_ring[0].count *
+			sizeof(struct e1000_rx_desc);
 		adapter->clean_rx = e1000_clean_rx_irq;
 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
 	}
 
 	/* disable receives while setting up the descriptors */
-	rctl = E1000_READ_REG(&adapter->hw, RCTL);
-	E1000_WRITE_REG(&adapter->hw, RCTL, rctl & ~E1000_RCTL_EN);
+	rctl = E1000_READ_REG(hw, RCTL);
+	E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN);
 
 	/* set the Receive Delay Timer Register */
-	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay);
+	E1000_WRITE_REG(hw, RDTR, adapter->rx_int_delay);
 
-	if(adapter->hw.mac_type >= e1000_82540) {
-		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay);
+	if (hw->mac_type >= e1000_82540) {
+		E1000_WRITE_REG(hw, RADV, adapter->rx_abs_int_delay);
 		if(adapter->itr > 1)
-			E1000_WRITE_REG(&adapter->hw, ITR,
+			E1000_WRITE_REG(hw, ITR,
 				1000000000 / (adapter->itr * 256));
 	}
 
-	/* Setup the Base and Length of the Rx Descriptor Ring */
-	E1000_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL));
-	E1000_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32));
+	if (hw->mac_type >= e1000_82571) {
+		/* Reset delay timers after every interrupt */
+		ctrl_ext = E1000_READ_REG(hw, CTRL_EXT);
+		ctrl_ext |= E1000_CTRL_EXT_CANC;
+		E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
+		E1000_WRITE_FLUSH(hw);
+	}
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring */
+	switch (adapter->num_queues) {
+#ifdef CONFIG_E1000_MQ
+	case 2:
+		rdba = adapter->rx_ring[1].dma;
+		E1000_WRITE_REG(hw, RDBAL1, (rdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, RDBAH1, (rdba >> 32));
+		E1000_WRITE_REG(hw, RDLEN1, rdlen);
+		E1000_WRITE_REG(hw, RDH1, 0);
+		E1000_WRITE_REG(hw, RDT1, 0);
+		adapter->rx_ring[1].rdh = E1000_RDH1;
+		adapter->rx_ring[1].rdt = E1000_RDT1;
+		/* Fall Through */
+#endif
+	case 1:
+	default:
+		rdba = adapter->rx_ring[0].dma;
+		E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL));
+		E1000_WRITE_REG(hw, RDBAH, (rdba >> 32));
+		E1000_WRITE_REG(hw, RDLEN, rdlen);
+		E1000_WRITE_REG(hw, RDH, 0);
+		E1000_WRITE_REG(hw, RDT, 0);
+		adapter->rx_ring[0].rdh = E1000_RDH;
+		adapter->rx_ring[0].rdt = E1000_RDT;
+		break;
+	}
+
+#ifdef CONFIG_E1000_MQ
+	if (adapter->num_queues > 1) {
+		uint32_t random[10];
+
+		get_random_bytes(&random[0], 40);
+
+		if (hw->mac_type <= e1000_82572) {
+			E1000_WRITE_REG(hw, RSSIR, 0);
+			E1000_WRITE_REG(hw, RSSIM, 0);
+		}
+
+		switch (adapter->num_queues) {
+		case 2:
+		default:
+			reta = 0x00800080;
+			mrqc = E1000_MRQC_ENABLE_RSS_2Q;
+			break;
+		}
+
+		/* Fill out redirection table */
+		for (i = 0; i < 32; i++)
+			E1000_WRITE_REG_ARRAY(hw, RETA, i, reta);
+		/* Fill out hash function seeds */
+		for (i = 0; i < 10; i++)
+			E1000_WRITE_REG_ARRAY(hw, RSSRK, i, random[i]);
+
+		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
+			 E1000_MRQC_RSS_FIELD_IPV4_TCP);
+		E1000_WRITE_REG(hw, MRQC, mrqc);
+	}
 
-	E1000_WRITE_REG(&adapter->hw, RDLEN, rdlen);
+	/* Multiqueue and packet checksumming are mutually exclusive. */
+	if (hw->mac_type >= e1000_82571) {
+		rxcsum = E1000_READ_REG(hw, RXCSUM);
+		rxcsum |= E1000_RXCSUM_PCSD;
+		E1000_WRITE_REG(hw, RXCSUM, rxcsum);
+	}
 
-	/* Setup the HW Rx Head and Tail Descriptor Pointers */
-	E1000_WRITE_REG(&adapter->hw, RDH, 0);
-	E1000_WRITE_REG(&adapter->hw, RDT, 0);
+#else
 
 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
-	if(adapter->hw.mac_type >= e1000_82543) {
-		rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
+	if (hw->mac_type >= e1000_82543) {
+		rxcsum = E1000_READ_REG(hw, RXCSUM);
 		if(adapter->rx_csum == TRUE) {
 			rxcsum |= E1000_RXCSUM_TUOFL;
 
-			/* Enable 82573 IPv4 payload checksum for UDP fragments
+			/* Enable 82571 IPv4 payload checksum for UDP fragments
 			 * Must be used in conjunction with packet-split. */
-			if((adapter->hw.mac_type > e1000_82547_rev_2) && 
-			   (adapter->rx_ps)) {
+			if ((hw->mac_type >= e1000_82571) && 
+			   (adapter->rx_ps_pages)) {
 				rxcsum |= E1000_RXCSUM_IPPCSE;
 			}
 		} else {
 			rxcsum &= ~E1000_RXCSUM_TUOFL;
 			/* don't need to clear IPPCSE as it defaults to 0 */
 		}
-		E1000_WRITE_REG(&adapter->hw, RXCSUM, rxcsum);
+		E1000_WRITE_REG(hw, RXCSUM, rxcsum);
 	}
+#endif /* CONFIG_E1000_MQ */
 
-	if (adapter->hw.mac_type == e1000_82573)
-		E1000_WRITE_REG(&adapter->hw, ERT, 0x0100);
+	if (hw->mac_type == e1000_82573)
+		E1000_WRITE_REG(hw, ERT, 0x0100);
 
 	/* Enable Receives */
-	E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+	E1000_WRITE_REG(hw, RCTL, rctl);
 }
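
For the two-queue RSS case above, 32 dword writes of 0x00800080 lay down 128 one-byte redirection entries alternating between 0x00 and 0x80, and ten dword writes of get_random_bytes() output seed the 40-byte hash key. A user-space sketch of how the dword pattern decomposes into table entries (reading 0x00 as queue 0 and 0x80 as queue 1 is my interpretation, not something the patch states):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint32_t reta_regs[32];
	uint8_t entry[128];
	int i;

	/* 32 dword writes of 0x00800080, as in the RETA loop above */
	for (i = 0; i < 32; i++)
		reta_regs[i] = 0x00800080;

	/* viewed byte-wise (assuming little-endian), the 128 entries
	 * alternate 0x80, 0x00, 0x80, 0x00, ... */
	memcpy(entry, reta_regs, sizeof(entry));
	for (i = 0; i < 8; i++)
		printf("entry[%d] = 0x%02x\n", i, entry[i]);
	return 0;
}
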
 
 /**
- * e1000_free_tx_resources - Free Tx Resources
+ * e1000_free_tx_resources - Free Tx Resources per Queue
  * @adapter: board private structure
+ * @tx_ring: Tx descriptor ring for a specific queue
  *
  * Free all transmit software resources
  **/
 
 void
-e1000_free_tx_resources(struct e1000_adapter *adapter)
+e1000_free_tx_resources(struct e1000_adapter *adapter,
+                        struct e1000_tx_ring *tx_ring)
 {
 	struct pci_dev *pdev = adapter->pdev;
 
-	e1000_clean_tx_ring(adapter);
+	e1000_clean_tx_ring(adapter, tx_ring);
+
+	vfree(tx_ring->buffer_info);
+	tx_ring->buffer_info = NULL;
 
-	vfree(adapter->tx_ring.buffer_info);
-	adapter->tx_ring.buffer_info = NULL;
+	pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
 
-	pci_free_consistent(pdev, adapter->tx_ring.size,
-	                    adapter->tx_ring.desc, adapter->tx_ring.dma);
+/**
+ * e1000_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+
+void
+e1000_free_all_tx_resources(struct e1000_adapter *adapter)
+{
+	int i;
 
-	adapter->tx_ring.desc = NULL;
+	for (i = 0; i < adapter->num_queues; i++)
+		e1000_free_tx_resources(adapter, &adapter->tx_ring[i]);
 }
 
 static inline void
@@ -1414,21 +1798,22 @@ e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter,
 /**
  * e1000_clean_tx_ring - Free Tx Buffers
  * @adapter: board private structure
+ * @tx_ring: ring to be cleaned
  **/
 
 static void
-e1000_clean_tx_ring(struct e1000_adapter *adapter)
+e1000_clean_tx_ring(struct e1000_adapter *adapter,
+                    struct e1000_tx_ring *tx_ring)
 {
-	struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
 	struct e1000_buffer *buffer_info;
 	unsigned long size;
 	unsigned int i;
 
 	/* Free all the Tx ring sk_buffs */
 
-	if (likely(adapter->previous_buffer_info.skb != NULL)) {
+	if (likely(tx_ring->previous_buffer_info.skb != NULL)) {
 		e1000_unmap_and_free_tx_resource(adapter,
-				&adapter->previous_buffer_info);
+				&tx_ring->previous_buffer_info);
 	}
 
 	for(i = 0; i < tx_ring->count; i++) {
@@ -1446,24 +1831,39 @@ e1000_clean_tx_ring(struct e1000_adapter *adapter)
 	tx_ring->next_to_use = 0;
 	tx_ring->next_to_clean = 0;
 
-	E1000_WRITE_REG(&adapter->hw, TDH, 0);
-	E1000_WRITE_REG(&adapter->hw, TDT, 0);
+	writel(0, adapter->hw.hw_addr + tx_ring->tdh);
+	writel(0, adapter->hw.hw_addr + tx_ring->tdt);
+}
+
+/**
+ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_all_tx_rings(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]);
 }
 
 /**
  * e1000_free_rx_resources - Free Rx Resources
  * @adapter: board private structure
+ * @rx_ring: ring to clean the resources from
  *
  * Free all receive software resources
  **/
 
 void
-e1000_free_rx_resources(struct e1000_adapter *adapter)
+e1000_free_rx_resources(struct e1000_adapter *adapter,
+                        struct e1000_rx_ring *rx_ring)
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 
-	e1000_clean_rx_ring(adapter);
+	e1000_clean_rx_ring(adapter, rx_ring);
 
 	vfree(rx_ring->buffer_info);
 	rx_ring->buffer_info = NULL;
@@ -1478,14 +1878,31 @@ e1000_free_rx_resources(struct e1000_adapter *adapter)
 }
 
 /**
- * e1000_clean_rx_ring - Free Rx Buffers
+ * e1000_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+
+void
+e1000_free_all_rx_resources(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		e1000_free_rx_resources(adapter, &adapter->rx_ring[i]);
+}
+
+/**
+ * e1000_clean_rx_ring - Free Rx Buffers per Queue
  * @adapter: board private structure
+ * @rx_ring: ring to free buffers from
  **/
 
 static void
-e1000_clean_rx_ring(struct e1000_adapter *adapter)
+e1000_clean_rx_ring(struct e1000_adapter *adapter,
+                    struct e1000_rx_ring *rx_ring)
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct e1000_buffer *buffer_info;
 	struct e1000_ps_page *ps_page;
 	struct e1000_ps_page_dma *ps_page_dma;
@@ -1508,7 +1925,7 @@ e1000_clean_rx_ring(struct e1000_adapter *adapter)
 			dev_kfree_skb(buffer_info->skb);
 			buffer_info->skb = NULL;
 
-			for(j = 0; j < PS_PAGE_BUFFERS; j++) {
+			for(j = 0; j < adapter->rx_ps_pages; j++) {
 				if(!ps_page->ps_page[j]) break;
 				pci_unmap_single(pdev,
 						 ps_page_dma->ps_page_dma[j],
@@ -1534,8 +1951,22 @@ e1000_clean_rx_ring(struct e1000_adapter *adapter)
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 
-	E1000_WRITE_REG(&adapter->hw, RDH, 0);
-	E1000_WRITE_REG(&adapter->hw, RDT, 0);
+	writel(0, adapter->hw.hw_addr + rx_ring->rdh);
+	writel(0, adapter->hw.hw_addr + rx_ring->rdt);
+}
+
+/**
+ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+
+static void
+e1000_clean_all_rx_rings(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]);
 }
 
 /* The 82542 2.0 (revision 2) needs to have the receive unit in reset
@@ -1556,7 +1987,7 @@ e1000_enter_82542_rst(struct e1000_adapter *adapter)
 	mdelay(5);
 
 	if(netif_running(netdev))
-		e1000_clean_rx_ring(adapter);
+		e1000_clean_all_rx_rings(adapter);
 }
 
 static void
@@ -1576,7 +2007,7 @@ e1000_leave_82542_rst(struct e1000_adapter *adapter)
 
 	if(netif_running(netdev)) {
 		e1000_configure_rx(adapter);
-		e1000_alloc_rx_buffers(adapter);
+		e1000_alloc_rx_buffers(adapter, &adapter->rx_ring[0]);
 	}
 }
 
@@ -1607,6 +2038,22 @@ e1000_set_mac(struct net_device *netdev, void *p)
 
 	e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0);
 
+	/* With 82571 controllers, LAA may be overwritten (with the default)
+	 * due to controller reset from the other port. */
+	if (adapter->hw.mac_type == e1000_82571) {
+		/* activate the work around */
+		adapter->hw.laa_is_present = 1;
+
+	/* Hold a copy of the LAA in RAR[14].  This is done so that
+	 * between the time RAR[0] gets clobbered and the time it
+	 * gets fixed (in e1000_watchdog), the actual LAA is in one
+	 * of the RARs and no incoming packets directed to this port
+	 * are dropped.  Eventually the LAA will be in RAR[0] and
+	 * RAR[14]. */
+		e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 
+					E1000_RAR_ENTRIES - 1);
+	}
+
 	if(adapter->hw.mac_type == e1000_82542_rev2_0)
 		e1000_leave_82542_rst(adapter);
 
@@ -1629,12 +2076,13 @@ e1000_set_multi(struct net_device *netdev)
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 	struct dev_mc_list *mc_ptr;
-	unsigned long flags;
 	uint32_t rctl;
 	uint32_t hash_value;
-	int i;
+	int i, rar_entries = E1000_RAR_ENTRIES;
 
-	spin_lock_irqsave(&adapter->tx_lock, flags);
+	/* reserve RAR[14] for LAA over-write work-around */
+	if (adapter->hw.mac_type == e1000_82571)
+		rar_entries--;
 
 	/* Check for Promiscuous and All Multicast modes */
 
@@ -1659,11 +2107,12 @@ e1000_set_multi(struct net_device *netdev)
 	/* load the first 14 multicast addresses into the exact filters 1-14
 	 * RAR 0 is used for the station MAC address
 	 * if there are not 14 addresses, go ahead and clear the filters
+	 * -- with 82571 controllers, only entries 0-13 are filled here
 	 */
 	mc_ptr = netdev->mc_list;
 
-	for(i = 1; i < E1000_RAR_ENTRIES; i++) {
-		if(mc_ptr) {
+	for(i = 1; i < rar_entries; i++) {
+		if (mc_ptr) {
 			e1000_rar_set(hw, mc_ptr->dmi_addr, i);
 			mc_ptr = mc_ptr->next;
 		} else {
@@ -1686,8 +2135,6 @@ e1000_set_multi(struct net_device *netdev)
 
 	if(hw->mac_type == e1000_82542_rev2_0)
 		e1000_leave_82542_rst(adapter);
-
-	spin_unlock_irqrestore(&adapter->tx_lock, flags);
 }
 
 /* Need to wait a few seconds after link up to get diagnostic information from
@@ -1759,7 +2206,7 @@ static void
 e1000_watchdog_task(struct e1000_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
+	struct e1000_tx_ring *txdr = &adapter->tx_ring[0];
 	uint32_t link;
 
 	e1000_check_for_link(&adapter->hw);
@@ -1818,8 +2265,8 @@ e1000_watchdog_task(struct e1000_adapter *adapter)
 
 	e1000_update_adaptive(&adapter->hw);
 
-	if(!netif_carrier_ok(netdev)) {
-		if(E1000_DESC_UNUSED(txdr) + 1 < txdr->count) {
+	if (adapter->num_queues == 1 && !netif_carrier_ok(netdev)) {
+		if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) {
 			/* We've lost link, so the controller stops DMA,
 			 * but we've got queued Tx work that's never going
 			 * to get done, so reset controller to flush Tx.
@@ -1847,6 +2294,11 @@ e1000_watchdog_task(struct e1000_adapter *adapter)
 	/* Force detection of hung controller every watchdog period */
 	adapter->detect_tx_hung = TRUE;
 
+	/* With 82571 controllers, LAA may be overwritten due to controller
+	 * reset from the other port.  Set the appropriate LAA in RAR[0]. */
+	if (adapter->hw.mac_type == e1000_82571 && adapter->hw.laa_is_present)
+		e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0);
+
 	/* Reset the timer */
 	mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
 }
@@ -1859,7 +2311,8 @@ e1000_watchdog_task(struct e1000_adapter *adapter)
 #define E1000_TX_FLAGS_VLAN_SHIFT	16
 
 static inline int
-e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb)
+e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
+          struct sk_buff *skb)
 {
 #ifdef NETIF_F_TSO
 	struct e1000_context_desc *context_desc;
@@ -1910,8 +2363,8 @@ e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb)
 		cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
 			       E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
 
-		i = adapter->tx_ring.next_to_use;
-		context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+		i = tx_ring->next_to_use;
+		context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
 
 		context_desc->lower_setup.ip_fields.ipcss  = ipcss;
 		context_desc->lower_setup.ip_fields.ipcso  = ipcso;
@@ -1923,8 +2376,8 @@ e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb)
 		context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
 		context_desc->cmd_and_length = cpu_to_le32(cmd_length);
 
-		if(++i == adapter->tx_ring.count) i = 0;
-		adapter->tx_ring.next_to_use = i;
+		if (++i == tx_ring->count) i = 0;
+		tx_ring->next_to_use = i;
 
 		return 1;
 	}
@@ -1934,7 +2387,8 @@ e1000_tso(struct e1000_adapter *adapter, struct sk_buff *skb)
 }
 
 static inline boolean_t
-e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
+e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
+              struct sk_buff *skb)
 {
 	struct e1000_context_desc *context_desc;
 	unsigned int i;
@@ -1943,8 +2397,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
 	if(likely(skb->ip_summed == CHECKSUM_HW)) {
 		css = skb->h.raw - skb->data;
 
-		i = adapter->tx_ring.next_to_use;
-		context_desc = E1000_CONTEXT_DESC(adapter->tx_ring, i);
+		i = tx_ring->next_to_use;
+		context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
 
 		context_desc->upper_setup.tcp_fields.tucss = css;
 		context_desc->upper_setup.tcp_fields.tucso = css + skb->csum;
@@ -1952,8 +2406,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
 		context_desc->tcp_seg_setup.data = 0;
 		context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT);
 
-		if(unlikely(++i == adapter->tx_ring.count)) i = 0;
-		adapter->tx_ring.next_to_use = i;
+		if (unlikely(++i == tx_ring->count)) i = 0;
+		tx_ring->next_to_use = i;
 
 		return TRUE;
 	}
@@ -1965,11 +2419,10 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
 #define E1000_MAX_DATA_PER_TXD	(1<<E1000_MAX_TXD_PWR)
 
 static inline int
-e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb,
-	unsigned int first, unsigned int max_per_txd,
-	unsigned int nr_frags, unsigned int mss)
+e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
+             struct sk_buff *skb, unsigned int first, unsigned int max_per_txd,
+             unsigned int nr_frags, unsigned int mss)
 {
-	struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
 	struct e1000_buffer *buffer_info;
 	unsigned int len = skb->len;
 	unsigned int offset = 0, size, count = 0, i;
@@ -2065,9 +2518,9 @@ e1000_tx_map(struct e1000_adapter *adapter, struct sk_buff *skb,
 }
 
 static inline void
-e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
+e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
+               int tx_flags, int count)
 {
-	struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
 	struct e1000_tx_desc *tx_desc = NULL;
 	struct e1000_buffer *buffer_info;
 	uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
@@ -2113,7 +2566,7 @@ e1000_tx_queue(struct e1000_adapter *adapter, int count, int tx_flags)
 	wmb();
 
 	tx_ring->next_to_use = i;
-	E1000_WRITE_REG(&adapter->hw, TDT, i);
+	writel(i, adapter->hw.hw_addr + tx_ring->tdt);
 }
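
Because each ring now caches its own head/tail register offset (tdh/tdt, assigned in e1000_configure_tx), tail bumps go through a bare writel against hw_addr plus that offset instead of the queue-0-only E1000_WRITE_REG macro. A user-space sketch of the addressing, with a byte array standing in for the mapped register BAR and the two offsets assumed from the usual TDT/TDT1 layout (they are not shown in this patch):

#include <stdio.h>
#include <stdint.h>

#define E1000_TDT	0x03818		/* offsets assumed, not from patch */
#define E1000_TDT1	0x03918

int main(void)
{
	static uint8_t mmio[0x20000];	/* stands in for hw->hw_addr */
	uint32_t tdt[2] = { E1000_TDT, E1000_TDT1 };
	int q;

	/* writel(i, adapter->hw.hw_addr + tx_ring->tdt), per ring */
	for (q = 0; q < 2; q++)
		*(volatile uint32_t *)(mmio + tdt[q]) = 5;

	printf("tail[0] = %u, tail[1] = %u\n",
	       *(uint32_t *)(mmio + E1000_TDT),
	       *(uint32_t *)(mmio + E1000_TDT1));
	return 0;
}
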
 
 /**
@@ -2206,6 +2659,7 @@ static int
 e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_tx_ring *tx_ring;
 	unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
 	unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
 	unsigned int tx_flags = 0;
@@ -2218,7 +2672,13 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int f;
 	len -= skb->data_len;
 
-	if(unlikely(skb->len <= 0)) {
+#ifdef CONFIG_E1000_MQ
+	tx_ring = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id());
+#else
+	tx_ring = adapter->tx_ring;
+#endif
+
+	if (unlikely(skb->len <= 0)) {
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
@@ -2262,21 +2722,42 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	if(adapter->pcix_82544)
 		count += nr_frags;
 
- 	local_irq_save(flags); 
- 	if (!spin_trylock(&adapter->tx_lock)) { 
- 		/* Collision - tell upper layer to requeue */ 
- 		local_irq_restore(flags); 
- 		return NETDEV_TX_LOCKED; 
- 	} 
+#ifdef NETIF_F_TSO
+	/* TSO workaround for 82571/2 controllers -- if skb->data
+	 * points to just the header, pull a few bytes of payload from
+	 * frags into skb->data */
+	if (skb_shinfo(skb)->tso_size) {
+		uint8_t hdr_len;
+		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		if (skb->data_len && (hdr_len < (skb->len - skb->data_len)) && 
+			(adapter->hw.mac_type == e1000_82571 ||
+			adapter->hw.mac_type == e1000_82572)) {
+			unsigned int pull_size;
+			pull_size = min((unsigned int)4, skb->data_len);
+			if (!__pskb_pull_tail(skb, pull_size)) {
+				printk(KERN_ERR "__pskb_pull_tail failed.\n");
+				dev_kfree_skb_any(skb);
+				return -EFAULT;
+			}
+		}
+	}
+#endif
+
 	if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) )
 		e1000_transfer_dhcp_info(adapter, skb);
 
+	local_irq_save(flags);
+	if (!spin_trylock(&tx_ring->tx_lock)) {
+		/* Collision - tell upper layer to requeue */
+		local_irq_restore(flags);
+		return NETDEV_TX_LOCKED;
+	}
 
 	/* need: count + 2 desc gap to keep tail from touching
 	 * head, otherwise try next time */
-	if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < count + 2)) {
+	if (unlikely(E1000_DESC_UNUSED(tx_ring) < count + 2)) {
 		netif_stop_queue(netdev);
-		spin_unlock_irqrestore(&adapter->tx_lock, flags);
+		spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
 		return NETDEV_TX_BUSY;
 	}
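
In the TSO workaround above, hdr_len is the transport-header offset plus the TCP header length (doff counts 32-bit words), and the pull moves at most four payload bytes into the linear area. A small stand-alone computation with made-up frame numbers:

#include <stdio.h>

int main(void)
{
	/* hypothetical numbers for one large TSO frame */
	unsigned int transport_off = 34;	/* skb->h.raw - skb->data */
	unsigned int doff = 8;			/* TCP data offset, in words */
	unsigned int data_len = 64934;		/* payload bytes in frags */

	/* header length exactly as the patch computes it */
	unsigned int hdr_len = transport_off + (doff << 2);

	/* the pull moves at most 4 bytes of payload into skb->data */
	unsigned int pull = data_len < 4 ? data_len : 4;

	printf("hdr_len = %u, pull %u byte(s) from frags\n", hdr_len, pull);
	return 0;
}
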
 
@@ -2284,7 +2765,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		if(unlikely(e1000_82547_fifo_workaround(adapter, skb))) {
 			netif_stop_queue(netdev);
 			mod_timer(&adapter->tx_fifo_stall_timer, jiffies);
-			spin_unlock_irqrestore(&adapter->tx_lock, flags);
+			spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
 			return NETDEV_TX_BUSY;
 		}
 	}
@@ -2294,37 +2775,37 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
 	}
 
-	first = adapter->tx_ring.next_to_use;
+	first = tx_ring->next_to_use;
 	
-	tso = e1000_tso(adapter, skb);
+	tso = e1000_tso(adapter, tx_ring, skb);
 	if (tso < 0) {
 		dev_kfree_skb_any(skb);
-		spin_unlock_irqrestore(&adapter->tx_lock, flags);
+		spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
 		return NETDEV_TX_OK;
 	}
 
 	if (likely(tso))
 		tx_flags |= E1000_TX_FLAGS_TSO;
-	else if(likely(e1000_tx_csum(adapter, skb)))
+	else if (likely(e1000_tx_csum(adapter, tx_ring, skb)))
 		tx_flags |= E1000_TX_FLAGS_CSUM;
 
 	/* Old method was to assume IPv4 packet by default if TSO was enabled.
-	 * 82573 hardware supports TSO capabilities for IPv6 as well...
+	 * 82571 hardware supports TSO capabilities for IPv6 as well...
 	 * no longer assume, we must. */
-	if(likely(skb->protocol == ntohs(ETH_P_IP)))
+	if (likely(skb->protocol == ntohs(ETH_P_IP)))
 		tx_flags |= E1000_TX_FLAGS_IPV4;
 
-	e1000_tx_queue(adapter,
-		e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss),
-		tx_flags);
+	e1000_tx_queue(adapter, tx_ring, tx_flags,
+	               e1000_tx_map(adapter, tx_ring, skb, first,
+	                            max_per_txd, nr_frags, mss));
 
 	netdev->trans_start = jiffies;
 
 	/* Make sure there is space in the ring for the next send. */
-	if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < MAX_SKB_FRAGS + 2))
+	if (unlikely(E1000_DESC_UNUSED(tx_ring) < MAX_SKB_FRAGS + 2))
 		netif_stop_queue(netdev);
 
-	spin_unlock_irqrestore(&adapter->tx_lock, flags);
+	spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
 	return NETDEV_TX_OK;
 }
 
@@ -2388,9 +2869,18 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu)
 			return -EINVAL;
 	}
 
-#define MAX_STD_JUMBO_FRAME_SIZE 9216
+#define MAX_STD_JUMBO_FRAME_SIZE 9234
 	/* might want this to be bigger enum check... */
-	if (adapter->hw.mac_type == e1000_82573 &&
+	/* 82571 controllers limit jumbo frame size to 10500 bytes */
+	if ((adapter->hw.mac_type == e1000_82571 || 
+	     adapter->hw.mac_type == e1000_82572) &&
+	    max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
+		DPRINTK(PROBE, ERR, "MTU > 9216 bytes not supported "
+				    "on 82571 and 82572 controllers.\n");
+		return -EINVAL;
+	}
+
+	if(adapter->hw.mac_type == e1000_82573 &&
 	    max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) {
 		DPRINTK(PROBE, ERR, "Jumbo Frames not supported "
 				    "on 82573\n");
@@ -2578,6 +3068,29 @@ e1000_update_stats(struct e1000_adapter *adapter)
 	spin_unlock_irqrestore(&adapter->stats_lock, flags);
 }
 
+#ifdef CONFIG_E1000_MQ
+void
+e1000_rx_schedule(void *data)
+{
+	struct net_device *poll_dev, *netdev = data;
+	struct e1000_adapter *adapter = netdev->priv;
+	int this_cpu = get_cpu();
+
+	poll_dev = *per_cpu_ptr(adapter->cpu_netdev, this_cpu);
+	if (poll_dev == NULL) {
+		put_cpu();
+		return;
+	}
+
+	if (likely(netif_rx_schedule_prep(poll_dev)))
+		__netif_rx_schedule(poll_dev);
+	else
+		e1000_irq_enable(adapter);
+
+	put_cpu();
+}
+#endif
+
 /**
  * e1000_intr - Interrupt Handler
  * @irq: interrupt number
@@ -2592,8 +3105,8 @@ e1000_intr(int irq, void *data, struct pt_regs *regs)
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 	uint32_t icr = E1000_READ_REG(hw, ICR);
-#ifndef CONFIG_E1000_NAPI
-	unsigned int i;
+#if defined(CONFIG_E1000_NAPI) && defined(CONFIG_E1000_MQ) || !defined(CONFIG_E1000_NAPI)
+	int i;
 #endif
 
 	if(unlikely(!icr))
@@ -2605,17 +3118,31 @@ e1000_intr(int irq, void *data, struct pt_regs *regs)
 	}
 
 #ifdef CONFIG_E1000_NAPI
-	if(likely(netif_rx_schedule_prep(netdev))) {
-
-		/* Disable interrupts and register for poll. The flush 
-		  of the posted write is intentionally left out.
-		*/
-
-		atomic_inc(&adapter->irq_sem);
-		E1000_WRITE_REG(hw, IMC, ~0);
-		__netif_rx_schedule(netdev);
+	atomic_inc(&adapter->irq_sem);
+	E1000_WRITE_REG(hw, IMC, ~0);
+	E1000_WRITE_FLUSH(hw);
+#ifdef CONFIG_E1000_MQ
+	if (atomic_read(&adapter->rx_sched_call_data.count) == 0) {
+		cpu_set(adapter->cpu_for_queue[0],
+			adapter->rx_sched_call_data.cpumask);
+		for (i = 1; i < adapter->num_queues; i++) {
+			cpu_set(adapter->cpu_for_queue[i],
+				adapter->rx_sched_call_data.cpumask);
+			atomic_inc(&adapter->irq_sem);
+		}
+		atomic_set(&adapter->rx_sched_call_data.count, i);
+		smp_call_async_mask(&adapter->rx_sched_call_data);
+	} else {
+		printk(KERN_DEBUG "call_data.count == %u\n",
+		       atomic_read(&adapter->rx_sched_call_data.count));
 	}
-#else
+#else /* if !CONFIG_E1000_MQ */
+	if (likely(netif_rx_schedule_prep(&adapter->polling_netdev[0])))
+		__netif_rx_schedule(&adapter->polling_netdev[0]);
+	else
+		e1000_irq_enable(adapter);
+#endif /* CONFIG_E1000_MQ */
+
+#else /* if !CONFIG_E1000_NAPI */
 	/* Writing IMC and IMS is needed for 82547.
 	   Due to Hub Link bus being occupied, an interrupt
 	   de-assertion message is not able to be sent.
@@ -2632,13 +3159,14 @@ e1000_intr(int irq, void *data, struct pt_regs *regs)
 	}
 
 	for(i = 0; i < E1000_MAX_INTR; i++)
-		if(unlikely(!adapter->clean_rx(adapter) &
-		   !e1000_clean_tx_irq(adapter)))
+		if(unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) &
+		   !e1000_clean_tx_irq(adapter, adapter->tx_ring)))
 			break;
 
 	if(hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2)
 		e1000_irq_enable(adapter);
-#endif
+
+#endif /* CONFIG_E1000_NAPI */
 
 	return IRQ_HANDLED;
 }
@@ -2650,22 +3178,37 @@ e1000_intr(int irq, void *data, struct pt_regs *regs)
  **/
 
 static int
-e1000_clean(struct net_device *netdev, int *budget)
+e1000_clean(struct net_device *poll_dev, int *budget)
 {
-	struct e1000_adapter *adapter = netdev_priv(netdev);
-	int work_to_do = min(*budget, netdev->quota);
-	int tx_cleaned;
-	int work_done = 0;
+	struct e1000_adapter *adapter;
+	int work_to_do = min(*budget, poll_dev->quota);
+	int tx_cleaned, i = 0, work_done = 0;
 
-	tx_cleaned = e1000_clean_tx_irq(adapter);
-	adapter->clean_rx(adapter, &work_done, work_to_do);
+	/* Must NOT use netdev_priv macro here. */
+	adapter = poll_dev->priv;
+
+	/* Keep link state information with original netdev */
+	if (!netif_carrier_ok(adapter->netdev))
+		goto quit_polling;
+
+	while (poll_dev != &adapter->polling_netdev[i]) {
+		i++;
+		if (unlikely(i == adapter->num_queues))
+			BUG();
+	}
+
+	tx_cleaned = e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]);
+	adapter->clean_rx(adapter, &adapter->rx_ring[i],
+	                  &work_done, work_to_do);
 
 	*budget -= work_done;
-	netdev->quota -= work_done;
+	poll_dev->quota -= work_done;
 	
-	if ((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
 	/* If no Tx and not enough Rx work done, exit the polling mode */
-		netif_rx_complete(netdev);
+	if((!tx_cleaned && (work_done == 0)) ||
+	   !netif_running(adapter->netdev)) {
+quit_polling:
+		netif_rx_complete(poll_dev);
 		e1000_irq_enable(adapter);
 		return 0;
 	}
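
The while loop at the top of e1000_clean recovers the queue index by walking polling_netdev until the pointers match; since poll_dev is an element of that array, plain pointer subtraction yields the same index in one step, as this small sketch shows:

#include <stdio.h>

struct dummy_netdev {
	int quota;	/* placeholder member */
};

int main(void)
{
	struct dummy_netdev polling_netdev[2];
	struct dummy_netdev *poll_dev = &polling_netdev[1];
	int i = 0;

	/* the lookup loop from e1000_clean() */
	while (poll_dev != &polling_netdev[i])
		i++;

	/* pointer subtraction gives the index directly */
	printf("walked i = %d, direct i = %td\n",
	       i, poll_dev - polling_netdev);
	return 0;
}
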
@@ -2680,9 +3223,9 @@ e1000_clean(struct net_device *netdev, int *budget)
  **/
 
 static boolean_t
-e1000_clean_tx_irq(struct e1000_adapter *adapter)
+e1000_clean_tx_irq(struct e1000_adapter *adapter,
+                   struct e1000_tx_ring *tx_ring)
 {
-	struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
 	struct net_device *netdev = adapter->netdev;
 	struct e1000_tx_desc *tx_desc, *eop_desc;
 	struct e1000_buffer *buffer_info;
@@ -2693,12 +3236,12 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 	eop = tx_ring->buffer_info[i].next_to_watch;
 	eop_desc = E1000_TX_DESC(*tx_ring, eop);
 
-	while(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
+	while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
 		/* Premature writeback of Tx descriptors clear (free buffers
 		 * and unmap pci_mapping) previous_buffer_info */
-		if (likely(adapter->previous_buffer_info.skb != NULL)) {
+		if (likely(tx_ring->previous_buffer_info.skb != NULL)) {
 			e1000_unmap_and_free_tx_resource(adapter,
-					&adapter->previous_buffer_info);
+					&tx_ring->previous_buffer_info);
 		}
 
 		for(cleaned = FALSE; !cleaned; ) {
@@ -2714,7 +3257,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 #ifdef NETIF_F_TSO
 			} else {
 				if (cleaned) {
-					memcpy(&adapter->previous_buffer_info,
+					memcpy(&tx_ring->previous_buffer_info,
 					       buffer_info,
 					       sizeof(struct e1000_buffer));
 					memset(buffer_info, 0,
@@ -2732,6 +3275,8 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 
 			if(unlikely(++i == tx_ring->count)) i = 0;
 		}
+
+		tx_ring->pkt++;
 		
 		eop = tx_ring->buffer_info[i].next_to_watch;
 		eop_desc = E1000_TX_DESC(*tx_ring, eop);
@@ -2739,15 +3284,15 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 
 	tx_ring->next_to_clean = i;
 
-	spin_lock(&adapter->tx_lock);
+	spin_lock(&tx_ring->tx_lock);
 
 	if(unlikely(cleaned && netif_queue_stopped(netdev) &&
 		    netif_carrier_ok(netdev)))
 		netif_wake_queue(netdev);
 
-	spin_unlock(&adapter->tx_lock);
-	if(adapter->detect_tx_hung) {
+	spin_unlock(&tx_ring->tx_lock);
 
+	if (adapter->detect_tx_hung) {
 		/* Detect a transmit hang in hardware, this serializes the
 		 * check with the clearing of time_stamp and movement of i */
 		adapter->detect_tx_hung = FALSE;
@@ -2771,8 +3316,8 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 					"  next_to_watch        <%x>\n"
 					"  jiffies              <%lx>\n"
 					"  next_to_watch.status <%x>\n",
-				E1000_READ_REG(&adapter->hw, TDH),
-				E1000_READ_REG(&adapter->hw, TDT),
+				readl(adapter->hw.hw_addr + tx_ring->tdh),
+				readl(adapter->hw.hw_addr + tx_ring->tdt),
 				tx_ring->next_to_use,
 				i,
 				(unsigned long long)tx_ring->buffer_info[i].dma,
@@ -2784,12 +3329,10 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
 		}
 	}
 #ifdef NETIF_F_TSO
-
-	if( unlikely(!(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	    time_after(jiffies, adapter->previous_buffer_info.time_stamp + HZ)))
+	if (unlikely(!(eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+	    time_after(jiffies, tx_ring->previous_buffer_info.time_stamp + HZ)))
 		e1000_unmap_and_free_tx_resource(
-		    adapter, &adapter->previous_buffer_info);
-
+		    adapter, &tx_ring->previous_buffer_info);
 #endif
 	return cleaned;
 }
@@ -2852,13 +3395,14 @@ e1000_rx_checksum(struct e1000_adapter *adapter,
 
 static boolean_t
 #ifdef CONFIG_E1000_NAPI
-e1000_clean_rx_irq(struct e1000_adapter *adapter, int *work_done,
-                   int work_to_do)
+e1000_clean_rx_irq(struct e1000_adapter *adapter,
+                   struct e1000_rx_ring *rx_ring,
+                   int *work_done, int work_to_do)
 #else
-e1000_clean_rx_irq(struct e1000_adapter *adapter)
+e1000_clean_rx_irq(struct e1000_adapter *adapter,
+                   struct e1000_rx_ring *rx_ring)
 #endif
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_rx_desc *rx_desc;
@@ -2944,6 +3488,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter)
 		}
 #endif /* CONFIG_E1000_NAPI */
 		netdev->last_rx = jiffies;
+		rx_ring->pkt++;
 
 next_desc:
 		rx_desc->status = 0;
@@ -2953,7 +3498,7 @@ next_desc:
 		rx_desc = E1000_RX_DESC(*rx_ring, i);
 	}
 	rx_ring->next_to_clean = i;
-	adapter->alloc_rx_buf(adapter);
+	adapter->alloc_rx_buf(adapter, rx_ring);
 
 	return cleaned;
 }
@@ -2965,13 +3510,14 @@ next_desc:
 
 static boolean_t
 #ifdef CONFIG_E1000_NAPI
-e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, int *work_done,
-                      int work_to_do)
+e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
+                      struct e1000_rx_ring *rx_ring,
+                      int *work_done, int work_to_do)
 #else
-e1000_clean_rx_irq_ps(struct e1000_adapter *adapter)
+e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
+                      struct e1000_rx_ring *rx_ring)
 #endif
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	union e1000_rx_desc_packet_split *rx_desc;
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
@@ -3027,7 +3573,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter)
 		/* Good Receive */
 		skb_put(skb, length);
 
-		for(j = 0; j < PS_PAGE_BUFFERS; j++) {
+		for(j = 0; j < adapter->rx_ps_pages; j++) {
 			if(!(length = le16_to_cpu(rx_desc->wb.upper.length[j])))
 				break;
 
@@ -3048,11 +3594,13 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter)
 				  rx_desc->wb.lower.hi_dword.csum_ip.csum, skb);
 		skb->protocol = eth_type_trans(skb, netdev);
 
-#ifdef HAVE_RX_ZERO_COPY
 		if(likely(rx_desc->wb.upper.header_status &
-			  E1000_RXDPS_HDRSTAT_HDRSP))
+			  E1000_RXDPS_HDRSTAT_HDRSP)) {
+			adapter->rx_hdr_split++;
+#ifdef HAVE_RX_ZERO_COPY
 			skb_shinfo(skb)->zero_copy = TRUE;
 #endif
+		}
 #ifdef CONFIG_E1000_NAPI
 		if(unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) {
 			vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
@@ -3071,6 +3619,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter)
 		}
 #endif /* CONFIG_E1000_NAPI */
 		netdev->last_rx = jiffies;
+		rx_ring->pkt++;
 
 next_desc:
 		rx_desc->wb.middle.status_error &= ~0xFF;
@@ -3081,7 +3630,7 @@ next_desc:
 		staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
 	}
 	rx_ring->next_to_clean = i;
-	adapter->alloc_rx_buf(adapter);
+	adapter->alloc_rx_buf(adapter, rx_ring);
 
 	return cleaned;
 }
@@ -3092,9 +3641,9 @@ next_desc:
  **/
 
 static void
-e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
+e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
+                       struct e1000_rx_ring *rx_ring)
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_rx_desc *rx_desc;
@@ -3178,7 +3727,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
 			 * applicable for weak-ordered memory model archs,
 			 * such as IA-64). */
 			wmb();
-			E1000_WRITE_REG(&adapter->hw, RDT, i);
+			writel(i, adapter->hw.hw_addr + rx_ring->rdt);
 		}
 
 		if(unlikely(++i == rx_ring->count)) i = 0;
@@ -3194,9 +3743,9 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
  **/
 
 static void
-e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter)
+e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
+                          struct e1000_rx_ring *rx_ring)
 {
-	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	union e1000_rx_desc_packet_split *rx_desc;
@@ -3215,22 +3764,26 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter)
 		rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
 
 		for(j = 0; j < PS_PAGE_BUFFERS; j++) {
-			if(unlikely(!ps_page->ps_page[j])) {
-				ps_page->ps_page[j] =
-					alloc_page(GFP_ATOMIC);
-				if(unlikely(!ps_page->ps_page[j]))
-					goto no_buffers;
-				ps_page_dma->ps_page_dma[j] =
-					pci_map_page(pdev,
-						     ps_page->ps_page[j],
-						     0, PAGE_SIZE,
-						     PCI_DMA_FROMDEVICE);
-			}
-			/* Refresh the desc even if buffer_addrs didn't
-			 * change because each write-back erases this info.
-			 */
-			rx_desc->read.buffer_addr[j+1] =
-				cpu_to_le64(ps_page_dma->ps_page_dma[j]);
+			if (j < adapter->rx_ps_pages) {
+				if (likely(!ps_page->ps_page[j])) {
+					ps_page->ps_page[j] =
+						alloc_page(GFP_ATOMIC);
+					if (unlikely(!ps_page->ps_page[j]))
+						goto no_buffers;
+					ps_page_dma->ps_page_dma[j] =
+						pci_map_page(pdev,
+							    ps_page->ps_page[j],
+							    0, PAGE_SIZE,
+							    PCI_DMA_FROMDEVICE);
+				}
+				/* Refresh the desc even if buffer_addrs didn't
+				 * change because each write-back erases 
+				 * this info.
+				 */
+				rx_desc->read.buffer_addr[j+1] =
+				     cpu_to_le64(ps_page_dma->ps_page_dma[j]);
+			} else
+				rx_desc->read.buffer_addr[j+1] = ~0;
 		}
 
 		skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN);
@@ -3264,7 +3817,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter)
 			 * descriptors are 32 bytes...so we increment tail
 			 * twice as much.
 			 */
-			E1000_WRITE_REG(&adapter->hw, RDT, i<<1);
+			writel(i<<1, adapter->hw.hw_addr + rx_ring->rdt);
 		}
 
 		if(unlikely(++i == rx_ring->count)) i = 0;
@@ -3715,6 +4268,12 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state)
 	}
 
 	switch(adapter->hw.mac_type) {
+	case e1000_82571:
+	case e1000_82572:
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
+				ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
@@ -3737,6 +4296,7 @@ e1000_resume(struct pci_dev *pdev)
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	uint32_t manc, ret_val, swsm;
+	uint32_t ctrl_ext;
 
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
@@ -3762,6 +4322,12 @@ e1000_resume(struct pci_dev *pdev)
 	}
 
 	switch(adapter->hw.mac_type) {
+	case e1000_82571:
+	case e1000_82572:
+		ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT);
+		E1000_WRITE_REG(&adapter->hw, CTRL_EXT,
+				ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+		break;
 	case e1000_82573:
 		swsm = E1000_READ_REG(&adapter->hw, SWSM);
 		E1000_WRITE_REG(&adapter->hw, SWSM,
@@ -3786,7 +4352,7 @@ e1000_netpoll(struct net_device *netdev)
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	disable_irq(adapter->pdev->irq);
 	e1000_intr(adapter->pdev->irq, netdev, NULL);
-	e1000_clean_tx_irq(adapter);
+	e1000_clean_tx_irq(adapter, adapter->tx_ring);
 	enable_irq(adapter->pdev->irq);
 }
 #endif

+ 8 - 2
drivers/net/e1000/e1000_param.c

@@ -306,7 +306,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 			.def  = E1000_DEFAULT_TXD,
 			.arg  = { .r = { .min = E1000_MIN_TXD }}
 		};
-		struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+		struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+		int i;
 		e1000_mac_type mac_type = adapter->hw.mac_type;
 		opt.arg.r.max = mac_type < e1000_82544 ?
 			E1000_MAX_TXD : E1000_MAX_82544_TXD;
@@ -319,6 +320,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 		} else {
 			tx_ring->count = opt.def;
 		}
+		for (i = 0; i < adapter->num_queues; i++)
+			tx_ring[i].count = tx_ring->count;
 	}
 	{ /* Receive Descriptor Count */
 		struct e1000_option opt = {
@@ -329,7 +332,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 			.def  = E1000_DEFAULT_RXD,
 			.arg  = { .r = { .min = E1000_MIN_RXD }}
 		};
-		struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+		struct e1000_rx_ring *rx_ring = adapter->rx_ring;
+		int i;
 		e1000_mac_type mac_type = adapter->hw.mac_type;
 		opt.arg.r.max = mac_type < e1000_82544 ? E1000_MAX_RXD :
 			E1000_MAX_82544_RXD;
@@ -342,6 +346,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 		} else {
 			rx_ring->count = opt.def;
 		}
+		for (i = 0; i < adapter->num_queues; i++)
+			rx_ring[i].count = rx_ring->count;
 	}
 	{ /* Checksum Offload Enable/Disable */
 		struct e1000_option opt = {

+ 2 - 2
drivers/net/epic100.c

@@ -1334,7 +1334,7 @@ static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
 static int epic_poll(struct net_device *dev, int *budget)
 {
 	struct epic_private *ep = dev->priv;
-	int work_done, orig_budget;
+	int work_done = 0, orig_budget;
 	long ioaddr = dev->base_addr;
 
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
@@ -1343,7 +1343,7 @@ rx_action:
 
 	epic_tx(dev, ep);
 
-	work_done = epic_rx(dev, *budget);
+	work_done += epic_rx(dev, *budget);
 
 	epic_rx_err(dev, ep);
 

+ 202 - 108
drivers/net/forcedeth.c

@@ -95,6 +95,8 @@
  *			   of nv_remove
  *      0.42: 06 Aug 2005: Fix lack of link speed initialization
  *			   in the second (and later) nv_open call
+ *      0.43: 10 Aug 2005: Add support for tx checksum.
+ *      0.44: 20 Aug 2005: Add support for scatter gather and segmentation.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -106,7 +108,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.41"
+#define FORCEDETH_VERSION		"0.44"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -145,6 +147,7 @@
 #define DEV_NEED_LINKTIMER	0x0002	/* poll link settings. Relies on the timer irq */
 #define DEV_HAS_LARGEDESC	0x0004	/* device supports jumbo frames and needs packet format 2 */
 #define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
+#define DEV_HAS_CHECKSUM        0x0010  /* device supports tx and rx checksum offloads */
 
 enum {
 	NvRegIrqStatus = 0x000,
@@ -241,6 +244,9 @@ enum {
 #define NVREG_TXRXCTL_IDLE	0x0008
 #define NVREG_TXRXCTL_RESET	0x0010
 #define NVREG_TXRXCTL_RXCHECK	0x0400
+#define NVREG_TXRXCTL_DESC_1	0
+#define NVREG_TXRXCTL_DESC_2	0x02100
+#define NVREG_TXRXCTL_DESC_3	0x02200
 	NvRegMIIStatus = 0x180,
 #define NVREG_MIISTAT_ERROR		0x0001
 #define NVREG_MIISTAT_LINKCHANGE	0x0008
@@ -335,6 +341,10 @@ typedef union _ring_type {
 /* error and valid are the same for both */
 #define NV_TX2_ERROR		(1<<30)
 #define NV_TX2_VALID		(1<<31)
+#define NV_TX2_TSO		(1<<28)
+#define NV_TX2_TSO_SHIFT	14
+#define NV_TX2_CHECKSUM_L3	(1<<27)
+#define NV_TX2_CHECKSUM_L4	(1<<26)
 
 #define NV_RX_DESCRIPTORVALID	(1<<16)
 #define NV_RX_MISSEDFRAME	(1<<17)
@@ -417,14 +427,14 @@ typedef union _ring_type {
 
 /* 
  * desc_ver values:
- * This field has two purposes:
- * - Newer nics uses a different ring layout. The layout is selected by
- *   comparing np->desc_ver with DESC_VER_xy.
- * - It contains bits that are forced on when writing to NvRegTxRxControl.
+ * The nic supports three different descriptor types:
+ * - DESC_VER_1: Original
+ * - DESC_VER_2: support for jumbo frames.
+ * - DESC_VER_3: 64-bit format.
  */
-#define DESC_VER_1	0x0
-#define DESC_VER_2	(0x02100|NVREG_TXRXCTL_RXCHECK)
-#define DESC_VER_3      (0x02200|NVREG_TXRXCTL_RXCHECK)
+#define DESC_VER_1	1
+#define DESC_VER_2	2
+#define DESC_VER_3	3
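
With desc_ver reduced to a pure version number, the NvRegTxRxControl bits that used to be folded into it now travel in np->txrxctl_bits. A hedged sketch of one plausible derivation (the probe-time assignment itself is outside this hunk), showing that descriptor version 2 with receive checksumming reproduces the old DESC_VER_2 value of 0x02500:

#include <stdio.h>
#include <stdint.h>

#define NVREG_TXRXCTL_RXCHECK	0x0400
#define NVREG_TXRXCTL_DESC_1	0
#define NVREG_TXRXCTL_DESC_2	0x02100
#define NVREG_TXRXCTL_DESC_3	0x02200

#define DESC_VER_1	1
#define DESC_VER_2	2
#define DESC_VER_3	3

/* hypothetical helper: rebuild the control bits from the version */
static uint32_t txrxctl_bits(int desc_ver, int rx_checksum)
{
	uint32_t bits;

	switch (desc_ver) {
	case DESC_VER_2: bits = NVREG_TXRXCTL_DESC_2; break;
	case DESC_VER_3: bits = NVREG_TXRXCTL_DESC_3; break;
	default:	 bits = NVREG_TXRXCTL_DESC_1; break;
	}
	if (rx_checksum)
		bits |= NVREG_TXRXCTL_RXCHECK;
	return bits;
}

int main(void)
{
	printf("ver 2 + rx checksum: 0x%05x (old DESC_VER_2 was 0x02500)\n",
	       txrxctl_bits(DESC_VER_2, 1));
	return 0;
}
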
 
 /* PHY defines */
 #define PHY_OUI_MARVELL	0x5043
@@ -491,6 +501,7 @@ struct fe_priv {
 	u32 orig_mac[2];
 	u32 irqmask;
 	u32 desc_ver;
+	u32 txrxctl_bits;
 
 	void __iomem *base;
 
@@ -534,7 +545,7 @@ static inline struct fe_priv *get_nvpriv(struct net_device *dev)
 
 static inline u8 __iomem *get_hwbase(struct net_device *dev)
 {
-	return get_nvpriv(dev)->base;
+	return ((struct fe_priv *)netdev_priv(dev))->base;
 }
 
 static inline void pci_push(u8 __iomem *base)
@@ -623,7 +634,7 @@ static int mii_rw(struct net_device *dev, int addr, int miireg, int value)
 
 static int phy_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 miicontrol;
 	unsigned int tries = 0;
 
@@ -726,7 +737,7 @@ static int phy_init(struct net_device *dev)
 
 static void nv_start_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	dprintk(KERN_DEBUG "%s: nv_start_rx\n", dev->name);
@@ -782,14 +793,14 @@ static void nv_stop_tx(struct net_device *dev)
 
 static void nv_txrx_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	dprintk(KERN_DEBUG "%s: nv_txrx_reset\n", dev->name);
-	writel(NVREG_TXRXCTL_BIT2 | NVREG_TXRXCTL_RESET | np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT2 | NVREG_TXRXCTL_RESET | np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
 	udelay(NV_TXRX_RESET_DELAY);
-	writel(NVREG_TXRXCTL_BIT2 | np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT2 | np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
 }
 
@@ -801,7 +812,7 @@ static void nv_txrx_reset(struct net_device *dev)
  */
 static struct net_device_stats *nv_get_stats(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	/* It seems that the nic always generates interrupts and doesn't
 	 * accumulate errors internally. Thus the current values in np->stats
@@ -817,7 +828,7 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
  */
 static int nv_alloc_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	unsigned int refill_rx = np->refill_rx;
 	int nr;
 
@@ -861,7 +872,7 @@ static int nv_alloc_rx(struct net_device *dev)
 static void nv_do_rx_refill(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	disable_irq(dev->irq);
 	if (nv_alloc_rx(dev)) {
@@ -875,7 +886,7 @@ static void nv_do_rx_refill(unsigned long data)
 
 static void nv_init_rx(struct net_device *dev) 
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 
 	np->cur_rx = RX_RING;
@@ -889,15 +900,17 @@ static void nv_init_rx(struct net_device *dev)
 
 static void nv_init_tx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 
 	np->next_tx = np->nic_tx = 0;
-	for (i = 0; i < TX_RING; i++)
+	for (i = 0; i < TX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
 			np->tx_ring.orig[i].FlagLen = 0;
 	        else
 			np->tx_ring.ex[i].FlagLen = 0;
+		np->tx_skbuff[i] = NULL;
+	}
 }
 
 static int nv_init_ring(struct net_device *dev)
@@ -907,21 +920,44 @@ static int nv_init_ring(struct net_device *dev)
 	return nv_alloc_rx(dev);
 }
 
+static void nv_release_txskb(struct net_device *dev, unsigned int skbnr)
+{
+	struct fe_priv *np = netdev_priv(dev);
+	struct sk_buff *skb = np->tx_skbuff[skbnr];
+	unsigned int j, entry, fragments;
+			
+	dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d, skb %p\n",
+		dev->name, skbnr, np->tx_skbuff[skbnr]);
+	
+	entry = skbnr;
+	if ((fragments = skb_shinfo(skb)->nr_frags) != 0) {
+		for (j = fragments; j >= 1; j--) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[j-1];
+			pci_unmap_page(np->pci_dev, np->tx_dma[entry],
+				       frag->size,
+				       PCI_DMA_TODEVICE);
+			entry = (entry - 1) % TX_RING;
+		}
+	}
+	pci_unmap_single(np->pci_dev, np->tx_dma[entry],
+			 skb->len - skb->data_len,
+			 PCI_DMA_TODEVICE);
+	dev_kfree_skb_irq(skb);
+	np->tx_skbuff[skbnr] = NULL;
+}
+
 static void nv_drain_tx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
-	int i;
+	struct fe_priv *np = netdev_priv(dev);
+	unsigned int i;
+	
 	for (i = 0; i < TX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
 			np->tx_ring.orig[i].FlagLen = 0;
 		else
 			np->tx_ring.ex[i].FlagLen = 0;
 		if (np->tx_skbuff[i]) {
-			pci_unmap_single(np->pci_dev, np->tx_dma[i],
-						np->tx_skbuff[i]->len,
-						PCI_DMA_TODEVICE);
-			dev_kfree_skb(np->tx_skbuff[i]);
-			np->tx_skbuff[i] = NULL;
+			nv_release_txskb(dev, i);
 			np->stats.tx_dropped++;
 		}
 	}
@@ -929,7 +965,7 @@ static void nv_drain_tx(struct net_device *dev)
 
 static void nv_drain_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 	for (i = 0; i < RX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
@@ -959,28 +995,69 @@ static void drain_ring(struct net_device *dev)
  */
 static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
-	int nr = np->next_tx % TX_RING;
+	struct fe_priv *np = netdev_priv(dev);
+	u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
+	unsigned int fragments = skb_shinfo(skb)->nr_frags;
+	unsigned int nr = (np->next_tx + fragments) % TX_RING;
+	unsigned int i;
+
+	spin_lock_irq(&np->lock);
+
+	if ((np->next_tx - np->nic_tx + fragments) > TX_LIMIT_STOP) {
+		spin_unlock_irq(&np->lock);
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
 
 	np->tx_skbuff[nr] = skb;
-	np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data,skb->len,
-					PCI_DMA_TODEVICE);
+	
+	if (fragments) {
+		dprintk(KERN_DEBUG "%s: nv_start_xmit: buffer contains %d fragments\n", dev->name, fragments);
+		/* setup descriptors in reverse order */
+		for (i = fragments; i >= 1; i--) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset, frag->size,
+							PCI_DMA_TODEVICE);
 
-	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+			if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+				np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
+				np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
+			} else {
+				np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
+				np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
+				np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
+			}
+			
+			nr = (nr - 1) % TX_RING;
+
+			if (np->desc_ver == DESC_VER_1)
+				tx_flags_extra &= ~NV_TX_LASTPACKET;
+			else
+				tx_flags_extra &= ~NV_TX2_LASTPACKET;		
+		}
+	}
+
+#ifdef NETIF_F_TSO
+	if (skb_shinfo(skb)->tso_size)
+		tx_flags_extra |= NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
+	else
+#endif
+	tx_flags_extra |= (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+
+	np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len-skb->data_len,
+					PCI_DMA_TODEVICE);
+	
+	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
-	else {
+		np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
+	} else {
 		np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
 		np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-	}
+		np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
+	}	
 
-	spin_lock_irq(&np->lock);
-	wmb();
-	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-		np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
-	else
-		np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
-	dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission.\n",
-				dev->name, np->next_tx);
+	dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d queued for transmission. tx_flags_extra: %x\n",
+				dev->name, np->next_tx, tx_flags_extra);
 	{
 		int j;
 		for (j=0; j<64; j++) {
@@ -991,15 +1068,13 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		dprintk("\n");
 	}
 
-	np->next_tx++;
+	np->next_tx += 1 + fragments;
 
 	dev->trans_start = jiffies;
-	if (np->next_tx - np->nic_tx >= TX_LIMIT_STOP)
-		netif_stop_queue(dev);
 	spin_unlock_irq(&np->lock);
-	writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
 	pci_push(get_hwbase(dev));
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 /*
@@ -1009,9 +1084,10 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
  */
 static void nv_tx_done(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 Flags;
-	int i;
+	unsigned int i;
+	struct sk_buff *skb;
 
 	while (np->nic_tx != np->next_tx) {
 		i = np->nic_tx % TX_RING;
@@ -1026,35 +1102,38 @@ static void nv_tx_done(struct net_device *dev)
 		if (Flags & NV_TX_VALID)
 			break;
 		if (np->desc_ver == DESC_VER_1) {
-			if (Flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
-							NV_TX_UNDERFLOW|NV_TX_ERROR)) {
-				if (Flags & NV_TX_UNDERFLOW)
-					np->stats.tx_fifo_errors++;
-				if (Flags & NV_TX_CARRIERLOST)
-					np->stats.tx_carrier_errors++;
-				np->stats.tx_errors++;
-			} else {
-				np->stats.tx_packets++;
-				np->stats.tx_bytes += np->tx_skbuff[i]->len;
+			if (Flags & NV_TX_LASTPACKET) {
+				skb = np->tx_skbuff[i];
+				if (Flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
+					     NV_TX_UNDERFLOW|NV_TX_ERROR)) {
+					if (Flags & NV_TX_UNDERFLOW)
+						np->stats.tx_fifo_errors++;
+					if (Flags & NV_TX_CARRIERLOST)
+						np->stats.tx_carrier_errors++;
+					np->stats.tx_errors++;
+				} else {
+					np->stats.tx_packets++;
+					np->stats.tx_bytes += skb->len;
+				}
+				nv_release_txskb(dev, i);
 			}
 		} else {
-			if (Flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
-							NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
-				if (Flags & NV_TX2_UNDERFLOW)
-					np->stats.tx_fifo_errors++;
-				if (Flags & NV_TX2_CARRIERLOST)
-					np->stats.tx_carrier_errors++;
-				np->stats.tx_errors++;
-			} else {
-				np->stats.tx_packets++;
-				np->stats.tx_bytes += np->tx_skbuff[i]->len;
+			if (Flags & NV_TX2_LASTPACKET) {
+				skb = np->tx_skbuff[i];
+				if (Flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
+					     NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
+					if (Flags & NV_TX2_UNDERFLOW)
+						np->stats.tx_fifo_errors++;
+					if (Flags & NV_TX2_CARRIERLOST)
+						np->stats.tx_carrier_errors++;
+					np->stats.tx_errors++;
+				} else {
+					np->stats.tx_packets++;
+					np->stats.tx_bytes += skb->len;
+				}				
+				nv_release_txskb(dev, i);
 			}
 		}
-		pci_unmap_single(np->pci_dev, np->tx_dma[i],
-					np->tx_skbuff[i]->len,
-					PCI_DMA_TODEVICE);
-		dev_kfree_skb_irq(np->tx_skbuff[i]);
-		np->tx_skbuff[i] = NULL;
 		np->nic_tx++;
 	}
 	if (np->next_tx - np->nic_tx < TX_LIMIT_START)
@@ -1067,7 +1146,7 @@ static void nv_tx_done(struct net_device *dev)
  */
 static void nv_tx_timeout(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name,
@@ -1200,7 +1279,7 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
 
 static void nv_rx_process(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 Flags;
 
 	for (;;) {
@@ -1355,7 +1434,7 @@ static void set_bufsize(struct net_device *dev)
  */
 static int nv_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int old_mtu;
 
 	if (new_mtu < 64 || new_mtu > np->pkt_limit)
@@ -1408,7 +1487,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
 			base + NvRegRingSizes);
 		pci_push(base);
-		writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl);
+		writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
 		pci_push(base);
 
 		/* restart rx engine */
@@ -1440,7 +1519,7 @@ static void nv_copy_mac_to_hw(struct net_device *dev)
  */
 static int nv_set_mac_address(struct net_device *dev, void *addr)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	struct sockaddr *macaddr = (struct sockaddr*)addr;
 
 	if(!is_valid_ether_addr(macaddr->sa_data))
@@ -1475,7 +1554,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
  */
 static void nv_set_multicast(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 addr[2];
 	u32 mask[2];
@@ -1535,7 +1614,7 @@ static void nv_set_multicast(struct net_device *dev)
 
 static int nv_update_linkspeed(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	int adv, lpa;
 	int newls = np->linkspeed;
@@ -1705,7 +1784,7 @@ static void nv_link_irq(struct net_device *dev)
 static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 events;
 	int i;
@@ -1777,7 +1856,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 static void nv_do_nic_poll(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	disable_irq(dev->irq);
@@ -1801,7 +1880,7 @@ static void nv_poll_controller(struct net_device *dev)
 
 static void nv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	strcpy(info->driver, "forcedeth");
 	strcpy(info->version, FORCEDETH_VERSION);
 	strcpy(info->bus_info, pci_name(np->pci_dev));
@@ -1809,7 +1888,7 @@ static void nv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 
 static void nv_get_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	wolinfo->supported = WAKE_MAGIC;
 
 	spin_lock_irq(&np->lock);
@@ -1820,7 +1899,7 @@ static void nv_get_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 
 static int nv_set_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	spin_lock_irq(&np->lock);
@@ -2021,7 +2100,7 @@ static int nv_get_regs_len(struct net_device *dev)
 
 static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 *rbuf = buf;
 	int i;
@@ -2035,7 +2114,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void
 
 static int nv_nway_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int ret;
 
 	spin_lock_irq(&np->lock);
@@ -2065,11 +2144,12 @@ static struct ethtool_ops ops = {
 	.get_regs_len = nv_get_regs_len,
 	.get_regs = nv_get_regs,
 	.nway_reset = nv_nway_reset,
+	.get_perm_addr = ethtool_op_get_perm_addr,
 };
 
 static int nv_open(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	int ret, oom, i;
 
@@ -2114,9 +2194,9 @@ static int nv_open(struct net_device *dev)
 	/* 5) continue setup */
 	writel(np->linkspeed, base + NvRegLinkSpeed);
 	writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3);
-	writel(np->desc_ver, base + NvRegTxRxControl);
+	writel(np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
-	writel(NVREG_TXRXCTL_BIT1|np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT1|np->txrxctl_bits, base + NvRegTxRxControl);
 	reg_delay(dev, NvRegUnknownSetupReg5, NVREG_UNKSETUP5_BIT31, NVREG_UNKSETUP5_BIT31,
 			NV_SETUP5_DELAY, NV_SETUP5_DELAYMAX,
 			KERN_INFO "open: SetupReg5, Bit 31 remained off\n");
@@ -2205,7 +2285,7 @@ out_drain:
 
 static int nv_close(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base;
 
 	spin_lock_irq(&np->lock);
@@ -2261,7 +2341,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	if (!dev)
 		goto out;
 
-	np = get_nvpriv(dev);
+	np = netdev_priv(dev);
 	np->pci_dev = pci_dev;
 	spin_lock_init(&np->lock);
 	SET_MODULE_OWNER(dev);
@@ -2313,19 +2393,32 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		if (pci_set_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
 			printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
 					pci_name(pci_dev));
+		} else {
+			dev->features |= NETIF_F_HIGHDMA;
 		}
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
 	} else if (id->driver_data & DEV_HAS_LARGEDESC) {
 		/* packet format 2: supports jumbo frames */
 		np->desc_ver = DESC_VER_2;
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_2;
 	} else {
 		/* original packet format */
 		np->desc_ver = DESC_VER_1;
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_1;
 	}
 
 	np->pkt_limit = NV_PKTLIMIT_1;
 	if (id->driver_data & DEV_HAS_LARGEDESC)
 		np->pkt_limit = NV_PKTLIMIT_2;
 
+	if (id->driver_data & DEV_HAS_CHECKSUM) {
+		np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
+		dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
+#ifdef NETIF_F_TSO
+		dev->features |= NETIF_F_TSO;
+#endif
+	}
+
 	err = -ENOMEM;
 	np->base = ioremap(addr, NV_PCI_REGSZ);
 	if (!np->base)
@@ -2377,8 +2470,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff;
 	dev->dev_addr[4] = (np->orig_mac[0] >>  8) & 0xff;
 	dev->dev_addr[5] = (np->orig_mac[0] >>  0) & 0xff;
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
-	if (!is_valid_ether_addr(dev->dev_addr)) {
+	if (!is_valid_ether_addr(dev->perm_addr)) {
 		/*
 		 * Bad mac address. At least one bios sets the mac address
 		 * to 01:23:45:67:89:ab
@@ -2403,9 +2497,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	np->wolenabled = 0;
 
 	if (np->desc_ver == DESC_VER_1) {
-		np->tx_flags = NV_TX_LASTPACKET|NV_TX_VALID;
+		np->tx_flags = NV_TX_VALID;
 	} else {
-		np->tx_flags = NV_TX2_LASTPACKET|NV_TX2_VALID;
+		np->tx_flags = NV_TX2_VALID;
 	}
 	np->irqmask = NVREG_IRQMASK_WANTED;
 	if (id->driver_data & DEV_NEED_TIMERIRQ)
@@ -2494,7 +2588,7 @@ out:
 static void __devexit nv_remove(struct pci_dev *pci_dev)
 {
 	struct net_device *dev = pci_get_drvdata(pci_dev);
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	unregister_netdev(dev);
 
@@ -2525,35 +2619,35 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_4),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* CK804 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* CK804 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP04 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP04 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP51 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12),
@@ -2565,11 +2659,11 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{0,},
 };

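The heart of the forcedeth update: a scatter-gather skb now consumes one descriptor per fragment, the descriptors are filled in reverse so the LASTPACKET flag lands on the chain's final descriptor, completion accounting in nv_tx_done() keys on that same flag, and for TSO the MSS is packed into the flag word above NV_TX2_TSO_SHIFT. The compilable model below reuses NV_TX2_TSO and NV_TX2_TSO_SHIFT from the hunk; the LASTPACKET bit position and the ring size are assumptions, and the unsigned '(nr - 1) % TX_RING' step in the patch only wraps correctly for a power-of-two ring, which the masked decrement below makes explicit:

#include <stdio.h>
#include <stdint.h>

#define NV_TX2_LASTPACKET  (1u << 29)   /* assumed; not shown in this hunk */
#define NV_TX2_TSO         (1u << 28)   /* from the hunk */
#define NV_TX2_TSO_SHIFT   14           /* from the hunk */

/* Build the FlagLen word for one descriptor: the low bits carry
 * (len - 1), TSO packs the MSS above NV_TX2_TSO_SHIFT, and
 * LASTPACKET marks only the final fragment of the chain. */
static uint32_t flaglen(uint32_t len, uint32_t mss, int last)
{
    uint32_t flags = len - 1;

    if (mss)
        flags |= NV_TX2_TSO | (mss << NV_TX2_TSO_SHIFT);
    if (last)
        flags |= NV_TX2_LASTPACKET;
    return flags;
}

int main(void)
{
    unsigned ring = 256, nr = 0;             /* power-of-two ring assumed */

    printf("middle frag: %08x\n", flaglen(1024, 0, 0));
    printf("last + TSO:  %08x\n", flaglen(512, 1460, 1));

    nr = (nr - 1) & (ring - 1);              /* explicit safe reverse step */
    printf("previous descriptor: %u\n", nr); /* 255 */
    return 0;
}
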
+ 86 - 326
drivers/net/gianfar.c

@@ -29,12 +29,7 @@
  *  define the configuration needed by the board are defined in a
  *  board structure in arch/ppc/platforms (though I do not
  *  discount the possibility that other architectures could one
- *  day be supported.  One assumption the driver currently makes
- *  is that the PHY is configured in such a way to advertise all
- *  capabilities.  This is a sensible default, and on certain
- *  PHYs, changing this default encounters substantial errata
- *  issues.  Future versions may remove this requirement, but for
- *  now, it is best for the firmware to ensure this is the case.
+ *  day be supported.
  *
  *  The Gianfar Ethernet Controller uses a ring of buffer
  *  descriptors.  The beginning is indicated by a register
@@ -47,7 +42,7 @@
  *  corresponding bit in the IMASK register is also set (if
  *  interrupt coalescing is active, then the interrupt may not
  *  happen immediately, but will wait until either a set number
- *  of frames or amount of time have passed.).  In NAPI, the
+ *  of frames or amount of time have passed).  In NAPI, the
  *  interrupt handler will signal there is work to be done, and
  *  exit.  Without NAPI, the packet(s) will be handled
  *  immediately.  Both methods will start at the last known empty
@@ -75,6 +70,7 @@
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/unistd.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -97,9 +93,11 @@
 #include <linux/version.h>
 #include <linux/dma-mapping.h>
 #include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include "gianfar.h"
-#include "gianfar_phy.h"
+#include "gianfar_mii.h"
 
 #define TX_TIMEOUT      (1*HZ)
 #define SKB_ALLOC_TIMEOUT 1000000
@@ -113,9 +111,8 @@
 #endif
 
 const char gfar_driver_name[] = "Gianfar Ethernet";
-const char gfar_driver_version[] = "1.1";
+const char gfar_driver_version[] = "1.2";
 
-int startup_gfar(struct net_device *dev);
 static int gfar_enet_open(struct net_device *dev);
 static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void gfar_timeout(struct net_device *dev);
@@ -126,17 +123,13 @@ static int gfar_set_mac_address(struct net_device *dev);
 static int gfar_change_mtu(struct net_device *dev, int new_mtu);
 static irqreturn_t gfar_error(int irq, void *dev_id, struct pt_regs *regs);
 static irqreturn_t gfar_transmit(int irq, void *dev_id, struct pt_regs *regs);
-static irqreturn_t gfar_receive(int irq, void *dev_id, struct pt_regs *regs);
 static irqreturn_t gfar_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-static irqreturn_t phy_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-static void gfar_phy_change(void *data);
-static void gfar_phy_timer(unsigned long data);
 static void adjust_link(struct net_device *dev);
 static void init_registers(struct net_device *dev);
 static int init_phy(struct net_device *dev);
 static int gfar_probe(struct device *device);
 static int gfar_remove(struct device *device);
-void free_skb_resources(struct gfar_private *priv);
+static void free_skb_resources(struct gfar_private *priv);
 static void gfar_set_multi(struct net_device *dev);
 static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
 #ifdef CONFIG_GFAR_NAPI
@@ -144,7 +137,6 @@ static int gfar_poll(struct net_device *dev, int *budget);
 #endif
 int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit);
 static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, int length);
-static void gfar_phy_startup_timer(unsigned long data);
 static void gfar_vlan_rx_register(struct net_device *netdev,
 		                struct vlan_group *grp);
 static void gfar_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
@@ -162,6 +154,9 @@ int gfar_uses_fcb(struct gfar_private *priv)
 	else
 		return 0;
 }
+
+/* Set up the ethernet device structure, private data,
+ * and anything else we need before we start */
 static int gfar_probe(struct device *device)
 {
 	u32 tempval;
@@ -175,7 +170,7 @@ static int gfar_probe(struct device *device)
 
 	einfo = (struct gianfar_platform_data *) pdev->dev.platform_data;
 
-	if (einfo == NULL) {
+	if (NULL == einfo) {
 		printk(KERN_ERR "gfar %d: Missing additional data!\n",
 		       pdev->id);
 
@@ -185,7 +180,7 @@ static int gfar_probe(struct device *device)
 	/* Create an ethernet device instance */
 	dev = alloc_etherdev(sizeof (*priv));
 
-	if (dev == NULL)
+	if (NULL == dev)
 		return -ENOMEM;
 
 	priv = netdev_priv(dev);
@@ -207,20 +202,11 @@ static int gfar_probe(struct device *device)
 	priv->regs = (struct gfar *)
 		ioremap(r->start, sizeof (struct gfar));
 
-	if (priv->regs == NULL) {
+	if (NULL == priv->regs) {
 		err = -ENOMEM;
 		goto regs_fail;
 	}
 
-	/* Set the PHY base address */
-	priv->phyregs = (struct gfar *)
-	    ioremap(einfo->phy_reg_addr, sizeof (struct gfar));
-
-	if (priv->phyregs == NULL) {
-		err = -ENOMEM;
-		goto phy_regs_fail;
-	}
-
 	spin_lock_init(&priv->lock);
 
 	dev_set_drvdata(device, dev);
@@ -386,12 +372,10 @@ static int gfar_probe(struct device *device)
 	return 0;
 
 register_fail:
-	iounmap((void *) priv->phyregs);
-phy_regs_fail:
 	iounmap((void *) priv->regs);
 regs_fail:
 	free_netdev(dev);
-	return -ENOMEM;
+	return err;
 }
 
 static int gfar_remove(struct device *device)
@@ -402,108 +386,41 @@ static int gfar_remove(struct device *device)
 	dev_set_drvdata(device, NULL);
 
 	iounmap((void *) priv->regs);
-	iounmap((void *) priv->phyregs);
 	free_netdev(dev);
 
 	return 0;
 }
 
 
-/* Configure the PHY for dev.
- * returns 0 if success.  -1 if failure
+/* Initializes driver's PHY state, and attaches to the PHY.
+ * Returns 0 on success.
  */
 static int init_phy(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	struct phy_info *curphy;
-	unsigned int timeout = PHY_INIT_TIMEOUT;
-	struct gfar *phyregs = priv->phyregs;
-	struct gfar_mii_info *mii_info;
-	int err;
+	uint gigabit_support =
+		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
+		SUPPORTED_1000baseT_Full : 0;
+	struct phy_device *phydev;
 
 	priv->oldlink = 0;
 	priv->oldspeed = 0;
 	priv->oldduplex = -1;
 
-	mii_info = kmalloc(sizeof(struct gfar_mii_info),
-			GFP_KERNEL);
-
-	if(NULL == mii_info) {
-		if (netif_msg_ifup(priv))
-			printk(KERN_ERR "%s: Could not allocate mii_info\n",
-					dev->name);
-		return -ENOMEM;
-	}
-
-	mii_info->speed = SPEED_1000;
-	mii_info->duplex = DUPLEX_FULL;
-	mii_info->pause = 0;
-	mii_info->link = 1;
-
-	mii_info->advertising = (ADVERTISED_10baseT_Half |
-			ADVERTISED_10baseT_Full |
-			ADVERTISED_100baseT_Half |
-			ADVERTISED_100baseT_Full |
-			ADVERTISED_1000baseT_Full);
-	mii_info->autoneg = 1;
+	phydev = phy_connect(dev, priv->einfo->bus_id, &adjust_link, 0);
 
-	spin_lock_init(&mii_info->mdio_lock);
-
-	mii_info->mii_id = priv->einfo->phyid;
-
-	mii_info->dev = dev;
-
-	mii_info->mdio_read = &read_phy_reg;
-	mii_info->mdio_write = &write_phy_reg;
-
-	priv->mii_info = mii_info;
-
-	/* Reset the management interface */
-	gfar_write(&phyregs->miimcfg, MIIMCFG_RESET);
-
-	/* Setup the MII Mgmt clock speed */
-	gfar_write(&phyregs->miimcfg, MIIMCFG_INIT_VALUE);
-
-	/* Wait until the bus is free */
-	while ((gfar_read(&phyregs->miimind) & MIIMIND_BUSY) &&
-			timeout--)
-		cpu_relax();
-
-	if(timeout <= 0) {
-		printk(KERN_ERR "%s: The MII Bus is stuck!\n",
-				dev->name);
-		err = -1;
-		goto bus_fail;
-	}
-
-	/* get info for this PHY */
-	curphy = get_phy_info(priv->mii_info);
-
-	if (curphy == NULL) {
-		if (netif_msg_ifup(priv))
-			printk(KERN_ERR "%s: No PHY found\n", dev->name);
-		err = -1;
-		goto no_phy;
+	if (IS_ERR(phydev)) {
+		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+		return PTR_ERR(phydev);
 	}
 
-	mii_info->phyinfo = curphy;
+	/* Remove any features not supported by the controller */
+	phydev->supported &= (GFAR_SUPPORTED | gigabit_support);
+	phydev->advertising = phydev->supported;
 
-	/* Run the commands which initialize the PHY */
-	if(curphy->init) {
-		err = curphy->init(priv->mii_info);
-
-		if (err)
-			goto phy_init_fail;
-	}
+	priv->phydev = phydev;
 
 	return 0;
-
-phy_init_fail:
-no_phy:
-bus_fail:
-	kfree(mii_info);
-
-	return err;
 }
 
 static void init_registers(struct net_device *dev)
@@ -603,24 +520,13 @@ void stop_gfar(struct net_device *dev)
 	struct gfar *regs = priv->regs;
 	unsigned long flags;
 
+	phy_stop(priv->phydev);
+
 	/* Lock it down */
 	spin_lock_irqsave(&priv->lock, flags);
 
-	/* Tell the kernel the link is down */
-	priv->mii_info->link = 0;
-	adjust_link(dev);
-
 	gfar_halt(dev);
 
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		/* Clear any pending interrupts */
-		mii_clear_phy_interrupt(priv->mii_info);
-
-		/* Disable PHY Interrupts */
-		mii_configure_phy_interrupt(priv->mii_info,
-				MII_INTERRUPT_DISABLED);
-	}
-
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	/* Free the IRQs */
@@ -629,13 +535,7 @@ void stop_gfar(struct net_device *dev)
 		free_irq(priv->interruptTransmit, dev);
 		free_irq(priv->interruptReceive, dev);
 	} else {
-		free_irq(priv->interruptTransmit, dev);
-	}
-
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		free_irq(priv->einfo->interruptPHY, dev);
-	} else {
-		del_timer_sync(&priv->phy_info_timer);
+		free_irq(priv->interruptTransmit, dev);
 	}
 
 	free_skb_resources(priv);
@@ -649,7 +549,7 @@ void stop_gfar(struct net_device *dev)
 
 /* If there are any tx skbs or rx skbs still around, free them.
  * Then free tx_skbuff and rx_skbuff */
-void free_skb_resources(struct gfar_private *priv)
+static void free_skb_resources(struct gfar_private *priv)
 {
 	struct rxbd8 *rxbdp;
 	struct txbd8 *txbdp;
@@ -770,7 +670,7 @@ int startup_gfar(struct net_device *dev)
 	    (struct sk_buff **) kmalloc(sizeof (struct sk_buff *) *
 					priv->tx_ring_size, GFP_KERNEL);
 
-	if (priv->tx_skbuff == NULL) {
+	if (NULL == priv->tx_skbuff) {
 		if (netif_msg_ifup(priv))
 			printk(KERN_ERR "%s: Could not allocate tx_skbuff\n",
 					dev->name);
@@ -785,7 +685,7 @@ int startup_gfar(struct net_device *dev)
 	    (struct sk_buff **) kmalloc(sizeof (struct sk_buff *) *
 					priv->rx_ring_size, GFP_KERNEL);
 
-	if (priv->rx_skbuff == NULL) {
+	if (NULL == priv->rx_skbuff) {
 		if (netif_msg_ifup(priv))
 			printk(KERN_ERR "%s: Could not allocate rx_skbuff\n",
 					dev->name);
@@ -879,13 +779,7 @@ int startup_gfar(struct net_device *dev)
 		}
 	}
 
-	/* Set up the PHY change work queue */
-	INIT_WORK(&priv->tq, gfar_phy_change, dev);
-
-	init_timer(&priv->phy_info_timer);
-	priv->phy_info_timer.function = &gfar_phy_startup_timer;
-	priv->phy_info_timer.data = (unsigned long) priv->mii_info;
-	mod_timer(&priv->phy_info_timer, jiffies + HZ);
+	phy_start(priv->phydev);
 
 	/* Configure the coalescing support */
 	if (priv->txcoalescing)
@@ -933,11 +827,6 @@ tx_skb_fail:
 			priv->tx_bd_base,
 			gfar_read(&regs->tbase0));
 
-	if (priv->mii_info->phyinfo->close)
-		priv->mii_info->phyinfo->close(priv->mii_info);
-
-	kfree(priv->mii_info);
-
 	return err;
 }
 
@@ -1035,7 +924,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	txbdp->status &= TXBD_WRAP;
 
 	/* Set up checksumming */
-	if ((dev->features & NETIF_F_IP_CSUM) 
+	if ((dev->features & NETIF_F_IP_CSUM)
 			&& (CHECKSUM_HW == skb->ip_summed)) {
 		fcb = gfar_add_fcb(skb, txbdp);
 		gfar_tx_checksum(skb, fcb);
@@ -1103,11 +992,9 @@ static int gfar_close(struct net_device *dev)
 	struct gfar_private *priv = netdev_priv(dev);
 	stop_gfar(dev);
 
-	/* Shutdown the PHY */
-	if (priv->mii_info->phyinfo->close)
-		priv->mii_info->phyinfo->close(priv->mii_info);
-
-	kfree(priv->mii_info);
+	/* Disconnect from the PHY */
+	phy_disconnect(priv->phydev);
+	priv->phydev = NULL;
 
 	netif_stop_queue(dev);
 
@@ -1343,7 +1230,7 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
 	while ((!skb) && timeout--)
 		skb = dev_alloc_skb(priv->rx_buffer_size + RXBUF_ALIGNMENT);
 
-	if (skb == NULL)
+	if (NULL == skb)
 		return NULL;
 
 	/* We need the data buffer to be aligned properly.  We will reserve
@@ -1490,7 +1377,7 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
 	struct gfar_private *priv = netdev_priv(dev);
 	struct rxfcb *fcb = NULL;
 
-	if (skb == NULL) {
+	if (NULL == skb) {
 		if (netif_msg_rx_err(priv))
 			printk(KERN_WARNING "%s: Missing skb!!.\n", dev->name);
 		priv->stats.rx_dropped++;
@@ -1718,131 +1605,9 @@ static irqreturn_t gfar_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t phy_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct net_device *dev = (struct net_device *) dev_id;
-	struct gfar_private *priv = netdev_priv(dev);
-
-	/* Clear the interrupt */
-	mii_clear_phy_interrupt(priv->mii_info);
-
-	/* Disable PHY interrupts */
-	mii_configure_phy_interrupt(priv->mii_info,
-			MII_INTERRUPT_DISABLED);
-
-	/* Schedule the phy change */
-	schedule_work(&priv->tq);
-
-	return IRQ_HANDLED;
-}
-
-/* Scheduled by the phy_interrupt/timer to handle PHY changes */
-static void gfar_phy_change(void *data)
-{
-	struct net_device *dev = (struct net_device *) data;
-	struct gfar_private *priv = netdev_priv(dev);
-	int result = 0;
-
-	/* Delay to give the PHY a chance to change the
-	 * register state */
-	msleep(1);
-
-	/* Update the link, speed, duplex */
-	result = priv->mii_info->phyinfo->read_status(priv->mii_info);
-
-	/* Adjust the known status as long as the link
-	 * isn't still coming up */
-	if((0 == result) || (priv->mii_info->link == 0))
-		adjust_link(dev);
-
-	/* Reenable interrupts, if needed */
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR)
-		mii_configure_phy_interrupt(priv->mii_info,
-				MII_INTERRUPT_ENABLED);
-}
-
-/* Called every so often on systems that don't interrupt
- * the core for PHY changes */
-static void gfar_phy_timer(unsigned long data)
-{
-	struct net_device *dev = (struct net_device *) data;
-	struct gfar_private *priv = netdev_priv(dev);
-
-	schedule_work(&priv->tq);
-
-	mod_timer(&priv->phy_info_timer, jiffies +
-			GFAR_PHY_CHANGE_TIME * HZ);
-}
-
-/* Keep trying aneg for some time
- * If, after GFAR_AN_TIMEOUT seconds, it has not
- * finished, we switch to forced.
- * Either way, once the process has completed, we either
- * request the interrupt, or switch the timer over to
- * using gfar_phy_timer to check status */
-static void gfar_phy_startup_timer(unsigned long data)
-{
-	int result;
-	static int secondary = GFAR_AN_TIMEOUT;
-	struct gfar_mii_info *mii_info = (struct gfar_mii_info *)data;
-	struct gfar_private *priv = netdev_priv(mii_info->dev);
-
-	/* Configure the Auto-negotiation */
-	result = mii_info->phyinfo->config_aneg(mii_info);
-
-	/* If autonegotiation failed to start, and
-	 * we haven't timed out, reset the timer, and return */
-	if (result && secondary--) {
-		mod_timer(&priv->phy_info_timer, jiffies + HZ);
-		return;
-	} else if (result) {
-		/* Couldn't start autonegotiation.
-		 * Try switching to forced */
-		mii_info->autoneg = 0;
-		result = mii_info->phyinfo->config_aneg(mii_info);
-
-		/* Forcing failed!  Give up */
-		if(result) {
-			if (netif_msg_link(priv))
-				printk(KERN_ERR "%s: Forcing failed!\n",
-						mii_info->dev->name);
-			return;
-		}
-	}
-
-	/* Kill the timer so it can be restarted */
-	del_timer_sync(&priv->phy_info_timer);
-
-	/* Grab the PHY interrupt, if necessary/possible */
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		if (request_irq(priv->einfo->interruptPHY,
-					phy_interrupt,
-					SA_SHIRQ,
-					"phy_interrupt",
-					mii_info->dev) < 0) {
-			if (netif_msg_intr(priv))
-				printk(KERN_ERR "%s: Can't get IRQ %d (PHY)\n",
-						mii_info->dev->name,
-					priv->einfo->interruptPHY);
-		} else {
-			mii_configure_phy_interrupt(priv->mii_info,
-					MII_INTERRUPT_ENABLED);
-			return;
-		}
-	}
-
-	/* Start the timer again, this time in order to
-	 * handle a change in status */
-	init_timer(&priv->phy_info_timer);
-	priv->phy_info_timer.function = &gfar_phy_timer;
-	priv->phy_info_timer.data = (unsigned long) mii_info->dev;
-	mod_timer(&priv->phy_info_timer, jiffies +
-			GFAR_PHY_CHANGE_TIME * HZ);
-}
-
 /* Called every time the controller might need to be made
  * aware of new link state.  The PHY code conveys this
- * information through variables in the priv structure, and this
+ * information through variables in the phydev structure, and this
  * function converts those variables into the appropriate
  * register values, and can bring down the device if needed.
  */
@@ -1850,84 +1615,68 @@ static void adjust_link(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar *regs = priv->regs;
-	u32 tempval;
-	struct gfar_mii_info *mii_info = priv->mii_info;
+	unsigned long flags;
+	struct phy_device *phydev = priv->phydev;
+	int new_state = 0;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (phydev->link) {
+		u32 tempval = gfar_read(&regs->maccfg2);
 
-	if (mii_info->link) {
 		/* Now we make sure that we can be in full duplex mode.
 		 * If not, we operate in half-duplex mode. */
-		if (mii_info->duplex != priv->oldduplex) {
-			if (!(mii_info->duplex)) {
-				tempval = gfar_read(&regs->maccfg2);
+		if (phydev->duplex != priv->oldduplex) {
+			new_state = 1;
+			if (!(phydev->duplex))
 				tempval &= ~(MACCFG2_FULL_DUPLEX);
-				gfar_write(&regs->maccfg2, tempval);
-
-				if (netif_msg_link(priv))
-					printk(KERN_INFO "%s: Half Duplex\n",
-							dev->name);
-			} else {
-				tempval = gfar_read(&regs->maccfg2);
+			else
 				tempval |= MACCFG2_FULL_DUPLEX;
-				gfar_write(&regs->maccfg2, tempval);
 
-				if (netif_msg_link(priv))
-					printk(KERN_INFO "%s: Full Duplex\n",
-							dev->name);
-			}
-
-			priv->oldduplex = mii_info->duplex;
+			priv->oldduplex = phydev->duplex;
 		}
 
-		if (mii_info->speed != priv->oldspeed) {
-			switch (mii_info->speed) {
+		if (phydev->speed != priv->oldspeed) {
+			new_state = 1;
+			switch (phydev->speed) {
 			case 1000:
-				tempval = gfar_read(&regs->maccfg2);
 				tempval =
 				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
-				gfar_write(&regs->maccfg2, tempval);
 				break;
 			case 100:
 			case 10:
-				tempval = gfar_read(&regs->maccfg2);
 				tempval =
 				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
-				gfar_write(&regs->maccfg2, tempval);
 				break;
 			default:
 				if (netif_msg_link(priv))
 					printk(KERN_WARNING
-							"%s: Ack!  Speed (%d) is not 10/100/1000!\n",
-							dev->name, mii_info->speed);
+						"%s: Ack!  Speed (%d) is not 10/100/1000!\n",
+						dev->name, phydev->speed);
 				break;
 			}
 
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Speed %dBT\n", dev->name,
-						mii_info->speed);
-
-			priv->oldspeed = mii_info->speed;
+			priv->oldspeed = phydev->speed;
 		}
 
+		gfar_write(&regs->maccfg2, tempval);
+
 		if (!priv->oldlink) {
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Link is up\n", dev->name);
+			new_state = 1;
 			priv->oldlink = 1;
-			netif_carrier_on(dev);
 			netif_schedule(dev);
 		}
-	} else {
-		if (priv->oldlink) {
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Link is down\n",
-						dev->name);
-			priv->oldlink = 0;
-			priv->oldspeed = 0;
-			priv->oldduplex = -1;
-			netif_carrier_off(dev);
-		}
+	} else if (priv->oldlink) {
+		new_state = 1;
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
 	}
-}
 
+	if (new_state && netif_msg_link(priv))
+		phy_print_status(phydev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
 
 /* Update the hash table based on the current list of multicast
  * addresses we subscribe to.  Also, change the promiscuity of
@@ -2122,12 +1871,23 @@ static struct device_driver gfar_driver = {
 
 static int __init gfar_init(void)
 {
-	return driver_register(&gfar_driver);
+	int err = gfar_mdio_init();
+
+	if (err)
+		return err;
+
+	err = driver_register(&gfar_driver);
+
+	if (err)
+		gfar_mdio_exit();
+	
+	return err;
 }
 
 static void __exit gfar_exit(void)
 {
 	driver_unregister(&gfar_driver);
+	gfar_mdio_exit();
 }
 
 module_init(gfar_init);

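The gianfar rewrite swaps the driver's hand-rolled PHY handling (timers, workqueue, private PHY interrupt handler) for the generic phylib: phy_connect() registers adjust_link() as a callback, and that callback's only job is to diff phydev->link/speed/duplex against cached values and reprogram MACCFG2 once when something actually changed. A minimal standalone model of the compare-and-apply pattern (the structs and printf are stand-ins, not phylib API):

#include <stdio.h>

struct phy_state { int link, speed, duplex; };
struct mac_state { int oldlink, oldspeed, oldduplex; };

/* adjust_link-style callback: apply only the deltas, and report once
 * per state change rather than on every poll. */
static void adjust_link(struct mac_state *mac, const struct phy_state *phy)
{
    int new_state = 0;

    if (phy->link) {
        if (phy->duplex != mac->oldduplex) {
            new_state = 1;
            mac->oldduplex = phy->duplex;   /* would rewrite MACCFG2 here */
        }
        if (phy->speed != mac->oldspeed) {
            new_state = 1;
            mac->oldspeed = phy->speed;     /* would select GMII vs MII */
        }
        if (!mac->oldlink) {
            new_state = 1;
            mac->oldlink = 1;
        }
    } else if (mac->oldlink) {
        new_state = 1;
        mac->oldlink = 0;
        mac->oldspeed = 0;
        mac->oldduplex = -1;
    }

    if (new_state)                          /* phy_print_status() analogue */
        printf("link %s, %d Mb/s, %s duplex\n",
               phy->link ? "up" : "down", phy->speed,
               phy->duplex ? "full" : "half");
}

int main(void)
{
    struct mac_state mac = { 0, 0, -1 };
    struct phy_state up  = { 1, 1000, 1 };

    adjust_link(&mac, &up);   /* prints once */
    adjust_link(&mac, &up);   /* no change: silent */
    return 0;
}
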
+ 16 - 14
drivers/net/gianfar.h

@@ -17,7 +17,6 @@
  *
  *  Still left to do:
  *      -Add support for module parameters
- *	-Add support for ethtool -s
  *	-Add patch for ethtool phys id
  */
 #ifndef __GIANFAR_H
@@ -37,7 +36,8 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -48,7 +48,8 @@
 #include <linux/workqueue.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
-#include "gianfar_phy.h"
+#include <linux/fsl_devices.h>
+#include "gianfar_mii.h"
 
 /* The maximum number of packets to be handled in one call of gfar_poll */
 #define GFAR_DEV_WEIGHT 64
@@ -73,7 +74,7 @@
 #define PHY_INIT_TIMEOUT 100000
 #define GFAR_PHY_CHANGE_TIME 2
 
-#define DEVICE_NAME "%s: Gianfar Ethernet Controller Version 1.1, "
+#define DEVICE_NAME "%s: Gianfar Ethernet Controller Version 1.2, "
 #define DRV_NAME "gfar-enet"
 extern const char gfar_driver_name[];
 extern const char gfar_driver_version[];
@@ -578,12 +579,7 @@ struct gfar {
 	u32	hafdup;		/* 0x.50c - Half Duplex Register */
 	u32	maxfrm;		/* 0x.510 - Maximum Frame Length Register */
 	u8	res18[12];
-	u32	miimcfg;	/* 0x.520 - MII Management Configuration Register */
-	u32	miimcom;	/* 0x.524 - MII Management Command Register */
-	u32	miimadd;	/* 0x.528 - MII Management Address Register */
-	u32	miimcon;	/* 0x.52c - MII Management Control Register */
-	u32	miimstat;	/* 0x.530 - MII Management Status Register */
-	u32	miimind;	/* 0x.534 - MII Management Indicator Register */
+	u8	gfar_mii_regs[24];	/* See gianfar_phy.h */
 	u8	res19[4];
 	u32	ifstat;		/* 0x.53c - Interface Status Register */
 	u32	macstnaddr1;	/* 0x.540 - Station Address Part 1 Register */
@@ -688,9 +684,6 @@ struct gfar_private {
 	struct gfar *regs;	/* Pointer to the GFAR memory mapped Registers */
 	u32 *hash_regs[16];
 	int hash_width;
-	struct gfar *phyregs;
-	struct work_struct tq;
-	struct timer_list phy_info_timer;
 	struct net_device_stats stats; /* linux network statistics */
 	struct gfar_extra_stats extra_stats;
 	spinlock_t lock;
@@ -710,7 +703,8 @@ struct gfar_private {
 	unsigned int interruptError;
 	struct gianfar_platform_data *einfo;
 
-	struct gfar_mii_info *mii_info;
+	struct phy_device *phydev;
+	struct mii_bus *mii_bus;
 	int oldspeed;
 	int oldduplex;
 	int oldlink;
@@ -732,4 +726,12 @@ extern inline void gfar_write(volatile unsigned *addr, u32 val)
 
 extern struct ethtool_ops *gfar_op_array[];
 
+extern irqreturn_t gfar_receive(int irq, void *dev_id, struct pt_regs *regs);
+extern int startup_gfar(struct net_device *dev);
+extern void stop_gfar(struct net_device *dev);
+extern void gfar_halt(struct net_device *dev);
+extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev,
+		int enable, u32 regnum, u32 read);
+void gfar_setup_stashing(struct net_device *dev);
+
 #endif /* __GIANFAR_H */

+ 64 - 36
drivers/net/gianfar_ethtool.c

@@ -39,17 +39,18 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include "gianfar.h"
 
 #define is_power_of_2(x)        ((x) != 0 && (((x) & ((x) - 1)) == 0))
 
-extern int startup_gfar(struct net_device *dev);
-extern void stop_gfar(struct net_device *dev);
-extern void gfar_halt(struct net_device *dev);
 extern void gfar_start(struct net_device *dev);
 extern int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit);
 
+#define GFAR_MAX_COAL_USECS 0xffff
+#define GFAR_MAX_COAL_FRAMES 0xff
 static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
 		     u64 * buf);
 static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf);
@@ -182,38 +183,32 @@ static void gfar_gdrvinfo(struct net_device *dev, struct
 	drvinfo->eedump_len = 0;
 }
 
+
+static int gfar_ssettings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+
+	if (NULL == phydev)
+		return -ENODEV;
+
+	return phy_ethtool_sset(phydev, cmd);
+}
+
+
 /* Return the current settings in the ethtool_cmd structure */
 static int gfar_gsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	uint gigabit_support = 
-		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-			SUPPORTED_1000baseT_Full : 0;
-	uint gigabit_advert = 
-		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-			ADVERTISED_1000baseT_Full: 0;
-
-	cmd->supported = (SUPPORTED_10baseT_Half
-			  | SUPPORTED_100baseT_Half
-			  | SUPPORTED_100baseT_Full
-			  | gigabit_support | SUPPORTED_Autoneg);
-
-	/* For now, we always advertise everything */
-	cmd->advertising = (ADVERTISED_10baseT_Half
-			    | ADVERTISED_100baseT_Half
-			    | ADVERTISED_100baseT_Full
-			    | gigabit_advert | ADVERTISED_Autoneg);
-
-	cmd->speed = priv->mii_info->speed;
-	cmd->duplex = priv->mii_info->duplex;
-	cmd->port = PORT_MII;
-	cmd->phy_address = priv->mii_info->mii_id;
-	cmd->transceiver = XCVR_EXTERNAL;
-	cmd->autoneg = AUTONEG_ENABLE;
+	struct phy_device *phydev = priv->phydev;
+
+	if (NULL == phydev)
+		return -ENODEV;
+	
 	cmd->maxtxpkt = priv->txcount;
 	cmd->maxrxpkt = priv->rxcount;
 
-	return 0;
+	return phy_ethtool_gset(phydev, cmd);
 }
 
 /* Return the length of the register structure */
@@ -241,14 +236,14 @@ static unsigned int gfar_usecs2ticks(struct gfar_private *priv, unsigned int use
 	unsigned int count;
 
 	/* The timer is different, depending on the interface speed */
-	switch (priv->mii_info->speed) {
-	case 1000:
+	switch (priv->phydev->speed) {
+	case SPEED_1000:
 		count = GFAR_GBIT_TIME;
 		break;
-	case 100:
+	case SPEED_100:
 		count = GFAR_100_TIME;
 		break;
-	case 10:
+	case SPEED_10:
 	default:
 		count = GFAR_10_TIME;
 		break;
@@ -265,14 +260,14 @@ static unsigned int gfar_ticks2usecs(struct gfar_private *priv, unsigned int tic
 	unsigned int count;
 
 	/* The timer is different, depending on the interface speed */
-	switch (priv->mii_info->speed) {
-	case 1000:
+	switch (priv->phydev->speed) {
+	case SPEED_1000:
 		count = GFAR_GBIT_TIME;
 		break;
-	case 100:
+	case SPEED_100:
 		count = GFAR_100_TIME;
 		break;
-	case 10:
+	case SPEED_10:
 	default:
 		count = GFAR_10_TIME;
 		break;
@@ -292,6 +287,9 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
 		return -EOPNOTSUPP;
 
+	if (NULL == priv->phydev)
+		return -ENODEV;
+
 	cvals->rx_coalesce_usecs = gfar_ticks2usecs(priv, priv->rxtime);
 	cvals->rx_max_coalesced_frames = priv->rxcount;
 
@@ -348,6 +346,22 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	else
 		priv->rxcoalescing = 1;
 
+	if (NULL == priv->phydev)
+		return -ENODEV;
+
+	/* Check the bounds of the values */
+	if (cvals->rx_coalesce_usecs > GFAR_MAX_COAL_USECS) {
+		pr_info("Coalescing is limited to %d microseconds\n",
+				GFAR_MAX_COAL_USECS);
+		return -EINVAL;
+	}
+
+	if (cvals->rx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) {
+		pr_info("Coalescing is limited to %d frames\n",
+				GFAR_MAX_COAL_FRAMES);
+		return -EINVAL;
+	}
+
 	priv->rxtime = gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs);
 	priv->rxcount = cvals->rx_max_coalesced_frames;
 
@@ -358,6 +372,19 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	else
 		priv->txcoalescing = 1;
 
+	/* Check the bounds of the values */
+	if (cvals->tx_coalesce_usecs > GFAR_MAX_COAL_USECS) {
+		pr_info("Coalescing is limited to %d microseconds\n",
+				GFAR_MAX_COAL_USECS);
+		return -EINVAL;
+	}
+
+	if (cvals->tx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) {
+		pr_info("Coalescing is limited to %d frames\n",
+				GFAR_MAX_COAL_FRAMES);
+		return -EINVAL;
+	}
+
 	priv->txtime = gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs);
 	priv->txcount = cvals->tx_max_coalesced_frames;
 
@@ -536,6 +563,7 @@ static void gfar_set_msglevel(struct net_device *dev, uint32_t data)
 
 struct ethtool_ops gfar_ethtool_ops = {
 	.get_settings = gfar_gsettings,
+	.set_settings = gfar_ssettings,
 	.get_drvinfo = gfar_gdrvinfo,
 	.get_regs_len = gfar_reglen,
 	.get_regs = gfar_get_regs,

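Two details in the ethtool hunks: link settings now delegate directly to phylib (phy_ethtool_gset/phy_ethtool_sset) behind a NULL-phydev guard, and coalescing values are bounds-checked against the hardware field widths before the usecs-to-ticks conversion, which is speed-dependent. A standalone sketch of bounds-check-then-convert (the per-speed tick constants are invented for illustration; the real driver derives its values from GFAR_GBIT_TIME and friends):

#include <stdio.h>

#define MAX_COAL_USECS 0xffff

/* Invented per-speed constants: ticks per microsecond, scaled so the
 * same tick count means the same wall-clock delay at each link speed. */
static unsigned int ticks_per_usec(int speed)
{
    switch (speed) {
    case 1000: return 12;
    case 100:  return 24;
    case 10:
    default:   return 48;
    }
}

/* Bounds-check first, then convert, as the patch does in gfar_scoalesce(). */
static int usecs_to_ticks(int speed, unsigned int usecs, unsigned int *ticks)
{
    if (usecs > MAX_COAL_USECS)
        return -1;                        /* driver returns -EINVAL */
    *ticks = usecs * ticks_per_usec(speed);
    return 0;
}

int main(void)
{
    unsigned int t;

    if (usecs_to_ticks(1000, 100, &t) == 0)
        printf("100 usecs at gigabit = %u ticks\n", t);
    return 0;
}
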
+ 219 - 0
drivers/net/gianfar_mii.c

@@ -0,0 +1,219 @@
+/* 
+ * drivers/net/gianfar_mii.c
+ *
+ * Gianfar Ethernet Driver -- MIIM bus implementation
+ * Provides Bus interface for MIIM regs
+ *
+ * Author: Andy Fleming
+ * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ *
+ * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/ocp.h>
+#include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+#include "gianfar.h"
+#include "gianfar_mii.h"
+
+/* Write value to the PHY at mii_id at register regnum,
+ * on the bus, waiting until the write is done before returning.
+ * All PHY configuration is done through the TSEC1 MIIM regs */
+int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value)
+{
+	struct gfar_mii *regs = bus->priv;
+
+	/* Set the PHY address and the register address we want to write */
+	gfar_write(&regs->miimadd, (mii_id << 8) | regnum);
+
+	/* Write out the value we want */
+	gfar_write(&regs->miimcon, value);
+
+	/* Wait for the transaction to finish */
+	while (gfar_read(&regs->miimind) & MIIMIND_BUSY)
+		cpu_relax();
+
+	return 0;
+}
+
+/* Read the bus for PHY at addr mii_id, register regnum, and
+ * return the value.  Clears miimcom first.  All PHY
+ * configuration has to be done through the TSEC1 MIIM regs */
+int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct gfar_mii *regs = bus->priv;
+	u16 value;
+
+	/* Set the PHY address and the register address we want to read */
+	gfar_write(&regs->miimadd, (mii_id << 8) | regnum);
+
+	/* Clear miimcom, and then initiate a read */
+	gfar_write(&regs->miimcom, 0);
+	gfar_write(&regs->miimcom, MII_READ_COMMAND);
+
+	/* Wait for the transaction to finish */
+	while (gfar_read(&regs->miimind) & (MIIMIND_NOTVALID | MIIMIND_BUSY))
+		cpu_relax();
+
+	/* Grab the value of the register from miimstat */
+	value = gfar_read(&regs->miimstat);
+
+	return value;
+}
+
+
+/* Reset the MIIM registers, and wait for the bus to free */
+int gfar_mdio_reset(struct mii_bus *bus)
+{
+	struct gfar_mii *regs = bus->priv;
+	unsigned int timeout = PHY_INIT_TIMEOUT;
+
+	spin_lock_bh(&bus->mdio_lock);
+
+	/* Reset the management interface */
+	gfar_write(&regs->miimcfg, MIIMCFG_RESET);
+
+	/* Setup the MII Mgmt clock speed */
+	gfar_write(&regs->miimcfg, MIIMCFG_INIT_VALUE);
+
+	/* Wait until the bus is free */
+	while ((gfar_read(&regs->miimind) & MIIMIND_BUSY) &&
+			timeout--)
+		cpu_relax();
+
+	spin_unlock_bh(&bus->mdio_lock);
+
+	if(timeout <= 0) {
+		printk(KERN_ERR "%s: The MII Bus is stuck!\n",
+				bus->name);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+
+int gfar_mdio_probe(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct gianfar_mdio_data *pdata;
+	struct gfar_mii *regs;
+	struct mii_bus *new_bus;
+	int err = 0;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	new_bus = kmalloc(sizeof(struct mii_bus), GFP_KERNEL);
+
+	if (NULL == new_bus)
+		return -ENOMEM;
+
+	new_bus->name = "Gianfar MII Bus";
+	new_bus->read = &gfar_mdio_read;
+	new_bus->write = &gfar_mdio_write;
+	new_bus->reset = &gfar_mdio_reset;
+	new_bus->id = pdev->id;
+
+	pdata = (struct gianfar_mdio_data *)pdev->dev.platform_data;
+
+	if (NULL == pdata) {
+		printk(KERN_ERR "gfar mdio %d: Missing platform data!\n", pdev->id);
+		return -ENODEV;
+	}
+
+	/* Set the PHY base address */
+	regs = (struct gfar_mii *) ioremap(pdata->paddr, 
+			sizeof (struct gfar_mii));
+
+	if (NULL == regs) {
+		err = -ENOMEM;
+		goto reg_map_fail;
+	}
+
+	new_bus->priv = regs;
+
+	new_bus->irq = pdata->irq;
+
+	new_bus->dev = dev;
+	dev_set_drvdata(dev, new_bus);
+
+	err = mdiobus_register(new_bus);
+
+	if (0 != err) {
+		printk (KERN_ERR "%s: Cannot register as MDIO bus\n", 
+				new_bus->name);
+		goto bus_register_fail;
+	}
+
+	return 0;
+
+bus_register_fail:
+	iounmap((void *) regs);
+reg_map_fail:
+	kfree(new_bus);
+
+	return err;
+}
+
+
+int gfar_mdio_remove(struct device *dev)
+{
+	struct mii_bus *bus = dev_get_drvdata(dev);
+
+	mdiobus_unregister(bus);
+
+	dev_set_drvdata(dev, NULL);
+
+	iounmap((void *) bus->priv);
+	bus->priv = NULL;
+	kfree(bus);
+
+	return 0;
+}
+
+static struct device_driver gianfar_mdio_driver = {
+	.name = "fsl-gianfar_mdio",
+	.bus = &platform_bus_type,
+	.probe = gfar_mdio_probe,
+	.remove = gfar_mdio_remove,
+};
+
+int __init gfar_mdio_init(void)
+{
+	return driver_register(&gianfar_mdio_driver);
+}
+
+void __exit gfar_mdio_exit(void)
+{
+	driver_unregister(&gianfar_mdio_driver);
+}

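The new gianfar_mii.c is the MDIO bus backend, and its read path is a small fixed register protocol: write the (phy, register) pair to miimadd, clear miimcom, issue the read command, spin until BUSY and NOTVALID clear in miimind, then fetch the value from miimstat. A standalone model of that sequence against a simulated register block (the fake device below is purely illustrative):

#include <stdio.h>
#include <stdint.h>

#define MIIMIND_BUSY      0x00000001
#define MIIMIND_NOTVALID  0x00000004
#define MII_READ_COMMAND  0x00000001

struct fake_mii {
    uint32_t miimadd, miimcom, miimstat, miimind;
};

/* Simulated hardware: completes a read instantly, returning a value
 * derived from the address, just so the sequence can be exercised. */
static void fake_hw_step(struct fake_mii *r)
{
    if (r->miimcom & MII_READ_COMMAND) {
        r->miimstat = 0x780d ^ r->miimadd;  /* arbitrary payload */
        r->miimind = 0;                     /* not busy, result valid */
    }
}

static uint16_t mdio_read(struct fake_mii *r, int phy, int reg)
{
    r->miimadd = (uint32_t)(phy << 8) | (uint32_t)reg;
    r->miimcom = 0;                   /* clear, then command, as above */
    r->miimcom = MII_READ_COMMAND;
    fake_hw_step(r);
    while (r->miimind & (MIIMIND_NOTVALID | MIIMIND_BUSY))
        ;                             /* real driver spins on cpu_relax() */
    return (uint16_t)r->miimstat;
}

int main(void)
{
    struct fake_mii regs = { 0, 0, 0, MIIMIND_BUSY };

    printf("phy 0 reg 2 = 0x%04x\n", mdio_read(&regs, 0, 2));
    return 0;
}
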
+ 45 - 0
drivers/net/gianfar_mii.h

@@ -0,0 +1,45 @@
+/* 
+ * drivers/net/gianfar_mii.h
+ *
+ * Gianfar Ethernet Driver -- MII Management Bus Implementation
+ * Driver for the MDIO bus controller in the Gianfar register space
+ *
+ * Author: Andy Fleming
+ * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ *
+ * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#ifndef __GIANFAR_MII_H
+#define __GIANFAR_MII_H
+
+#define MIIMIND_BUSY            0x00000001
+#define MIIMIND_NOTVALID        0x00000004
+
+#define MII_READ_COMMAND       0x00000001
+
+#define GFAR_SUPPORTED (SUPPORTED_10baseT_Half \
+		| SUPPORTED_100baseT_Half \
+		| SUPPORTED_100baseT_Full \
+		| SUPPORTED_Autoneg \
+		| SUPPORTED_MII)
+
+struct gfar_mii {
+	u32	miimcfg;	/* 0x.520 - MII Management Config Register */
+	u32	miimcom;	/* 0x.524 - MII Management Command Register */
+	u32	miimadd;	/* 0x.528 - MII Management Address Register */
+	u32	miimcon;	/* 0x.52c - MII Management Control Register */
+	u32	miimstat;	/* 0x.530 - MII Management Status Register */
+	u32	miimind;	/* 0x.534 - MII Management Indicator Register */
+};
+
+int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum);
+int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+int __init gfar_mdio_init(void);
+void __exit gfar_mdio_exit(void);
+#endif /* __GIANFAR_MII_H */

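struct gfar_mii mirrors the six MIIM registers at offsets 0x520-0x534 of the TSEC register file, which is why gfar_mdio_probe() can ioremap() pdata->paddr directly onto it. A sketch of the resulting access pattern (paddr is supplied by platform code, so it is a placeholder here):

/* Sketch: address the MIIM block as ordinary struct fields. */
struct gfar_mii *regs = ioremap(paddr, sizeof(struct gfar_mii));

gfar_write(&regs->miimadd, (mii_id << 8) | regnum);	/* PHY addr | register */
while (gfar_read(&regs->miimind) & MIIMIND_BUSY)
	cpu_relax();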
+ 0 - 661
drivers/net/gianfar_phy.c

@@ -1,661 +0,0 @@
-/* 
- * drivers/net/gianfar_phy.c
- *
- * Gianfar Ethernet Driver -- PHY handling
- * Driver for FEC on MPC8540 and TSEC on MPC8540/MPC8560
- * Based on 8260_io/fcc_enet.c
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
- *
- * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/crc32.h>
-#include <linux/mii.h>
-
-#include "gianfar.h"
-#include "gianfar_phy.h"
-
-static void config_genmii_advert(struct gfar_mii_info *mii_info);
-static void genmii_setup_forced(struct gfar_mii_info *mii_info);
-static void genmii_restart_aneg(struct gfar_mii_info *mii_info);
-static int gbit_config_aneg(struct gfar_mii_info *mii_info);
-static int genmii_config_aneg(struct gfar_mii_info *mii_info);
-static int genmii_update_link(struct gfar_mii_info *mii_info);
-static int genmii_read_status(struct gfar_mii_info *mii_info);
-u16 phy_read(struct gfar_mii_info *mii_info, u16 regnum);
-void phy_write(struct gfar_mii_info *mii_info, u16 regnum, u16 val);
-
-/* Write value to the PHY for this device to the register at regnum, */
-/* waiting until the write is done before it returns.  All PHY */
-/* configuration has to be done through the TSEC1 MIIM regs */
-void write_phy_reg(struct net_device *dev, int mii_id, int regnum, int value)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar *regbase = priv->phyregs;
-
-	/* Set the PHY address and the register address we want to write */
-	gfar_write(&regbase->miimadd, (mii_id << 8) | regnum);
-
-	/* Write out the value we want */
-	gfar_write(&regbase->miimcon, value);
-
-	/* Wait for the transaction to finish */
-	while (gfar_read(&regbase->miimind) & MIIMIND_BUSY)
-		cpu_relax();
-}
-
-/* Reads from register regnum in the PHY for device dev, */
-/* returning the value.  Clears miimcom first.  All PHY */
-/* configuration has to be done through the TSEC1 MIIM regs */
-int read_phy_reg(struct net_device *dev, int mii_id, int regnum)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar *regbase = priv->phyregs;
-	u16 value;
-
-	/* Set the PHY address and the register address we want to read */
-	gfar_write(&regbase->miimadd, (mii_id << 8) | regnum);
-
-	/* Clear miimcom, and then initiate a read */
-	gfar_write(&regbase->miimcom, 0);
-	gfar_write(&regbase->miimcom, MII_READ_COMMAND);
-
-	/* Wait for the transaction to finish */
-	while (gfar_read(&regbase->miimind) & (MIIMIND_NOTVALID | MIIMIND_BUSY))
-		cpu_relax();
-
-	/* Grab the value of the register from miimstat */
-	value = gfar_read(&regbase->miimstat);
-
-	return value;
-}
-
-void mii_clear_phy_interrupt(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->phyinfo->ack_interrupt)
-		mii_info->phyinfo->ack_interrupt(mii_info);
-}
-
-
-void mii_configure_phy_interrupt(struct gfar_mii_info *mii_info, u32 interrupts)
-{
-	mii_info->interrupts = interrupts;
-	if(mii_info->phyinfo->config_intr)
-		mii_info->phyinfo->config_intr(mii_info);
-}
-
-
-/* Writes MII_ADVERTISE with the appropriate values, after
- * sanitizing advertise to make sure only supported features
- * are advertised 
- */
-static void config_genmii_advert(struct gfar_mii_info *mii_info)
-{
-	u32 advertise;
-	u16 adv;
-
-	/* Only allow advertising what this PHY supports */
-	mii_info->advertising &= mii_info->phyinfo->features;
-	advertise = mii_info->advertising;
-
-	/* Setup standard advertisement */
-	adv = phy_read(mii_info, MII_ADVERTISE);
-	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-	if (advertise & ADVERTISED_10baseT_Half)
-		adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		adv |= ADVERTISE_100FULL;
-	phy_write(mii_info, MII_ADVERTISE, adv);
-}
-
-static void genmii_setup_forced(struct gfar_mii_info *mii_info)
-{
-	u16 ctrl;
-	u32 features = mii_info->phyinfo->features;
-	
-	ctrl = phy_read(mii_info, MII_BMCR);
-
-	ctrl &= ~(BMCR_FULLDPLX|BMCR_SPEED100|BMCR_SPEED1000|BMCR_ANENABLE);
-	ctrl |= BMCR_RESET;
-
-	switch(mii_info->speed) {
-		case SPEED_1000:
-			if(features & (SUPPORTED_1000baseT_Half
-						| SUPPORTED_1000baseT_Full)) {
-				ctrl |= BMCR_SPEED1000;
-				break;
-			}
-			mii_info->speed = SPEED_100;
-		case SPEED_100:
-			if (features & (SUPPORTED_100baseT_Half
-						| SUPPORTED_100baseT_Full)) {
-				ctrl |= BMCR_SPEED100;
-				break;
-			}
-			mii_info->speed = SPEED_10;
-		case SPEED_10:
-			if (features & (SUPPORTED_10baseT_Half
-						| SUPPORTED_10baseT_Full))
-				break;
-		default: /* Unsupported speed! */
-			printk(KERN_ERR "%s: Bad speed!\n", 
-					mii_info->dev->name);
-			break;
-	}
-
-	phy_write(mii_info, MII_BMCR, ctrl);
-}
-
-
-/* Enable and Restart Autonegotiation */
-static void genmii_restart_aneg(struct gfar_mii_info *mii_info)
-{
-	u16 ctl;
-
-	ctl = phy_read(mii_info, MII_BMCR);
-	ctl |= (BMCR_ANENABLE | BMCR_ANRESTART);
-	phy_write(mii_info, MII_BMCR, ctl);
-}
-
-
-static int gbit_config_aneg(struct gfar_mii_info *mii_info)
-{
-	u16 adv;
-	u32 advertise;
-
-	if(mii_info->autoneg) {
-		/* Configure the ADVERTISE register */
-		config_genmii_advert(mii_info);
-		advertise = mii_info->advertising;
-
-		adv = phy_read(mii_info, MII_1000BASETCONTROL);
-		adv &= ~(MII_1000BASETCONTROL_FULLDUPLEXCAP |
-				MII_1000BASETCONTROL_HALFDUPLEXCAP);
-		if (advertise & SUPPORTED_1000baseT_Half)
-			adv |= MII_1000BASETCONTROL_HALFDUPLEXCAP;
-		if (advertise & SUPPORTED_1000baseT_Full)
-			adv |= MII_1000BASETCONTROL_FULLDUPLEXCAP;
-		phy_write(mii_info, MII_1000BASETCONTROL, adv);
-
-		/* Start/Restart aneg */
-		genmii_restart_aneg(mii_info);
-	} else
-		genmii_setup_forced(mii_info);
-
-	return 0;
-}
-
-static int marvell_config_aneg(struct gfar_mii_info *mii_info)
-{
-	/* The Marvell PHY has an errata which requires
-	 * that certain registers get written in order
-	 * to restart autonegotiation */
-	phy_write(mii_info, MII_BMCR, BMCR_RESET);
-
-	phy_write(mii_info, 0x1d, 0x1f);
-	phy_write(mii_info, 0x1e, 0x200c);
-	phy_write(mii_info, 0x1d, 0x5);
-	phy_write(mii_info, 0x1e, 0);
-	phy_write(mii_info, 0x1e, 0x100);
-
-	gbit_config_aneg(mii_info);
-
-	return 0;
-}
-static int genmii_config_aneg(struct gfar_mii_info *mii_info)
-{
-	if (mii_info->autoneg) {
-		config_genmii_advert(mii_info);
-		genmii_restart_aneg(mii_info);
-	} else
-		genmii_setup_forced(mii_info);
-
-	return 0;
-}
-
-
-static int genmii_update_link(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-
-	/* Do a fake read */
-	phy_read(mii_info, MII_BMSR);
-
-	/* Read link and autonegotiation status */
-	status = phy_read(mii_info, MII_BMSR);
-	if ((status & BMSR_LSTATUS) == 0)
-		mii_info->link = 0;
-	else
-		mii_info->link = 1;
-
-	/* If we are autonegotiating, and not done, 
-	 * return an error */
-	if (mii_info->autoneg && !(status & BMSR_ANEGCOMPLETE))
-		return -EAGAIN;
-
-	return 0;
-}
-
-static int genmii_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	if (mii_info->autoneg) {
-		status = phy_read(mii_info, MII_LPA);
-
-		if (status & (LPA_10FULL | LPA_100FULL))
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-		if (status & (LPA_100FULL | LPA_100HALF))
-			mii_info->speed = SPEED_100;
-		else
-			mii_info->speed = SPEED_10;
-		mii_info->pause = 0;
-	}
-	/* On non-aneg, we assume what we put in BMCR is the speed,
-	 * though magic-aneg shouldn't prevent this case from occurring
-	 */
-
-	return 0;
-}
-static int marvell_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		int speed;
-		status = phy_read(mii_info, MII_M1011_PHY_SPEC_STATUS);
-
-#if 0
-		/* If speed and duplex aren't resolved,
-		 * return an error.  Isn't this handled
-		 * by checking aneg?
-		 */
-		if ((status & MII_M1011_PHY_SPEC_STATUS_RESOLVED) == 0)
-			return -EAGAIN;
-#endif
-
-		/* Get the duplexity */
-		if (status & MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX)
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-
-		/* Get the speed */
-		speed = status & MII_M1011_PHY_SPEC_STATUS_SPD_MASK;
-		switch(speed) {
-			case MII_M1011_PHY_SPEC_STATUS_1000:
-				mii_info->speed = SPEED_1000;
-				break;
-			case MII_M1011_PHY_SPEC_STATUS_100:
-				mii_info->speed = SPEED_100;
-				break;
-			default:
-				mii_info->speed = SPEED_10;
-				break;
-		}
-		mii_info->pause = 0;
-	}
-
-	return 0;
-}
-
-
-static int cis820x_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		int speed;
-
-		status = phy_read(mii_info, MII_CIS8201_AUX_CONSTAT);
-		if (status & MII_CIS8201_AUXCONSTAT_DUPLEX)
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-
-		speed = status & MII_CIS8201_AUXCONSTAT_SPEED;
-
-		switch (speed) {
-		case MII_CIS8201_AUXCONSTAT_GBIT:
-			mii_info->speed = SPEED_1000;
-			break;
-		case MII_CIS8201_AUXCONSTAT_100:
-			mii_info->speed = SPEED_100;
-			break;
-		default:
-			mii_info->speed = SPEED_10;
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int marvell_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	/* Clear the interrupts by reading the reg */
-	phy_read(mii_info, MII_M1011_IEVENT);
-
-	return 0;
-}
-
-static int marvell_config_intr(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->interrupts == MII_INTERRUPT_ENABLED)
-		phy_write(mii_info, MII_M1011_IMASK, MII_M1011_IMASK_INIT);
-	else
-		phy_write(mii_info, MII_M1011_IMASK, MII_M1011_IMASK_CLEAR);
-
-	return 0;
-}
-
-static int cis820x_init(struct gfar_mii_info *mii_info)
-{
-	phy_write(mii_info, MII_CIS8201_AUX_CONSTAT, 
-			MII_CIS8201_AUXCONSTAT_INIT);
-	phy_write(mii_info, MII_CIS8201_EXT_CON1,
-			MII_CIS8201_EXTCON1_INIT);
-
-	return 0;
-}
-
-static int cis820x_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	phy_read(mii_info, MII_CIS8201_ISTAT);
-
-	return 0;
-}
-
-static int cis820x_config_intr(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->interrupts == MII_INTERRUPT_ENABLED)
-		phy_write(mii_info, MII_CIS8201_IMASK, MII_CIS8201_IMASK_MASK);
-	else
-		phy_write(mii_info, MII_CIS8201_IMASK, 0);
-
-	return 0;
-}
-
-#define DM9161_DELAY 10
-
-static int dm9161_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		status = phy_read(mii_info, MII_DM9161_SCSR);
-		if (status & (MII_DM9161_SCSR_100F | MII_DM9161_SCSR_100H))
-			mii_info->speed = SPEED_100;
-		else
-			mii_info->speed = SPEED_10;
-
-		if (status & (MII_DM9161_SCSR_100F | MII_DM9161_SCSR_10F))
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-	}
-
-	return 0;
-}
-
-
-static int dm9161_config_aneg(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv = mii_info->priv;
-
-	if(0 == priv->resetdone)
-		return -EAGAIN;
-
-	return 0;
-}
-
-static void dm9161_timer(unsigned long data)
-{
-	struct gfar_mii_info *mii_info = (struct gfar_mii_info *)data;
-	struct dm9161_private *priv = mii_info->priv;
-	u16 status = phy_read(mii_info, MII_BMSR);
-
-	if (status & BMSR_ANEGCOMPLETE) {
-		priv->resetdone = 1;
-	} else
-		mod_timer(&priv->timer, jiffies + DM9161_DELAY * HZ);
-}
-
-static int dm9161_init(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv;
-
-	/* Allocate the private data structure */
-	priv = kmalloc(sizeof(struct dm9161_private), GFP_KERNEL);
-
-	if (NULL == priv)
-		return -ENOMEM;
-
-	mii_info->priv = priv;
-
-	/* Reset is not done yet */
-	priv->resetdone = 0;
-
-	/* Isolate the PHY */
-	phy_write(mii_info, MII_BMCR, BMCR_ISOLATE);
-
-	/* Do not bypass the scrambler/descrambler */
-	phy_write(mii_info, MII_DM9161_SCR, MII_DM9161_SCR_INIT);
-
-	/* Clear 10BTCSR to default */
-	phy_write(mii_info, MII_DM9161_10BTCSR, MII_DM9161_10BTCSR_INIT);
-
-	/* Reconnect the PHY, and enable Autonegotiation */
-	phy_write(mii_info, MII_BMCR, BMCR_ANENABLE);
-
-	/* Start a timer for DM9161_DELAY seconds to wait
-	 * for the PHY to be ready */
-	init_timer(&priv->timer);
-	priv->timer.function = &dm9161_timer;
-	priv->timer.data = (unsigned long) mii_info;
-	mod_timer(&priv->timer, jiffies + DM9161_DELAY * HZ);
-
-	return 0;
-}
-
-static void dm9161_close(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv = mii_info->priv;
-
-	del_timer_sync(&priv->timer);
-	kfree(priv);
-}
-
-#if 0
-static int dm9161_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	phy_read(mii_info, MII_DM9161_INTR);
-
-	return 0;
-}
-#endif
-
-/* Cicada 820x */
-static struct phy_info phy_info_cis820x = {
-	0x000fc440,
-	"Cicada Cis8204",
-	0x000fffc0,
-	.features	= MII_GBIT_FEATURES,
-	.init		= &cis820x_init,
-	.config_aneg	= &gbit_config_aneg,
-	.read_status	= &cis820x_read_status,
-	.ack_interrupt	= &cis820x_ack_interrupt,
-	.config_intr	= &cis820x_config_intr,
-};
-
-static struct phy_info phy_info_dm9161 = {
-	.phy_id		= 0x0181b880,
-	.name		= "Davicom DM9161E",
-	.phy_id_mask	= 0x0ffffff0,
-	.init		= dm9161_init,
-	.config_aneg	= dm9161_config_aneg,
-	.read_status	= dm9161_read_status,
-	.close		= dm9161_close,
-};
-
-static struct phy_info phy_info_marvell = {
-	.phy_id		= 0x01410c00,
-	.phy_id_mask	= 0xffffff00,
-	.name		= "Marvell 88E1101/88E1111",
-	.features	= MII_GBIT_FEATURES,
-	.config_aneg	= &marvell_config_aneg,
-	.read_status	= &marvell_read_status,
-	.ack_interrupt	= &marvell_ack_interrupt,
-	.config_intr	= &marvell_config_intr,
-};
-
-static struct phy_info phy_info_genmii= {
-	.phy_id		= 0x00000000,
-	.phy_id_mask	= 0x00000000,
-	.name		= "Generic MII",
-	.features	= MII_BASIC_FEATURES,
-	.config_aneg	= genmii_config_aneg,
-	.read_status	= genmii_read_status,
-};
-
-static struct phy_info *phy_info[] = {
-	&phy_info_cis820x,
-	&phy_info_marvell,
-	&phy_info_dm9161,
-	&phy_info_genmii,
-	NULL
-};
-
-u16 phy_read(struct gfar_mii_info *mii_info, u16 regnum)
-{
-	u16 retval;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mii_info->mdio_lock, flags);
-	retval = mii_info->mdio_read(mii_info->dev, mii_info->mii_id, regnum);
-	spin_unlock_irqrestore(&mii_info->mdio_lock, flags);
-
-	return retval;
-}
-
-void phy_write(struct gfar_mii_info *mii_info, u16 regnum, u16 val)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&mii_info->mdio_lock, flags);
-	mii_info->mdio_write(mii_info->dev, 
-			mii_info->mii_id, 
-			regnum, val);
-	spin_unlock_irqrestore(&mii_info->mdio_lock, flags);
-}
-
-/* Use the PHY ID registers to determine what type of PHY is attached
- * to device dev.  return a struct phy_info structure describing that PHY
- */
-struct phy_info * get_phy_info(struct gfar_mii_info *mii_info)
-{
-	u16 phy_reg;
-	u32 phy_ID;
-	int i;
-	struct phy_info *theInfo = NULL;
-	struct net_device *dev = mii_info->dev;
-
-	/* Grab the bits from PHYIR1, and put them in the upper half */
-	phy_reg = phy_read(mii_info, MII_PHYSID1);
-	phy_ID = (phy_reg & 0xffff) << 16;
-
-	/* Grab the bits from PHYIR2, and put them in the lower half */
-	phy_reg = phy_read(mii_info, MII_PHYSID2);
-	phy_ID |= (phy_reg & 0xffff);
-
-	/* loop through all the known PHY types, and find one that */
-	/* matches the ID we read from the PHY. */
-	for (i = 0; phy_info[i]; i++)
-		if (phy_info[i]->phy_id == 
-				(phy_ID & phy_info[i]->phy_id_mask)) {
-			theInfo = phy_info[i];
-			break;
-		}
-
-	/* This shouldn't happen, as we have generic PHY support */
-	if (theInfo == NULL) {
-		printk("%s: PHY id %x is not supported!\n", dev->name, phy_ID);
-		return NULL;
-	} else {
-		printk("%s: PHY is %s (%x)\n", dev->name, theInfo->name,
-		       phy_ID);
-	}
-
-	return theInfo;
-}

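For reference, the removed get_phy_info() selected a PHY driver by composing the 32-bit ID from the two standard ID registers and masking off revision bits before comparing, i.e. (a recap of the deleted logic, not new code):

u32 phy_id;

phy_id  = (phy_read(mii_info, MII_PHYSID1) & 0xffff) << 16;
phy_id |=  phy_read(mii_info, MII_PHYSID2) & 0xffff;

if (phy_info[i]->phy_id == (phy_id & phy_info[i]->phy_id_mask))
	theInfo = phy_info[i];	/* matched; revision bits ignored */

The mii_bus framework used by the new gianfar_mii.c performs the equivalent match centrally.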
+ 0 - 213
drivers/net/gianfar_phy.h

@@ -1,213 +0,0 @@
-/* 
- * drivers/net/gianfar_phy.h
- *
- * Gianfar Ethernet Driver -- PHY handling
- * Driver for FEC on MPC8540 and TSEC on MPC8540/MPC8560
- * Based on 8260_io/fcc_enet.c
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
- *
- * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-#ifndef __GIANFAR_PHY_H
-#define __GIANFAR_PHY_H
-
-#define MII_end ((u32)-2)
-#define MII_read ((u32)-1)
-
-#define MIIMIND_BUSY            0x00000001
-#define MIIMIND_NOTVALID        0x00000004
-
-#define GFAR_AN_TIMEOUT         2000
-
-/* 1000BT control (Marvell & BCM54xx at least) */
-#define MII_1000BASETCONTROL			0x09
-#define MII_1000BASETCONTROL_FULLDUPLEXCAP	0x0200
-#define MII_1000BASETCONTROL_HALFDUPLEXCAP	0x0100
-
-/* Cicada Extended Control Register 1 */
-#define MII_CIS8201_EXT_CON1           0x17
-#define MII_CIS8201_EXTCON1_INIT       0x0000
-
-/* Cicada Interrupt Mask Register */
-#define MII_CIS8201_IMASK		0x19
-#define MII_CIS8201_IMASK_IEN		0x8000
-#define MII_CIS8201_IMASK_SPEED	0x4000
-#define MII_CIS8201_IMASK_LINK		0x2000
-#define MII_CIS8201_IMASK_DUPLEX	0x1000
-#define MII_CIS8201_IMASK_MASK		0xf000
-
-/* Cicada Interrupt Status Register */
-#define MII_CIS8201_ISTAT		0x1a
-#define MII_CIS8201_ISTAT_STATUS	0x8000
-#define MII_CIS8201_ISTAT_SPEED	0x4000
-#define MII_CIS8201_ISTAT_LINK		0x2000
-#define MII_CIS8201_ISTAT_DUPLEX	0x1000
-
-/* Cicada Auxiliary Control/Status Register */
-#define MII_CIS8201_AUX_CONSTAT        0x1c
-#define MII_CIS8201_AUXCONSTAT_INIT    0x0004
-#define MII_CIS8201_AUXCONSTAT_DUPLEX  0x0020
-#define MII_CIS8201_AUXCONSTAT_SPEED   0x0018
-#define MII_CIS8201_AUXCONSTAT_GBIT    0x0010
-#define MII_CIS8201_AUXCONSTAT_100     0x0008
-                                                                                
-/* 88E1011 PHY Status Register */
-#define MII_M1011_PHY_SPEC_STATUS		0x11
-#define MII_M1011_PHY_SPEC_STATUS_1000		0x8000
-#define MII_M1011_PHY_SPEC_STATUS_100		0x4000
-#define MII_M1011_PHY_SPEC_STATUS_SPD_MASK	0xc000
-#define MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX	0x2000
-#define MII_M1011_PHY_SPEC_STATUS_RESOLVED	0x0800
-#define MII_M1011_PHY_SPEC_STATUS_LINK		0x0400
-
-#define MII_M1011_IEVENT		0x13
-#define MII_M1011_IEVENT_CLEAR		0x0000
-
-#define MII_M1011_IMASK			0x12
-#define MII_M1011_IMASK_INIT		0x6400
-#define MII_M1011_IMASK_CLEAR		0x0000
-
-#define MII_DM9161_SCR		0x10
-#define MII_DM9161_SCR_INIT	0x0610
-
-/* DM9161 Specified Configuration and Status Register */
-#define MII_DM9161_SCSR	0x11
-#define MII_DM9161_SCSR_100F	0x8000
-#define MII_DM9161_SCSR_100H	0x4000
-#define MII_DM9161_SCSR_10F	0x2000
-#define MII_DM9161_SCSR_10H	0x1000
-
-/* DM9161 Interrupt Register */
-#define MII_DM9161_INTR	0x15
-#define MII_DM9161_INTR_PEND		0x8000
-#define MII_DM9161_INTR_DPLX_MASK	0x0800
-#define MII_DM9161_INTR_SPD_MASK	0x0400
-#define MII_DM9161_INTR_LINK_MASK	0x0200
-#define MII_DM9161_INTR_MASK		0x0100
-#define MII_DM9161_INTR_DPLX_CHANGE	0x0010
-#define MII_DM9161_INTR_SPD_CHANGE	0x0008
-#define MII_DM9161_INTR_LINK_CHANGE	0x0004
-#define MII_DM9161_INTR_INIT 		0x0000
-#define MII_DM9161_INTR_STOP	\
-(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \
- | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK)
-
-/* DM9161 10BT Configuration/Status */
-#define MII_DM9161_10BTCSR	0x12
-#define MII_DM9161_10BTCSR_INIT	0x7800
-
-#define MII_BASIC_FEATURES	(SUPPORTED_10baseT_Half | \
-				 SUPPORTED_10baseT_Full | \
-				 SUPPORTED_100baseT_Half | \
-				 SUPPORTED_100baseT_Full | \
-				 SUPPORTED_Autoneg | \
-				 SUPPORTED_TP | \
-				 SUPPORTED_MII)
-
-#define MII_GBIT_FEATURES	(MII_BASIC_FEATURES | \
-				 SUPPORTED_1000baseT_Half | \
-				 SUPPORTED_1000baseT_Full)
-
-#define MII_READ_COMMAND       0x00000001
-
-#define MII_INTERRUPT_DISABLED 0x0
-#define MII_INTERRUPT_ENABLED 0x1
-/* Taken from mii_if_info and sungem_phy.h */
-struct gfar_mii_info {
-	/* Information about the PHY type */
-	/* And management functions */
-	struct phy_info *phyinfo;
-
-	/* forced speed & duplex (no autoneg)
-	 * partner speed & duplex & pause (autoneg)
-	 */
-	int speed;
-	int duplex;
-	int pause;
-
-	/* The most recently read link state */
-	int link;
-
-	/* Enabled Interrupts */
-	u32 interrupts;
-
-	u32 advertising;
-	int autoneg;
-	int mii_id;
-
-	/* private data pointer */
-	/* For use by PHYs to maintain extra state */
-	void *priv;
-
-	/* Provided by host chip */
-	struct net_device *dev;
-
-	/* A lock to ensure that only one thing can read/write
-	 * the MDIO bus at a time */
-	spinlock_t mdio_lock;
-
-	/* Provided by ethernet driver */
-	int (*mdio_read) (struct net_device *dev, int mii_id, int reg);
-	void (*mdio_write) (struct net_device *dev, int mii_id, int reg, int val);
-};
-
-/* struct phy_info: a structure which defines attributes for a PHY
- *
- * id will contain a number which represents the PHY.  During
- * startup, the driver will poll the PHY to find out what its
- * UID--as defined by registers 2 and 3--is.  The 32-bit result
- * gotten from the PHY will be ANDed with phy_id_mask to
- * discard any bits which may change based on revision numbers
- * unimportant to functionality
- *
- * There are 6 commands which take a gfar_mii_info structure.
- * Each PHY must declare config_aneg, and read_status.
- */
-struct phy_info {
-	u32 phy_id;
-	char *name;
-	unsigned int phy_id_mask;
-	u32 features;
-
-	/* Called to initialize the PHY */
-	int (*init)(struct gfar_mii_info *mii_info);
-
-	/* Called to suspend the PHY for power */
-	int (*suspend)(struct gfar_mii_info *mii_info);
-
-	/* Reconfigures autonegotiation (or disables it) */
-	int (*config_aneg)(struct gfar_mii_info *mii_info);
-
-	/* Determines the negotiated speed and duplex */
-	int (*read_status)(struct gfar_mii_info *mii_info);
-
-	/* Clears any pending interrupts */
-	int (*ack_interrupt)(struct gfar_mii_info *mii_info);
-
-	/* Enables or disables interrupts */
-	int (*config_intr)(struct gfar_mii_info *mii_info);
-
-	/* Clears up any memory if needed */
-	void (*close)(struct gfar_mii_info *mii_info);
-};
-
-struct phy_info *get_phy_info(struct gfar_mii_info *mii_info);
-int read_phy_reg(struct net_device *dev, int mii_id, int regnum);
-void write_phy_reg(struct net_device *dev, int mii_id, int regnum, int value);
-void mii_clear_phy_interrupt(struct gfar_mii_info *mii_info);
-void mii_configure_phy_interrupt(struct gfar_mii_info *mii_info, u32 interrupts);
-
-struct dm9161_private {
-	struct timer_list timer;
-	int resetdone;
-};
-
-#endif /* GIANFAR_PHY_H */

+ 1 - 0
drivers/net/hamradio/Kconfig

@@ -1,6 +1,7 @@
 config MKISS
 	tristate "Serial port KISS driver"
 	depends on AX25
+	select CRC16
 	---help---
 	  KISS is a protocol used for the exchange of data between a computer
 	  and a Terminal Node Controller (a small embedded system commonly

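select CRC16 forces lib/crc16 into the build whenever MKISS is enabled, since mkiss.c (below) now calls crc16() for SMACK framing. The interface it pulls in is a single function:

#include <linux/crc16.h>

u16 crc16(u16 crc, u8 const *buffer, size_t len);	/* table-driven CRC-16 */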
+ 3 - 6
drivers/net/hamradio/bpqether.c

@@ -144,7 +144,7 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev)
 {
 	struct bpqdev *bpq;
 
-	list_for_each_entry(bpq, &bpq_devices, bpq_list) {
+	list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list) {
 		if (bpq->ethdev == dev)
 			return bpq->axdev;
 	}
@@ -399,7 +399,7 @@ static void *bpq_seq_start(struct seq_file *seq, loff_t *pos)
 	if (*pos == 0)
 		return SEQ_START_TOKEN;
 	
-	list_for_each_entry(bpqdev, &bpq_devices, bpq_list) {
+	list_for_each_entry_rcu(bpqdev, &bpq_devices, bpq_list) {
 		if (i == *pos)
 			return bpqdev;
 	}
@@ -418,7 +418,7 @@ static void *bpq_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		p = ((struct bpqdev *)v)->bpq_list.next;
 
 	return (p == &bpq_devices) ? NULL 
-		: list_entry(p, struct bpqdev, bpq_list);
+		: rcu_dereference(list_entry(p, struct bpqdev, bpq_list));
 }
 
 static void bpq_seq_stop(struct seq_file *seq, void *v)
@@ -561,8 +561,6 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi
 	if (!dev_is_ethdev(dev))
 		return NOTIFY_DONE;
 
-	rcu_read_lock();
-
 	switch (event) {
 	case NETDEV_UP:		/* new ethernet device -> new BPQ interface */
 		if (bpq_get_ax25_dev(dev) == NULL)
@@ -581,7 +579,6 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi
 	default:
 		break;
 	}
-	rcu_read_unlock();
 
 	return NOTIFY_DONE;
 }

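These hunks convert only the readers of bpq_devices to RCU. They are safe only if the writers publish and retract entries with the matching _rcu list primitives and defer the free, along these lines (a sketch of the required pairing, not part of this hunk):

/* Writer side, under whatever mutual exclusion serializes updates: */
list_add_rcu(&bpq->bpq_list, &bpq_devices);	/* publish */

list_del_rcu(&bpq->bpq_list);			/* retract */
synchronize_rcu();				/* wait out the lockless readers */
kfree(bpq);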
+ 149 - 33
drivers/net/hamradio/mkiss.c

@@ -14,13 +14,14 @@
  *
  * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl>
  * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org>
+ * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de>
  */
-
 #include <linux/config.h>
 #include <linux/module.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
 #include <asm/uaccess.h>
+#include <linux/crc16.h>
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -39,11 +40,6 @@
 
 #include <net/ax25.h>
 
-#ifdef CONFIG_INET
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#endif
-
 #define AX_MTU		236
 
 /* SLIP/KISS protocol characters. */
@@ -80,9 +76,13 @@ struct mkiss {
 
 	int		mode;
         int		crcmode;	/* MW: for FlexNet, SMACK etc.  */
-#define CRC_MODE_NONE   0
-#define CRC_MODE_FLEX   1
-#define CRC_MODE_SMACK  2
+	int		crcauto;	/* CRC auto mode */
+
+#define CRC_MODE_NONE		0
+#define CRC_MODE_FLEX		1
+#define CRC_MODE_SMACK		2
+#define CRC_MODE_FLEX_TEST	3
+#define CRC_MODE_SMACK_TEST	4
 
 	atomic_t		refcnt;
 	struct semaphore	dead_sem;
@@ -151,6 +151,21 @@ static int check_crc_flex(unsigned char *cp, int size)
 	return 0;
 }
 
+static int check_crc_16(unsigned char *cp, int size)
+{
+	unsigned short crc = 0x0000;
+
+	if (size < 3)
+		return -1;
+
+	crc = crc16(0, cp, size);
+
+	if (crc != 0x0000)
+		return -1;
+
+	return 0;
+}
+
 /*
  * Standard encapsulation
  */
@@ -237,19 +252,42 @@ static void ax_bump(struct mkiss *ax)
 
 	spin_lock_bh(&ax->buflock);
 	if (ax->rbuff[0] > 0x0f) {
-		if (ax->rbuff[0] & 0x20) {
-		        ax->crcmode = CRC_MODE_FLEX;
+		if (ax->rbuff[0] & 0x80) {
+			if (check_crc_16(ax->rbuff, ax->rcount) < 0) {
+				ax->stats.rx_errors++;
+				spin_unlock_bh(&ax->buflock);
+
+				return;
+			}
+			if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) {
+				printk(KERN_INFO
+				       "mkiss: %s: Switching to crc-smack\n",
+				       ax->dev->name);
+				ax->crcmode = CRC_MODE_SMACK;
+			}
+			ax->rcount -= 2;
+			*ax->rbuff &= ~0x80;
+		} else if (ax->rbuff[0] & 0x20)  {
 			if (check_crc_flex(ax->rbuff, ax->rcount) < 0) {
-			        ax->stats.rx_errors++;
+				ax->stats.rx_errors++;
+				spin_unlock_bh(&ax->buflock);
 				return;
 			}
+			if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) {
+				printk(KERN_INFO
+				       "mkiss: %s: Switching to crc-flexnet\n",
+				       ax->dev->name);
+				ax->crcmode = CRC_MODE_FLEX;
+			}
 			ax->rcount -= 2;
-                        /* dl9sau bugfix: the trailling two bytes flexnet crc
-                         * will not be passed to the kernel. thus we have
-                         * to correct the kissparm signature, because it
-                         * indicates a crc but there's none
+
+			/*
+			 * dl9sau bugfix: the trailing two bytes of flexnet
+			 * crc will not be passed to the kernel. Thus we have
+			 * to correct the kissparm signature, because it
+			 * indicates a crc but there's none.
 			 */
-                        *ax->rbuff &= ~0x20;
+			*ax->rbuff &= ~0x20;
 		}
  	}
 	spin_unlock_bh(&ax->buflock);
@@ -417,20 +455,69 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 	p = icp;
 
 	spin_lock_bh(&ax->buflock);
-        switch (ax->crcmode) {
-	         unsigned short crc;
+	if ((*p & 0x0f) != 0) {
+		/* Configuration command (kissparms(1)).
+		 * Protocol spec says: never append CRC.
+		 * This fixes a very old bug in the linux
+		 * kiss driver. -- dl9sau */
+		switch (*p & 0xff) {
+		case 0x85:
+			/* command from userspace especially for us,
+			 * not for delivery to the tnc */
+			if (len > 1) {
+				int cmd = (p[1] & 0xff);
+				switch(cmd) {
+				case 3:
+				  ax->crcmode = CRC_MODE_SMACK;
+				  break;
+				case 2:
+				  ax->crcmode = CRC_MODE_FLEX;
+				  break;
+				case 1:
+				  ax->crcmode = CRC_MODE_NONE;
+				  break;
+				case 0:
+				default:
+				  ax->crcmode = CRC_MODE_SMACK_TEST;
+				  cmd = 0;
+				}
+				ax->crcauto = (cmd ? 0 : 1);
+				printk(KERN_INFO "mkiss: %s: crc mode %s %d\n",
+				       ax->dev->name, (len) ? "set to" : "is", cmd);
+			}
+			spin_unlock_bh(&ax->buflock);
+			netif_start_queue(dev);
 
-	case CRC_MODE_FLEX:
-	         *p |= 0x20;
-	         crc = calc_crc_flex(p, len);
-		 count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
-		 break;
+			return;
+		default:
+			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+		}
+	} else {
+		unsigned short crc;
+		switch (ax->crcmode) {
+		case CRC_MODE_SMACK_TEST:
+			ax->crcmode  = CRC_MODE_FLEX_TEST;
+			printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name);
+			/* fall through */
+		case CRC_MODE_SMACK:
+			*p |= 0x80;
+			crc = swab16(crc16(0, p, len));
+			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			break;
+		case CRC_MODE_FLEX_TEST:
+			ax->crcmode = CRC_MODE_NONE;
+			printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name);
+			/* fall through */
+		case CRC_MODE_FLEX:
+			*p |= 0x20;
+			crc = calc_crc_flex(p, len);
+			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			break;
+
+		default:
+			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+		}
+	}
 
-	default:
-	         count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
-		 break;
-	}
-	
 	set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
 	actual = ax->tty->driver->write(ax->tty, ax->xbuff, count);
 	ax->stats.tx_packets++;
@@ -439,8 +526,6 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 	ax->dev->trans_start = jiffies;
 	ax->xleft = count - actual;
 	ax->xhead = ax->xbuff + actual;
-
-	spin_unlock_bh(&ax->buflock);
 }
 
 /* Encapsulate an AX.25 packet and kick it into a TTY queue. */
@@ -622,7 +707,7 @@ static void ax_setup(struct net_device *dev)
  * best way to fix this is to use a rwlock in the tty struct, but for now we
  * use a single global rwlock for all ttys in ppp line discipline.
  */
-static rwlock_t disc_data_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(disc_data_lock);
 
 static struct mkiss *mkiss_get(struct tty_struct *tty)
 {
@@ -643,6 +728,8 @@ static void mkiss_put(struct mkiss *ax)
 		up(&ax->dead_sem);
 }
 
+static int crc_force = 0;	/* Can be overridden with insmod */
+
 static int mkiss_open(struct tty_struct *tty)
 {
 	struct net_device *dev;
@@ -682,6 +769,33 @@ static int mkiss_open(struct tty_struct *tty)
 	if (register_netdev(dev))
 		goto out_free_buffers;
 
+	/* after register_netdev() - because else printk smashes the kernel */
+	switch (crc_force) {
+	case 3:
+		ax->crcmode  = CRC_MODE_SMACK;
+		printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n",
+		       ax->dev->name);
+		break;
+	case 2:
+		ax->crcmode  = CRC_MODE_FLEX;
+		printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n",
+		       ax->dev->name);
+		break;
+	case 1:
+		ax->crcmode  = CRC_MODE_NONE;
+		printk(KERN_INFO "mkiss: %s: crc mode disabled.\n",
+		       ax->dev->name);
+		break;
+	case 0:
+		/* fall through */
+	default:
+		crc_force = 0;
+		printk(KERN_INFO "mkiss: %s: crc mode is auto.\n",
+		       ax->dev->name);
+		ax->crcmode  = CRC_MODE_SMACK_TEST;
+	}
+	ax->crcauto = (crc_force ? 0 : 1);
+
 	netif_start_queue(dev);
 
 	/* Done.  We have linked the TTY line to a channel. */
@@ -765,7 +879,6 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
 
 	case SIOCSIFHWADDR: {
 		char addr[AX25_ADDR_LEN];
-printk(KERN_INFO "In SIOCSIFHWADDR");
 
 		if (copy_from_user(&addr,
 		                   (void __user *) arg, AX25_ADDR_LEN)) {
@@ -864,6 +977,7 @@ out:
 }
 
 static struct tty_ldisc ax_ldisc = {
+	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "mkiss",
 	.open		= mkiss_open,
@@ -904,6 +1018,8 @@ static void __exit mkiss_exit_driver(void)
 
 MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>");
 MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs");
+MODULE_PARM(crc_force, "i");
+MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_LDISC(N_AX25);
 

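Userspace can pin or probe the CRC mode per port by handing mkiss a two-byte KISS parameter frame whose command byte is 0x85; ax_encaps() consumes it instead of forwarding it to the TNC. A sketch of building such a frame (the helper name is hypothetical):

/* Hypothetical helper: build the mkiss-private CRC-mode frame.
 * mode: 0 = auto, 1 = none, 2 = flexnet, 3 = smack (see ax_encaps). */
static int mkiss_crc_frame(unsigned char *buf, int mode)
{
	buf[0] = 0x85;			/* private command, eaten by mkiss */
	buf[1] = (unsigned char)mode;
	return 2;			/* frame length */
}

The same selection can be made for all ports at load time through the new crc_force module parameter.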
Some files were not shown because too many files changed in this diff