Browse Source

Merge branch 'fixes-dts' into omap-for-v4.20/fixes

Tony Lindgren 6 years ago
parent
commit
91e4339582
100 changed files with 2726 additions and 2278 deletions
  1. 0 1
      .clang-format
  2. 12 0
      .mailmap
  3. 0 428
      Documentation/00-INDEX
  4. 9 0
      Documentation/ABI/stable/sysfs-bus-xen-backend
  5. 9 0
      Documentation/ABI/stable/sysfs-devices-system-xen_memory
  6. 0 35
      Documentation/ABI/stable/sysfs-driver-usb-usbtmc
  7. 41 0
      Documentation/ABI/testing/configfs-stp-policy-p_sys-t
  8. 24 0
      Documentation/ABI/testing/configfs-usb-gadget-uvc
  9. 1 1
      Documentation/ABI/testing/sysfs-bus-iio
  10. 24 0
      Documentation/ABI/testing/sysfs-bus-pci
  11. 18 1
      Documentation/ABI/testing/sysfs-bus-usb
  12. 21 0
      Documentation/ABI/testing/sysfs-bus-vmbus
  13. 0 27
      Documentation/ABI/testing/sysfs-class-lcd-s6e63m0
  14. 22 0
      Documentation/ABI/testing/sysfs-class-led-driver-sc27xx
  15. 82 0
      Documentation/ABI/testing/sysfs-class-led-trigger-pattern
  16. 20 2
      Documentation/ABI/testing/sysfs-class-net
  17. 7 0
      Documentation/ABI/testing/sysfs-class-net-dsa
  18. 10 0
      Documentation/ABI/testing/sysfs-driver-xen-blkback
  19. 16 1
      Documentation/ABI/testing/sysfs-fs-f2fs
  20. 35 0
      Documentation/ABI/testing/sysfs-platform-lg-laptop
  21. 1 1
      Documentation/ABI/testing/sysfs-power
  22. 0 26
      Documentation/PCI/00-INDEX
  23. 11 8
      Documentation/PCI/endpoint/pci-test-howto.txt
  24. 10 25
      Documentation/PCI/pci-error-recovery.txt
  25. 0 34
      Documentation/RCU/00-INDEX
  26. 12 19
      Documentation/RCU/Design/Data-Structures/Data-Structures.html
  27. 4 5
      Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
  28. 72 142
      Documentation/RCU/Design/Requirements/Requirements.html
  29. 0 4
      Documentation/RCU/rcu.txt
  30. 5 8
      Documentation/RCU/stallwarn.txt
  31. 2 1
      Documentation/RCU/whatisRCU.txt
  32. 73 0
      Documentation/accounting/psi.txt
  33. 2 2
      Documentation/admin-guide/LSM/Yama.rst
  34. 1 2
      Documentation/admin-guide/README.rst
  35. 22 0
      Documentation/admin-guide/cgroup-v2.rst
  36. 574 0
      Documentation/admin-guide/ext4.rst
  37. 1 0
      Documentation/admin-guide/index.rst
  38. 73 19
      Documentation/admin-guide/kernel-parameters.txt
  39. 1 1
      Documentation/admin-guide/l1tf.rst
  40. 1 0
      Documentation/admin-guide/mm/index.rst
  41. 63 126
      Documentation/admin-guide/mm/memory-hotplug.rst
  42. 7 0
      Documentation/admin-guide/pm/intel_pstate.rst
  43. 29 18
      Documentation/admin-guide/security-bugs.rst
  44. 0 50
      Documentation/arm/00-INDEX
  45. 1 0
      Documentation/arm/Samsung/Bootloader-interface.txt
  46. 8 4
      Documentation/arm64/elf_hwcaps.txt
  47. 38 0
      Documentation/arm64/hugetlbpage.txt
  48. 1 0
      Documentation/arm64/silicon-errata.txt
  49. 2 2
      Documentation/arm64/sve.txt
  50. 0 34
      Documentation/block/00-INDEX
  51. 0 18
      Documentation/blockdev/00-INDEX
  52. 0 756
      Documentation/blockdev/README.DAC960
  53. 1 1
      Documentation/blockdev/zram.txt
  54. 0 11
      Documentation/cdrom/00-INDEX
  55. 0 26
      Documentation/cgroup-v1/00-INDEX
  56. 1 1
      Documentation/cgroup-v1/rdma.txt
  57. 7 3
      Documentation/conf.py
  58. 10 61
      Documentation/core-api/boot-time-mm.rst
  59. 2 0
      Documentation/core-api/gfp_mask-from-fs-io.rst
  60. 1 1
      Documentation/core-api/idr.rst
  61. 4 0
      Documentation/core-api/index.rst
  62. 122 0
      Documentation/core-api/memory-allocation.rst
  63. 125 0
      Documentation/core-api/memory-hotplug.rst
  64. 2 0
      Documentation/core-api/mm-api.rst
  65. 5 6
      Documentation/core-api/printk-formats.rst
  66. 435 0
      Documentation/core-api/xarray.rst
  67. 21 5
      Documentation/crypto/asymmetric-keys.txt
  68. 19 4
      Documentation/dev-tools/coccinelle.rst
  69. 1 1
      Documentation/dev-tools/kselftest.rst
  70. 4 0
      Documentation/device-mapper/dm-flakey.txt
  71. 4 0
      Documentation/device-mapper/dm-raid.txt
  72. 1 1
      Documentation/device-mapper/log-writes.txt
  73. 0 12
      Documentation/devicetree/00-INDEX
  74. 0 72
      Documentation/devicetree/bindings/arm/al,alpine.txt
  75. 7 0
      Documentation/devicetree/bindings/arm/amlogic.txt
  76. 0 170
      Documentation/devicetree/bindings/arm/atmel-at91.txt
  77. 171 0
      Documentation/devicetree/bindings/arm/atmel-sysregs.txt
  78. 8 0
      Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
  79. 65 55
      Documentation/devicetree/bindings/arm/coresight.txt
  80. 5 3
      Documentation/devicetree/bindings/arm/cpu-capacity.txt
  81. 31 3
      Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
  82. 2 2
      Documentation/devicetree/bindings/arm/cpus.txt
  83. 19 0
      Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
  84. 19 0
      Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
  85. 183 0
      Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
  86. 44 39
      Documentation/devicetree/bindings/arm/fsl.txt
  87. 8 0
      Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
  88. 4 0
      Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
  89. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
  90. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
  91. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt
  92. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt
  93. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
  94. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
  95. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
  96. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
  97. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
  98. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
  99. 1 0
      Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
  100. 19 0
      Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt

+ 0 - 1
.clang-format

@@ -323,7 +323,6 @@ ForEachMacros:
   - 'protocol_for_each_card'
   - 'protocol_for_each_dev'
   - 'queue_for_each_hw_ctx'
-  - 'radix_tree_for_each_contig'
   - 'radix_tree_for_each_slot'
   - 'radix_tree_for_each_tagged'
   - 'rbtree_postorder_for_each_entry_safe'

+ 12 - 0
.mailmap

@@ -119,6 +119,13 @@ Mark Brown <broonie@sirena.org.uk>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
+Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
+Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
+Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>
+Matthew Wilcox <willy@infradead.org> <mawilcox@microsoft.com>
+Matthew Wilcox <willy@infradead.org> <willy@debian.org>
+Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com>
+Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
@@ -153,6 +160,11 @@ Peter Oruba <peter.oruba@amd.com>
 Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
 Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
+Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
+Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
+Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
+Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
+Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
 Rajesh Shah <rajesh.shah@intel.com>
 Ralf Baechle <ralf@linux-mips.org>
 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>

+ 0 - 428
Documentation/00-INDEX

@@ -1,428 +0,0 @@
-
-This is a brief list of all the files in ./linux/Documentation and what
-they contain. If you add a documentation file, please list it here in
-alphabetical order as well, or risk being hunted down like a rabid dog.
-Please keep the descriptions small enough to fit on one line.
-							 Thanks -- Paul G.
-
-Following translations are available on the WWW:
-
-   - Japanese, maintained by the JF Project (jf@listserv.linux.or.jp), at
-     http://linuxjf.sourceforge.jp/
-
-00-INDEX
-	- this file.
-ABI/
-	- info on kernel <-> userspace ABI and relative interface stability.
-CodingStyle
-	- nothing here, just a pointer to process/coding-style.rst.
-DMA-API.txt
-	- DMA API, pci_ API & extensions for non-consistent memory machines.
-DMA-API-HOWTO.txt
-	- Dynamic DMA mapping Guide
-DMA-ISA-LPC.txt
-	- How to do DMA with ISA (and LPC) devices.
-DMA-attributes.txt
-	- listing of the various possible attributes a DMA region can have
-EDID/
-	- directory with info on customizing EDID for broken gfx/displays.
-IPMI.txt
-	- info on Linux Intelligent Platform Management Interface (IPMI) Driver.
-IRQ-affinity.txt
-	- how to select which CPU(s) handle which interrupt events on SMP.
-IRQ-domain.txt
-	- info on interrupt numbering and setting up IRQ domains.
-IRQ.txt
-	- description of what an IRQ is.
-Intel-IOMMU.txt
-	- basic info on the Intel IOMMU virtualization support.
-Makefile
-	- It's not of interest for those who aren't touching the build system.
-PCI/
-	- info related to PCI drivers.
-RCU/
-	- directory with info on RCU (read-copy update).
-SAK.txt
-	- info on Secure Attention Keys.
-SM501.txt
-	- Silicon Motion SM501 multimedia companion chip
-SubmittingPatches
-	- nothing here, just a pointer to process/coding-style.rst.
-accounting/
-	- documentation on accounting and taskstats.
-acpi/
-	- info on ACPI-specific hooks in the kernel.
-admin-guide/
-	- info related to Linux users and system admins.
-aoe/
-	- description of AoE (ATA over Ethernet) along with config examples.
-arm/
-	- directory with info about Linux on the ARM architecture.
-arm64/
-	- directory with info about Linux on the 64 bit ARM architecture.
-auxdisplay/
-	- misc. LCD driver documentation (cfag12864b, ks0108).
-backlight/
-	- directory with info on controlling backlights in flat panel displays
-block/
-	- info on the Block I/O (BIO) layer.
-blockdev/
-	- info on block devices & drivers
-bt8xxgpio.txt
-	- info on how to modify a bt8xx video card for GPIO usage.
-btmrvl.txt
-	- info on Marvell Bluetooth driver usage.
-bus-devices/
-	- directory with info on TI GPMC (General Purpose Memory Controller)
-bus-virt-phys-mapping.txt
-	- how to access I/O mapped memory from within device drivers.
-cdrom/
-	- directory with information on the CD-ROM drivers that Linux has.
-cgroup-v1/
-	- cgroups v1 features, including cpusets and memory controller.
-cma/
-	- Continuous Memory Area (CMA) debugfs interface.
-conf.py
-	- It's not of interest for those who aren't touching the build system.
-connector/
-	- docs on the netlink based userspace<->kernel space communication mod.
-console/
-	- documentation on Linux console drivers.
-core-api/
-	- documentation on kernel core components.
-cpu-freq/
-	- info on CPU frequency and voltage scaling.
-cpu-hotplug.txt
-	- document describing CPU hotplug support in the Linux kernel.
-cpu-load.txt
-	- document describing how CPU load statistics are collected.
-cpuidle/
-	- info on CPU_IDLE, CPU idle state management subsystem.
-cputopology.txt
-	- documentation on how CPU topology info is exported via sysfs.
-crc32.txt
-	- brief tutorial on CRC computation
-crypto/
-	- directory with info on the Crypto API.
-dcdbas.txt
-	- information on the Dell Systems Management Base Driver.
-debugging-modules.txt
-	- some notes on debugging modules after Linux 2.6.3.
-debugging-via-ohci1394.txt
-	- how to use firewire like a hardware debugger memory reader.
-dell_rbu.txt
-	- document demonstrating the use of the Dell Remote BIOS Update driver.
-dev-tools/
-	- directory with info on development tools for the kernel.
-device-mapper/
-	- directory with info on Device Mapper.
-dmaengine/
-	- the DMA engine and controller API guides.
-devicetree/
-	- directory with info on device tree files used by OF/PowerPC/ARM
-digsig.txt
-	-info on the Digital Signature Verification API
-dma-buf-sharing.txt
-	- the DMA Buffer Sharing API Guide
-docutils.conf
-	- nothing here. Just a configuration file for docutils.
-dontdiff
-	- file containing a list of files that should never be diff'ed.
-driver-api/
-	- the Linux driver implementer's API guide.
-driver-model/
-	- directory with info about Linux driver model.
-early-userspace/
-	- info about initramfs, klibc, and userspace early during boot.
-efi-stub.txt
-	- How to use the EFI boot stub to bypass GRUB or elilo on EFI systems.
-eisa.txt
-	- info on EISA bus support.
-extcon/
-	- directory with porting guide for Android kernel switch driver.
-isa.txt
-	- info on EISA bus support.
-fault-injection/
-	- dir with docs about the fault injection capabilities infrastructure.
-fb/
-	- directory with info on the frame buffer graphics abstraction layer.
-features/
-	- status of feature implementation on different architectures.
-filesystems/
-	- info on the vfs and the various filesystems that Linux supports.
-firmware_class/
-	- request_firmware() hotplug interface info.
-flexible-arrays.txt
-	- how to make use of flexible sized arrays in linux
-fmc/
-	- information about the FMC bus abstraction
-fpga/
-	- FPGA Manager Core.
-futex-requeue-pi.txt
-	- info on requeueing of tasks from a non-PI futex to a PI futex
-gcc-plugins.txt
-	- GCC plugin infrastructure.
-gpio/
-	- gpio related documentation
-gpu/
-	- directory with information on GPU driver developer's guide.
-hid/
-	- directory with information on human interface devices
-highuid.txt
-	- notes on the change from 16 bit to 32 bit user/group IDs.
-hwspinlock.txt
-	- hardware spinlock provides hardware assistance for synchronization
-timers/
-	- info on the timer related topics
-hw_random.txt
-	- info on Linux support for random number generator in i8xx chipsets.
-hwmon/
-	- directory with docs on various hardware monitoring drivers.
-i2c/
-	- directory with info about the I2C bus/protocol (2 wire, kHz speed).
-x86/i386/
-	- directory with info about Linux on Intel 32 bit architecture.
-ia64/
-	- directory with info about Linux on Intel 64 bit architecture.
-ide/
-	- Information regarding the Enhanced IDE drive.
-iio/
-	- info on industrial IIO configfs support.
-index.rst
-	- main index for the documentation at ReST format.
-infiniband/
-	- directory with documents concerning Linux InfiniBand support.
-input/
-	- info on Linux input device support.
-intel_txt.txt
-	- info on intel Trusted Execution Technology (intel TXT).
-io-mapping.txt
-	- description of io_mapping functions in linux/io-mapping.h
-io_ordering.txt
-	- info on ordering I/O writes to memory-mapped addresses.
-ioctl/
-	- directory with documents describing various IOCTL calls.
-iostats.txt
-	- info on I/O statistics Linux kernel provides.
-irqflags-tracing.txt
-	- how to use the irq-flags tracing feature.
-isapnp.txt
-	- info on Linux ISA Plug & Play support.
-isdn/
-	- directory with info on the Linux ISDN support, and supported cards.
-kbuild/
-	- directory with info about the kernel build process.
-kdump/
-	- directory with mini HowTo on getting the crash dump code to work.
-doc-guide/
-	- how to write and format reStructuredText kernel documentation
-kernel-per-CPU-kthreads.txt
-	- List of all per-CPU kthreads and how they introduce jitter.
-kobject.txt
-	- info of the kobject infrastructure of the Linux kernel.
-kprobes.txt
-	- documents the kernel probes debugging feature.
-kref.txt
-	- docs on adding reference counters (krefs) to kernel objects.
-laptops/
-	- directory with laptop related info and laptop driver documentation.
-ldm.txt
-	- a brief description of LDM (Windows Dynamic Disks).
-leds/
-	- directory with info about LED handling under Linux.
-livepatch/
-	- info on kernel live patching.
-locking/
-	- directory with info about kernel locking primitives
-lockup-watchdogs.txt
-	- info on soft and hard lockup detectors (aka nmi_watchdog).
-logo.gif
-	- full colour GIF image of Linux logo (penguin - Tux).
-logo.txt
-	- info on creator of above logo & site to get additional images from.
-lsm.txt
-	- Linux Security Modules: General Security Hooks for Linux
-lzo.txt
-	- kernel LZO decompressor input formats
-m68k/
-	- directory with info about Linux on Motorola 68k architecture.
-mailbox.txt
-	- How to write drivers for the common mailbox framework (IPC).
-md/
-	- directory with info about Linux Software RAID
-media/
-	- info on media drivers: uAPI, kAPI and driver documentation.
-memory-barriers.txt
-	- info on Linux kernel memory barriers.
-memory-devices/
-	- directory with info on parts like the Texas Instruments EMIF driver
-memory-hotplug.txt
-	- Hotpluggable memory support, how to use and current status.
-men-chameleon-bus.txt
-	- info on MEN chameleon bus.
-mic/
-	- Intel Many Integrated Core (MIC) architecture device driver.
-mips/
-	- directory with info about Linux on MIPS architecture.
-misc-devices/
-	- directory with info about devices using the misc dev subsystem
-mmc/
-	- directory with info about the MMC subsystem
-mtd/
-	- directory with info about memory technology devices (flash)
-namespaces/
-	- directory with various information about namespaces
-netlabel/
-	- directory with information on the NetLabel subsystem.
-networking/
-	- directory with info on various aspects of networking with Linux.
-nfc/
-	- directory relating info about Near Field Communications support.
-nios2/
-	- Linux on the Nios II architecture.
-nommu-mmap.txt
-	- documentation about no-mmu memory mapping support.
-numastat.txt
-	- info on how to read Numa policy hit/miss statistics in sysfs.
-ntb.txt
-	- info on Non-Transparent Bridge (NTB) drivers.
-nvdimm/
-	- info on non-volatile devices.
-nvmem/
-	- info on non volatile memory framework.
-output/
-	- default directory where html/LaTeX/pdf files will be written.
-padata.txt
-	- An introduction to the "padata" parallel execution API
-parisc/
-	- directory with info on using Linux on PA-RISC architecture.
-parport-lowlevel.txt
-	- description and usage of the low level parallel port functions.
-pcmcia/
-	- info on the Linux PCMCIA driver.
-percpu-rw-semaphore.txt
-	- RCU based read-write semaphore optimized for locking for reading
-perf/
-	- info about the APM X-Gene SoC Performance Monitoring Unit (PMU).
-phy/
-	- ino on Samsung USB 2.0 PHY adaptation layer.
-phy.txt
-	- Description of the generic PHY framework.
-pi-futex.txt
-	- documentation on lightweight priority inheritance futexes.
-pinctrl.txt
-	- info on pinctrl subsystem and the PINMUX/PINCONF and drivers
-platform/
-	- List of supported hardware by compal and Dell laptop.
-pnp.txt
-	- Linux Plug and Play documentation.
-power/
-	- directory with info on Linux PCI power management.
-powerpc/
-	- directory with info on using Linux with the PowerPC.
-prctl/
-	- directory with info on the priveledge control subsystem
-preempt-locking.txt
-	- info on locking under a preemptive kernel.
-process/
-	- how to work with the mainline kernel development process.
-pps/
-	- directory with information on the pulse-per-second support
-pti/
-	- directory with info on Intel MID PTI.
-ptp/
-	- directory with info on support for IEEE 1588 PTP clocks in Linux.
-pwm.txt
-	- info on the pulse width modulation driver subsystem
-rapidio/
-	- directory with info on RapidIO packet-based fabric interconnect
-rbtree.txt
-	- info on what red-black trees are and what they are for.
-remoteproc.txt
-	- info on how to handle remote processor (e.g. AMP) offloads/usage.
-rfkill.txt
-	- info on the radio frequency kill switch subsystem/support.
-robust-futex-ABI.txt
-	- documentation of the robust futex ABI.
-robust-futexes.txt
-	- a description of what robust futexes are.
-rpmsg.txt
-	- info on the Remote Processor Messaging (rpmsg) Framework
-rtc.txt
-	- notes on how to use the Real Time Clock (aka CMOS clock) driver.
-s390/
-	- directory with info on using Linux on the IBM S390.
-scheduler/
-	- directory with info on the scheduler.
-scsi/
-	- directory with info on Linux scsi support.
-security/
-	- directory that contains security-related info
-serial/
-	- directory with info on the low level serial API.
-sgi-ioc4.txt
-	- description of the SGI IOC4 PCI (multi function) device.
-sh/
-	- directory with info on porting Linux to a new architecture.
-smsc_ece1099.txt
-	-info on the smsc Keyboard Scan Expansion/GPIO Expansion device.
-sound/
-	- directory with info on sound card support.
-spi/
-	- overview of Linux kernel Serial Peripheral Interface (SPI) support.
-sphinx/
-	- no documentation here, just files required by Sphinx toolchain.
-sphinx-static/
-	- no documentation here, just files required by Sphinx toolchain.
-static-keys.txt
-	- info on how static keys allow debug code in hotpaths via patching
-svga.txt
-	- short guide on selecting video modes at boot via VGA BIOS.
-sync_file.txt
-	- Sync file API guide.
-sysctl/
-	- directory with info on the /proc/sys/* files.
-target/
-	- directory with info on generating TCM v4 fabric .ko modules
-tee.txt
-	- info on the TEE subsystem and drivers
-this_cpu_ops.txt
-	- List rationale behind and the way to use this_cpu operations.
-thermal/
-	- directory with information on managing thermal issues (CPU/temp)
-trace/
-	- directory with info on tracing technologies within linux
-translations/
-	- translations of this document from English to another language
-unaligned-memory-access.txt
-	- info on how to avoid arch breaking unaligned memory access in code.
-unshare.txt
-	- description of the Linux unshare system call.
-usb/
-	- directory with info regarding the Universal Serial Bus.
-vfio.txt
-	- info on Virtual Function I/O used in guest/hypervisor instances.
-video-output.txt
-	- sysfs class driver interface to enable/disable a video output device.
-virtual/
-	- directory with information on the various linux virtualizations.
-vm/
-	- directory with info on the Linux vm code.
-w1/
-	- directory with documents regarding the 1-wire (w1) subsystem.
-watchdog/
-	- how to auto-reboot Linux if it has "fallen and can't get up". ;-)
-wimax/
-	- directory with info about Intel Wireless Wimax Connections
-core-api/workqueue.rst
-	- information on the Concurrency Managed Workqueue implementation
-x86/x86_64/
-	- directory with info on Linux support for AMD x86-64 (Hammer) machines.
-xillybus.txt
-	- Overview and basic ui of xillybus driver
-xtensa/
-	- directory with documents relating to arch/xtensa port/implementation
-xz.txt
-	- how to make use of the XZ data compression within linux kernel
-zorro.txt
-	- info on writing drivers for Zorro bus devices found on Amigas.

+ 9 - 0
Documentation/ABI/stable/sysfs-bus-xen-backend

@@ -73,3 +73,12 @@ KernelVersion:	3.0
 Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Description:
                 Number of sectors written by the frontend.
+
+What:		/sys/bus/xen-backend/devices/*/state
+Date:		August 2018
+KernelVersion:	4.19
+Contact:	Joe Jin <joe.jin@oracle.com>
+Description:
+                The state of the device. One of: 'Unknown',
+                'Initialising', 'Initialised', 'Connected', 'Closing',
+                'Closed', 'Reconfiguring', 'Reconfigured'.

+ 9 - 0
Documentation/ABI/stable/sysfs-devices-system-xen_memory

@@ -75,3 +75,12 @@ Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Description:
 		Amount (in KiB) of low (or normal) memory in the
 		balloon.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/scrub_pages
+Date:		September 2018
+KernelVersion:	4.20
+Contact:	xen-devel@lists.xenproject.org
+Description:
+		Control scrubbing pages before returning them to Xen for others domains
+		use. Can be set with xen_scrub_pages cmdline
+		parameter. Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.

+ 0 - 35
Documentation/ABI/stable/sysfs-driver-usb-usbtmc

@@ -25,38 +25,3 @@ Description:
 		4.2.2.
 
 		The files are read only.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/TermChar
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file is the TermChar value to be sent to the USB TMC
-		device as described by the document, "Universal Serial Bus Test
-		and Measurement Class Specification
-		(USBTMC) Revision 1.0" as published by the USB-IF.
-
-		Note that the TermCharEnabled file determines if this value is
-		sent to the device or not.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/TermCharEnabled
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file determines if the TermChar is to be sent to the
-		device on every transaction or not.  For more details about
-		this, please see the document, "Universal Serial Bus Test and
-		Measurement Class Specification (USBTMC) Revision 1.0" as
-		published by the USB-IF.
-
-
-What:		/sys/bus/usb/drivers/usbtmc/*/auto_abort
-Date:		August 2008
-Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Description:
-		This file determines if the transaction of the USB TMC
-		device is to be automatically aborted if there is any error.
-		For more details about this, please see the document,
-		"Universal Serial Bus Test and Measurement Class Specification
-		(USBTMC) Revision 1.0" as published by the USB-IF.

+ 41 - 0
Documentation/ABI/testing/configfs-stp-policy-p_sys-t

@@ -0,0 +1,41 @@
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/uuid
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		UUID source identifier string, RW.
+		Default value is randomly generated at the mkdir <node> time.
+		Data coming from trace sources that use this <node> will be
+		tagged with this UUID in the MIPI SyS-T packet stream, to
+		allow the decoder to discern between different sources
+		within the same master/channel range, and identify the
+		higher level decoders that may be needed for each source.
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/do_len
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Include payload length in the MIPI SyS-T header, boolean.
+		If enabled, the SyS-T protocol encoder will include payload
+		length in each packet's metadata. This is normally redundant
+		if the underlying transport protocol supports marking message
+		boundaries (which STP does), so this is off by default.
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/ts_interval
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Time interval in milliseconds. Include a timestamp in the
+		MIPI SyS-T packet metadata, if this many milliseconds have
+		passed since the previous packet from this source. Zero is
+		the default and stands for "never send the timestamp".
+
+What:		/config/stp-policy/<device>:p_sys-t.<policy>/<node>/clocksync_interval
+Date:		June 2018
+KernelVersion:	4.19
+Description:
+		Time interval in milliseconds. Send a CLOCKSYNC packet if
+		this many milliseconds have passed since the previous
+		CLOCKSYNC packet from this source. Zero is the default and
+		stands for "never send the CLOCKSYNC". It makes sense to
+		use this option with sources that generate constant and/or
+		periodic data, like stm_heartbeat.

+ 24 - 0
Documentation/ABI/testing/configfs-usb-gadget-uvc

@@ -12,6 +12,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Control descriptors
 
+		All attributes read only:
+		bInterfaceNumber	- USB interface number for this
+					  streaming interface
+
 What:		/config/usb-gadget/gadget/functions/uvc.name/control/class
 Date:		Dec 2014
 KernelVersion:	4.0
@@ -109,6 +113,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Streaming descriptors
 
+		All attributes read only:
+		bInterfaceNumber	- USB interface number for this
+					  streaming interface
+
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/class
 Date:		Dec 2014
 KernelVersion:	4.0
@@ -160,6 +168,10 @@ Description:	Specific MJPEG format descriptors
 
 		All attributes read only,
 		except bmaControls and bDefaultFrameIndex:
+		bFormatIndex		- unique id for this format descriptor;
+					only defined after parent header is
+					linked into the streaming class;
+					read-only
 		bmaControls		- this format's data for bmaControls in
 					the streaming header
 		bmInterfaceFlags	- specifies interlace information,
@@ -177,6 +189,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific MJPEG frame descriptors
 
+		bFrameIndex		- unique id for this framedescriptor;
+					only defined after parent format is
+					linked into the streaming header;
+					read-only
 		dwFrameInterval		- indicates how frame interval can be
 					programmed; a number of values
 					separated by newline can be specified
@@ -204,6 +220,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific uncompressed format descriptors
 
+		bFormatIndex		- unique id for this format descriptor;
+					only defined after parent header is
+					linked into the streaming class;
+					read-only
 		bmaControls		- this format's data for bmaControls in
 					the streaming header
 		bmInterfaceFlags	- specifies interlace information,
@@ -224,6 +244,10 @@ Date:		Dec 2014
 KernelVersion:	4.0
 Description:	Specific uncompressed frame descriptors
 
+		bFrameIndex		- unique id for this framedescriptor;
+					only defined after parent format is
+					linked into the streaming header;
+					read-only
 		dwFrameInterval		- indicates how frame interval can be
 					programmed; a number of values
 					separated by newline can be specified

+ 1 - 1
Documentation/ABI/testing/sysfs-bus-iio

@@ -199,7 +199,7 @@ Description:
 
 What:		/sys/bus/iio/devices/iio:deviceX/in_positionrelative_x_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_positionrelative_y_raw
-KernelVersion:	4.18
+KernelVersion:	4.19
 Contact:	linux-iio@vger.kernel.org
 Description:
 		Relative position in direction x or y on a pad (may be

+ 24 - 0
Documentation/ABI/testing/sysfs-bus-pci

@@ -323,3 +323,27 @@ Description:
 
 		This is similar to /sys/bus/pci/drivers_autoprobe, but
 		affects only the VFs associated with a specific PF.
+
+What:		/sys/bus/pci/devices/.../p2pmem/size
+Date:		November 2017
+Contact:	Logan Gunthorpe <logang@deltatee.com>
+Description:
+		If the device has any Peer-to-Peer memory registered, this
+	        file contains the total amount of memory that the device
+		provides (in decimal).
+
+What:		/sys/bus/pci/devices/.../p2pmem/available
+Date:		November 2017
+Contact:	Logan Gunthorpe <logang@deltatee.com>
+Description:
+		If the device has any Peer-to-Peer memory registered, this
+	        file contains the amount of memory that has not been
+		allocated (in decimal).
+
+What:		/sys/bus/pci/devices/.../p2pmem/published
+Date:		November 2017
+Contact:	Logan Gunthorpe <logang@deltatee.com>
+Description:
+		If the device has any Peer-to-Peer memory registered, this
+	        file contains a '1' if the memory has been published for
+		use outside the driver that owns the device.

+ 18 - 1
Documentation/ABI/testing/sysfs-bus-usb

@@ -189,6 +189,16 @@ Description:
 		The file will read "hotplug", "wired" and "not used" if the
 		information is available, and "unknown" otherwise.
 
+What:		/sys/bus/usb/devices/.../(hub interface)/portX/location
+Date:		October 2018
+Contact:	Bjørn Mork <bjorn@mork.no>
+Description:
+		Some platforms provide usb port physical location through
+		firmware. This is used by the kernel to pair up logical ports
+		mapping to the same physical connector. The attribute exposes the
+		raw location value as a hex integer.
+
+
 What:		/sys/bus/usb/devices/.../(hub interface)/portX/quirks
 Date:		May 2018
 Contact:	Nicolas Boichat <drinkcat@chromium.org>
@@ -219,7 +229,14 @@ Description:
 		ports and report them to the kernel. This attribute is to expose
 		the number of over-current situation occurred on a specific port
 		to user space. This file will contain an unsigned 32 bit value
-		which wraps to 0 after its maximum is reached.
+		which wraps to 0 after its maximum is reached. This file supports
+		poll() for monitoring changes to this value in user space.
+
+		Any time this value changes the corresponding hub device will send a
+		udev event with the following attributes:
+
+		OVER_CURRENT_PORT=/sys/bus/usb/devices/.../(hub interface)/portX
+		OVER_CURRENT_COUNT=[current value of this sysfs attribute]
 
 What:		/sys/bus/usb/devices/.../(hub interface)/portX/usb3_lpm_permit
 Date:		November 2015

+ 21 - 0
Documentation/ABI/testing/sysfs-bus-vmbus

@@ -0,0 +1,21 @@
+What:		/sys/bus/vmbus/devices/.../driver_override
+Date:		August 2019
+Contact:	Stephen Hemminger <sthemmin@microsoft.com>
+Description:
+		This file allows the driver for a device to be specified which
+		will override standard static and dynamic ID matching.  When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device.  The override is specified by writing a string to the
+		driver_override file (echo uio_hv_generic > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver.  If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver.  This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none".  Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
+

+ 0 - 27
Documentation/ABI/testing/sysfs-class-lcd-s6e63m0

@@ -1,27 +0,0 @@
-sysfs interface for the S6E63M0 AMOLED LCD panel driver
--------------------------------------------------------
-
-What:		/sys/class/lcd/<lcd>/gamma_mode
-Date:		May, 2010
-KernelVersion:	v2.6.35
-Contact:	dri-devel@lists.freedesktop.org
-Description:
-		(RW) Read or write the gamma mode. Following three modes are
-		supported:
-		0 - gamma value 2.2,
-		1 - gamma value 1.9 and
-		2 - gamma value 1.7.
-
-
-What:		/sys/class/lcd/<lcd>/gamma_table
-Date:		May, 2010
-KernelVersion:	v2.6.35
-Contact:	dri-devel@lists.freedesktop.org
-Description:
-		(RO) Displays the size of the gamma table i.e. the number of
-		gamma modes available.
-
-This is a backlight lcd driver. These interfaces are an extension to the API
-documented in Documentation/ABI/testing/sysfs-class-lcd and in
-Documentation/ABI/stable/sysfs-class-backlight (under
-/sys/class/backlight/<backlight>/).

+ 22 - 0
Documentation/ABI/testing/sysfs-class-led-driver-sc27xx

@@ -0,0 +1,22 @@
+What:		/sys/class/leds/<led>/hw_pattern
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a hardware pattern for the SC27XX LED. For the SC27XX
+		LED controller, it only supports 4 stages to make a single
+		hardware pattern, which is used to configure the rise time,
+		high time, fall time and low time for the breathing mode.
+
+		For the breathing mode, the SC27XX LED only expects one brightness
+		for the high stage. To be compatible with the hardware pattern
+		format, we should set brightness as 0 for rise stage, fall
+		stage and low stage.
+
+		Min stage duration: 125 ms
+		Max stage duration: 31875 ms
+
+		Since the stage duration step is 125 ms, the duration should be
+		a multiplier of 125, like 125ms, 250ms, 375ms, 500ms ... 31875ms.
+
+		Thus the format of the hardware pattern values should be:
+		"0 rise_duration brightness high_duration 0 fall_duration 0 low_duration".

+ 82 - 0
Documentation/ABI/testing/sysfs-class-led-trigger-pattern

@@ -0,0 +1,82 @@
+What:		/sys/class/leds/<led>/pattern
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a software pattern for the LED, that supports altering
+		the brightness for the specified duration with one software
+		timer. It can do gradual dimming and step change of brightness.
+
+		The pattern is given by a series of tuples, of brightness and
+		duration (ms). The LED is expected to traverse the series and
+		each brightness value for the specified duration. Duration of
+		0 means brightness should immediately change to new value, and
+		writing malformed pattern deactivates any active one.
+
+		1. For gradual dimming, the dimming interval now is set as 50
+		milliseconds. So the tuple with duration less than dimming
+		interval (50ms) is treated as a step change of brightness,
+		i.e. the subsequent brightness will be applied without adding
+		intervening dimming intervals.
+
+		The gradual dimming format of the software pattern values should be:
+		"brightness_1 duration_1 brightness_2 duration_2 brightness_3
+		duration_3 ...". For example:
+
+		echo 0 1000 255 2000 > pattern
+
+		It will make the LED go gradually from zero-intensity to max (255)
+		intensity in 1000 milliseconds, then back to zero intensity in 2000
+		milliseconds:
+
+		LED brightness
+		    ^
+		255-|       / \            / \            /
+		    |      /    \         /    \         /
+		    |     /       \      /       \      /
+		    |    /          \   /          \   /
+		  0-|   /             \/             \/
+		    +---0----1----2----3----4----5----6------------> time (s)
+
+		2. To make the LED go instantly from one brigntess value to another,
+		we should use use zero-time lengths (the brightness must be same as
+		the previous tuple's). So the format should be:
+		"brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
+		brightness_2 0 ...". For example:
+
+		echo 0 1000 0 0 255 2000 255 0 > pattern
+
+		It will make the LED stay off for one second, then stay at max brightness
+		for two seconds:
+
+		LED brightness
+		    ^
+		255-|        +---------+    +---------+
+		    |        |         |    |         |
+		    |        |         |    |         |
+		    |        |         |    |         |
+		  0-|   -----+         +----+         +----
+		    +---0----1----2----3----4----5----6------------> time (s)
+
+What:		/sys/class/leds/<led>/hw_pattern
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a hardware pattern for the LED, for LED hardware that
+		supports autonomously controlling brightness over time, according
+		to some preprogrammed hardware patterns. It deactivates any active
+		software pattern.
+
+		Since different LED hardware can have different semantics of
+		hardware patterns, each driver is expected to provide its own
+		description for the hardware patterns in their ABI documentation
+		file.
+
+What:		/sys/class/leds/<led>/repeat
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a pattern repeat number. -1 means repeat indefinitely,
+		other negative numbers and number 0 are invalid.
+
+		This file will always return the originally written repeat
+		number.

+ 20 - 2
Documentation/ABI/testing/sysfs-class-net

@@ -91,6 +91,24 @@ Description:
 		stacked (e.g: VLAN interfaces) but still have the same MAC
 		address as their parent device.
 
+What:		/sys/class/net/<iface>/dev_port
+Date:		February 2014
+KernelVersion:	3.15
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the port number of this network device, formatted
+		as a decimal value. Some NICs have multiple independent ports
+		on the same PCI bus, device and function. This attribute allows
+		userspace to distinguish the respective interfaces.
+
+		Note: some device drivers started to use 'dev_id' for this
+		purpose since long before 3.15 and have not adopted the new
+		attribute ever since. To query the port number, some tools look
+		exclusively at 'dev_port', while others only consult 'dev_id'.
+		If a network device has multiple client adapter ports as
+		described in the previous paragraph and does not set this
+		attribute to its port number, it's a kernel bug.
+
 What:		/sys/class/net/<iface>/dormant
 Date:		March 2006
 KernelVersion:	2.6.17
@@ -117,7 +135,7 @@ Description:
 		full: full duplex
 
 		Note: This attribute is only valid for interfaces that implement
-		the ethtool get_settings method (mostly Ethernet).
+		the ethtool get_link_ksettings method (mostly Ethernet).
 
 What:		/sys/class/net/<iface>/flags
 Date:		April 2005
@@ -224,7 +242,7 @@ Description:
 		an integer representing the link speed in Mbits/sec.
 
 		Note: this attribute is only valid for interfaces that implement
-		the ethtool get_settings method (mostly Ethernet ).
+		the ethtool get_link_ksettings method (mostly Ethernet).
 
 What:		/sys/class/net/<iface>/tx_queue_len
 Date:		April 2005

+ 7 - 0
Documentation/ABI/testing/sysfs-class-net-dsa

@@ -0,0 +1,7 @@
+What:		/sys/class/net/<iface>/tagging
+Date:		August 2018
+KernelVersion:	4.20
+Contact:	netdev@vger.kernel.org
+Description:
+		String indicating the type of tagging protocol used by the
+		DSA slave network device.

+ 10 - 0
Documentation/ABI/testing/sysfs-driver-xen-blkback

@@ -15,3 +15,13 @@ Description:
                 blkback. If the frontend tries to use more than
                 max_persistent_grants, the LRU kicks in and starts
                 removing 5% of max_persistent_grants every 100ms.
+
+What:           /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds
+Date:           August 2018
+KernelVersion:  4.19
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                How long a persistent grant is allowed to remain
+                allocated without being in use. The time is in
+                seconds, 0 means indefinitely long.
+                The default is 60 seconds.

+ 16 - 1
Documentation/ABI/testing/sysfs-fs-f2fs

@@ -121,7 +121,22 @@ What:		/sys/fs/f2fs/<disk>/idle_interval
 Date:		January 2016
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
-		 Controls the idle timing.
+		 Controls the idle timing for all paths other than
+		 discard and gc path.
+
+What:		/sys/fs/f2fs/<disk>/discard_idle_interval
+Date:		September 2018
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+		 Controls the idle timing for discard path.
+
+What:		/sys/fs/f2fs/<disk>/gc_idle_interval
+Date:		September 2018
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Contact:	"Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+		 Controls the idle timing for gc path.
 
 What:		/sys/fs/f2fs/<disk>/iostat_enable
 Date:		August 2017

+ 35 - 0
Documentation/ABI/testing/sysfs-platform-lg-laptop

@@ -0,0 +1,35 @@
+What:		/sys/devices/platform/lg-laptop/reader_mode
+Date:		October 2018
+KernelVersion:	4.20
+Contact:	"Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control reader mode. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/lg-laptop/fn_lock
+Date:		October 2018
+KernelVersion:	4.20
+Contact:	"Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control FN lock mode. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/lg-laptop/battery_care_limit
+Date:		October 2018
+KernelVersion:	4.20
+Contact:	"Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Maximal battery charge level. Accepted values are 80 or 100.
+
+What:		/sys/devices/platform/lg-laptop/fan_mode
+Date:		October 2018
+KernelVersion:	4.20
+Contact:	"Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control fan mode. 1 for performance mode, 0 for silent mode.
+
+What:		/sys/devices/platform/lg-laptop/usb_charge
+Date:		October 2018
+KernelVersion:	4.20
+Contact:	"Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control USB port charging when device is turned off.
+        1 means on, 0 means off.

+ 1 - 1
Documentation/ABI/testing/sysfs-power

@@ -99,7 +99,7 @@ Description:
 		this file, the suspend image will be as small as possible.
 
 		Reading from this file will display the current image size
-		limit, which is set to 500 MB by default.
+		limit, which is set to around 2/5 of available RAM by default.
 
 What:		/sys/power/pm_trace
 Date:		August 2006

+ 0 - 26
Documentation/PCI/00-INDEX

@@ -1,26 +0,0 @@
-00-INDEX
-	- this file
-acpi-info.txt
-	- info on how PCI host bridges are represented in ACPI
-MSI-HOWTO.txt
-	- the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ.
-PCIEBUS-HOWTO.txt
-	- a guide describing the PCI Express Port Bus driver
-pci-error-recovery.txt
-	- info on PCI error recovery
-pci-iov-howto.txt
-	- the PCI Express I/O Virtualization HOWTO
-pci.txt
-	- info on the PCI subsystem for device driver authors
-pcieaer-howto.txt
-	- the PCI Express Advanced Error Reporting Driver Guide HOWTO
-endpoint/pci-endpoint.txt
-	- guide to add endpoint controller driver and endpoint function driver.
-endpoint/pci-endpoint-cfs.txt
-	- guide to use configfs to configure the PCI endpoint function.
-endpoint/pci-test-function.txt
-	- specification of *PCI test* function device.
-endpoint/pci-test-howto.txt
-	- userguide for PCI endpoint test function.
-endpoint/function/binding/
-	- binding documentation for PCI endpoint function

+ 11 - 8
Documentation/PCI/endpoint/pci-test-howto.txt

@@ -99,17 +99,20 @@ Note that the devices listed here correspond to the value populated in 1.4 above
 2.2 Using Endpoint Test function Device
 
 pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
-tests. Before pcitest.sh can be used pcitest.c should be compiled using the
-following commands.
+tests. To compile this tool the following commands should be used:
 
-	cd <kernel-dir>
-	make headers_install ARCH=arm
-	arm-linux-gnueabihf-gcc -Iusr/include tools/pci/pcitest.c -o pcitest
-	cp pcitest  <rootfs>/usr/sbin/
-	cp tools/pci/pcitest.sh <rootfs>
+	# cd <kernel-dir>
+	# make -C tools/pci
+
+or if you desire to compile and install in your system:
+
+	# cd <kernel-dir>
+	# make -C tools/pci install
+
+The tool and script will be located in <rootfs>/usr/bin/
 
 2.2.1 pcitest.sh Output
-	# ./pcitest.sh
+	# pcitest.sh
 	BAR tests
 
 	BAR0:           OKAY

+ 10 - 25
Documentation/PCI/pci-error-recovery.txt

@@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error
 event will be platform-dependent, but will follow the general
 sequence described below.
 
-STEP 0: Error Event: ERR_NONFATAL
+STEP 0: Error Event
 -------------------
 A PCI bus error is detected by the PCI hardware.  On powerpc, the slot
 is isolated, in that all I/O is blocked: all reads return 0xffffffff,
@@ -228,7 +228,13 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
 If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
 proceeds to STEP 4 (Slot Reset)
 
-STEP 3: Slot Reset
+STEP 3: Link Reset
+------------------
+The platform resets the link.  This is a PCI-Express specific step
+and is done whenever a fatal error has been detected that can be
+"solved" by resetting the link.
+
+STEP 4: Slot Reset
 ------------------
 
 In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
@@ -314,7 +320,7 @@ Failure).
 >>> However, it probably should.
 
 
-STEP 4: Resume Operations
+STEP 5: Resume Operations
 -------------------------
 The platform will call the resume() callback on all affected device
 drivers if all drivers on the segment have returned
@@ -326,7 +332,7 @@ a result code.
 At this point, if a new error happens, the platform will restart
 a new error recovery sequence.
 
-STEP 5: Permanent Failure
+STEP 6: Permanent Failure
 -------------------------
 A "permanent failure" has occurred, and the platform cannot recover
 the device.  The platform will call error_detected() with a
@@ -349,27 +355,6 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
 for additional detail on real-life experience of the causes of
 software errors.
 
-STEP 0: Error Event: ERR_FATAL
--------------------
-PCI bus error is detected by the PCI hardware. On powerpc, the slot is
-isolated, in that all I/O is blocked: all reads return 0xffffffff, all
-writes are ignored.
-
-STEP 1: Remove devices
---------------------
-Platform removes the devices depending on the error agent, it could be
-this port for all subordinates or upstream component (likely downstream
-port)
-
-STEP 2: Reset link
---------------------
-The platform resets the link.  This is a PCI-Express specific step and is
-done whenever a fatal error has been detected that can be "solved" by
-resetting the link.
-
-STEP 3: Re-enumerate the devices
---------------------
-Initiates the re-enumeration.
 
 Conclusion; General Remarks
 ---------------------------

+ 0 - 34
Documentation/RCU/00-INDEX

@@ -1,34 +0,0 @@
-00-INDEX
-	- This file
-arrayRCU.txt
-	- Using RCU to Protect Read-Mostly Arrays
-checklist.txt
-	- Review Checklist for RCU Patches
-listRCU.txt
-	- Using RCU to Protect Read-Mostly Linked Lists
-lockdep.txt
-	- RCU and lockdep checking
-lockdep-splat.txt
-	- RCU Lockdep splats explained.
-NMI-RCU.txt
-	- Using RCU to Protect Dynamic NMI Handlers
-rcu_dereference.txt
-	- Proper care and feeding of return values from rcu_dereference()
-rcubarrier.txt
-	- RCU and Unloadable Modules
-rculist_nulls.txt
-	- RCU list primitives for use with SLAB_TYPESAFE_BY_RCU
-rcuref.txt
-	- Reference-count design for elements of lists/arrays protected by RCU
-rcu.txt
-	- RCU Concepts
-RTFP.txt
-	- List of RCU papers (bibliography) going back to 1980.
-stallwarn.txt
-	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
-torture.txt
-	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
-UP.txt
-	- RCU on Uniprocessor Systems
-whatisRCU.txt
-	- What is RCU?

+ 12 - 19
Documentation/RCU/Design/Data-Structures/Data-Structures.html

@@ -1227,9 +1227,11 @@ to overflow the counter, this approach corrects the
 CPU enters the idle loop from process context.
 
 </p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
-CPU's transitions to and from dyntick-idle mode, so that this counter
-has an even value when the CPU is in dyntick-idle mode and an odd
-value otherwise.
+CPU's transitions to and from either dyntick-idle or user mode, so
+that this counter has an even value when the CPU is in dyntick-idle
+mode or user mode and an odd value otherwise. The transitions to/from
+user mode need to be counted for user mode adaptive-ticks support
+(see timers/NO_HZ.txt).
 
 </p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
 to record the fact that the RCU core code would really like to
@@ -1372,8 +1374,7 @@ that is, if the CPU is currently idle.
 Accessor Functions</a></h3>
 
 <p>The following listing shows the
-<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first</tt>,
-<tt>rcu_for_each_nonleaf_node_breadth_first()</tt>, and
+<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first</tt> and
 <tt>rcu_for_each_leaf_node()</tt> function and macros:
 
 <pre>
@@ -1386,13 +1387,9 @@ Accessor Functions</a></h3>
   7   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
   8        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
   9
- 10 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
- 11   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
- 12        (rnp) &lt; (rsp)-&gt;level[NUM_RCU_LVLS - 1]; (rnp)++)
- 13
- 14 #define rcu_for_each_leaf_node(rsp, rnp) \
- 15   for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
- 16        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+ 10 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 11   for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
+ 12        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
 </pre>
 
 <p>The <tt>rcu_get_root()</tt> simply returns a pointer to the
@@ -1405,10 +1402,7 @@ macro takes advantage of the layout of the <tt>rcu_node</tt>
 structures in the <tt>rcu_state</tt> structure's
 <tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
 simply traversing the array in order.
-The <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> macro operates
-similarly, but traverses only the first part of the array, thus excluding
-the leaf <tt>rcu_node</tt> structures.
-Finally, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
+Similarly, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
 the last part of the array, thus traversing only the leaf
 <tt>rcu_node</tt> structures.
 
@@ -1416,15 +1410,14 @@ the last part of the array, thus traversing only the leaf
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
 <tr><td>
-	What do <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> and
+	What does
 	<tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
 	contains only a single node?
 </td></tr>
 <tr><th align="left">Answer:</th></tr>
 <tr><td bgcolor="#ffffff"><font color="ffffff">
 	In the single-node case,
-	<tt>rcu_for_each_nonleaf_node_breadth_first()</tt> is a no-op
-	and <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
+	<tt>rcu_for_each_leaf_node()</tt> traverses the single node.
 </font></td></tr>
 <tr><td>&nbsp;</td></tr>
 </table>

+ 4 - 5
Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html

@@ -12,10 +12,9 @@ high efficiency and minimal disturbance, expedited grace periods accept
 lower efficiency and significant disturbance to attain shorter latencies.
 
 <p>
-There are three flavors of RCU (RCU-bh, RCU-preempt, and RCU-sched),
-but only two flavors of expedited grace periods because the RCU-bh
-expedited grace period maps onto the RCU-sched expedited grace period.
-Each of the remaining two implementations is covered in its own section.
+There are two flavors of RCU (RCU-preempt and RCU-sched), with an earlier
+third RCU-bh flavor having been implemented in terms of the other two.
+Each of the two implementations is covered in its own section.
 
 <ol>
 <li>	<a href="#Expedited Grace Period Design">
@@ -158,7 +157,7 @@ whether or not the current CPU is in an RCU read-side critical section.
 The best that <tt>sync_sched_exp_handler()</tt> can do is to check
 for idle, on the off-chance that the CPU went idle while the IPI
 was in flight.
-If the CPU is idle, then tt>sync_sched_exp_handler()</tt> reports
+If the CPU is idle, then <tt>sync_sched_exp_handler()</tt> reports
 the quiescent state.
 
 <p>

+ 72 - 142
Documentation/RCU/Design/Requirements/Requirements.html

@@ -1306,8 +1306,6 @@ doing so would degrade real-time response.
 
 <p>
 This non-requirement appeared with preemptible RCU.
-If you need a grace period that waits on non-preemptible code regions, use
-<a href="#Sched Flavor">RCU-sched</a>.
 
 <h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
 
@@ -2165,14 +2163,9 @@ however, this is not a panacea because there would be severe restrictions
 on what operations those callbacks could invoke.
 
 <p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
-<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
-(<a href="#Bottom-Half Flavor">discussed below</a>),
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>,
+Perhaps surprisingly, <tt>synchronize_rcu()</tt> and
 <tt>synchronize_rcu_expedited()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>, and
-<tt>synchronize_sched_expedited()</tt>
-will all operate normally
+will operate normally
 during very early boot, the reason being that there is only one CPU
 and preemption is disabled.
 This means that the call <tt>synchronize_rcu()</tt> (or friends)
@@ -2269,12 +2262,23 @@ Thankfully, RCU update-side primitives, including
 The name notwithstanding, some Linux-kernel architectures
 can have nested NMIs, which RCU must handle correctly.
 Andy Lutomirski
-<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
+<a href="https://lkml.kernel.org/r/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
 with this requirement;
 he also kindly surprised me with
-<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
+<a href="https://lkml.kernel.org/r/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
 that meets this requirement.
 
+<p>
+Furthermore, NMI handlers can be interrupted by what appear to RCU
+to be normal interrupts.
+One way that this can happen is for code that directly invokes
+<tt>rcu_irq_enter()</tt> and </tt>rcu_irq_exit()</tt> to be called
+from an NMI handler.
+This astonishing fact of life prompted the current code structure,
+which has <tt>rcu_irq_enter()</tt> invoking <tt>rcu_nmi_enter()</tt>
+and <tt>rcu_irq_exit()</tt> invoking <tt>rcu_nmi_exit()</tt>.
+And yes, I also learned of this requirement the hard way.
+
 <h3><a name="Loadable Modules">Loadable Modules</a></h3>
 
 <p>
@@ -2394,30 +2398,9 @@ when invoked from a CPU-hotplug notifier.
 <p>
 RCU depends on the scheduler, and the scheduler uses RCU to
 protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
+The preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must therefore be written carefully to avoid deadlocks
+involving the scheduler's runqueue and priority-inheritance locks.
 In particular, <tt>rcu_read_unlock()</tt> must tolerate an
 interrupt where the interrupt handler invokes both
 <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
@@ -2426,7 +2409,7 @@ negative nesting levels to avoid destructive recursion via
 interrupt handler's use of RCU.
 
 <p>
-This pair of mutual scheduler-RCU requirements came as a
+This scheduler-RCU requirement came as a
 <a href="https://lwn.net/Articles/453002/">complete surprise</a>.
 
 <p>
@@ -2437,9 +2420,28 @@ when running context-switch-heavy workloads when built with
 <tt>CONFIG_NO_HZ_FULL=y</tt>
 <a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
 RCU has made good progress towards meeting this requirement, even
-for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
 but there is room for further improvement.
 
+<p>
+In the past, it was forbidden to disable interrupts across an
+<tt>rcu_read_unlock()</tt> unless that interrupt-disabled region
+of code also included the matching <tt>rcu_read_lock()</tt>.
+Violating this restriction could result in deadlocks involving the
+scheduler's runqueue and priority-inheritance spinlocks.
+This restriction was lifted when interrupt-disabled calls to
+<tt>rcu_read_unlock()</tt> started deferring the reporting of
+the resulting RCU-preempt quiescent state until the end of that
+interrupts-disabled region.
+This deferred reporting means that the scheduler's runqueue and
+priority-inheritance locks cannot be held while reporting an RCU-preempt
+quiescent state, which lifts the earlier restriction, at least from
+a deadlock perspective.
+Unfortunately, real-time systems using RCU priority boosting may
+need this restriction to remain in effect because deferred
+quiescent-state reporting also defers deboosting, which in turn
+degrades real-time latencies.
+
 <h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
 
 <p>
@@ -2850,15 +2852,22 @@ The other four flavors are listed below, with requirements for each
 described in a separate section.
 
 <ol>
-<li>	<a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
-<li>	<a href="#Sched Flavor">Sched Flavor</a>
+<li>	<a href="#Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a>
+<li>	<a href="#Sched Flavor">Sched Flavor (Historical)</a>
 <li>	<a href="#Sleepable RCU">Sleepable RCU</a>
 <li>	<a href="#Tasks RCU">Tasks RCU</a>
-<li>	<a href="#Waiting for Multiple Grace Periods">
-	Waiting for Multiple Grace Periods</a>
 </ol>
 
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
+<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor (Historical)</a></h3>
+
+<p>
+The RCU-bh flavor of RCU has since been expressed in terms of
+the other RCU flavors as part of a consolidation of the three
+flavors into a single flavor.
+The read-side API remains, and continues to disable softirq and to
+be accounted for by lockdep.
+Much of the material in this section is therefore strictly historical
+in nature.
 
 <p>
 The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
@@ -2918,8 +2927,20 @@ includes
 <tt>call_rcu_bh()</tt>,
 <tt>rcu_barrier_bh()</tt>, and
 <tt>rcu_read_lock_bh_held()</tt>.
+However, the update-side APIs are now simple wrappers for other RCU
+flavors, namely RCU-sched in CONFIG_PREEMPT=n kernels and RCU-preempt
+otherwise.
+
+<h3><a name="Sched Flavor">Sched Flavor (Historical)</a></h3>
 
-<h3><a name="Sched Flavor">Sched Flavor</a></h3>
+<p>
+The RCU-sched flavor of RCU has since been expressed in terms of
+the other RCU flavors as part of a consolidation of the three
+flavors into a single flavor.
+The read-side API remains, and continues to disable preemption and to
+be accounted for by lockdep.
+Much of the material in this section is therefore strictly historical
+in nature.
 
 <p>
 Before preemptible RCU, waiting for an RCU grace period had the
@@ -3139,94 +3160,14 @@ The tasks-RCU API is quite compact, consisting only of
 <tt>call_rcu_tasks()</tt>,
 <tt>synchronize_rcu_tasks()</tt>, and
 <tt>rcu_barrier_tasks()</tt>.
-
-<h3><a name="Waiting for Multiple Grace Periods">
-Waiting for Multiple Grace Periods</a></h3>
-
-<p>
-Perhaps you have an RCU protected data structure that is accessed from
-RCU read-side critical sections, from softirq handlers, and from
-hardware interrupt handlers.
-That is three flavors of RCU, the normal flavor, the bottom-half flavor,
-and the sched flavor.
-How to wait for a compound grace period?
-
-<p>
-The best approach is usually to &ldquo;just say no!&rdquo; and
-insert <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
-around each RCU read-side critical section, regardless of what
-environment it happens to be in.
-But suppose that some of the RCU read-side critical sections are
-on extremely hot code paths, and that use of <tt>CONFIG_PREEMPT=n</tt>
-is not a viable option, so that <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are not free.
-What then?
-
-<p>
-You <i>could</i> wait on all three grace periods in succession, as follows:
-
-<blockquote>
-<pre>
- 1 synchronize_rcu();
- 2 synchronize_rcu_bh();
- 3 synchronize_sched();
-</pre>
-</blockquote>
-
-<p>
-This works, but triples the update-side latency penalty.
-In cases where this is not acceptable, <tt>synchronize_rcu_mult()</tt>
-may be used to wait on all three flavors of grace period concurrently:
-
-<blockquote>
-<pre>
- 1 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched);
-</pre>
-</blockquote>
-
-<p>
-But what if it is necessary to also wait on SRCU?
-This can be done as follows:
-
-<blockquote>
-<pre>
- 1 static void call_my_srcu(struct rcu_head *head,
- 2        void (*func)(struct rcu_head *head))
- 3 {
- 4   call_srcu(&amp;my_srcu, head, func);
- 5 }
- 6
- 7 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched, call_my_srcu);
-</pre>
-</blockquote>
-
-<p>
-If you needed to wait on multiple different flavors of SRCU
-(but why???), you would need to create a wrapper function resembling
-<tt>call_my_srcu()</tt> for each SRCU flavor.
-
-<table>
-<tr><th>&nbsp;</th></tr>
-<tr><th align="left">Quick Quiz:</th></tr>
-<tr><td>
-	But what if I need to wait for multiple RCU flavors, but I also need
-	the grace periods to be expedited?
-</td></tr>
-<tr><th align="left">Answer:</th></tr>
-<tr><td bgcolor="#ffffff"><font color="ffffff">
-	If you are using expedited grace periods, there should be less penalty
-	for waiting on them in succession.
-	But if that is nevertheless a problem, you can use workqueues
-	or multiple kthreads to wait on the various expedited grace
-	periods concurrently.
-</font></td></tr>
-<tr><td>&nbsp;</td></tr>
-</table>
-
-<p>
-Again, it is usually better to adjust the RCU read-side critical sections
-to use a single flavor of RCU, but when this is not feasible, you can use
-<tt>synchronize_rcu_mult()</tt>.
+In <tt>CONFIG_PREEMPT=n</tt> kernels, trampolines cannot be preempted,
+so these APIs map to
+<tt>call_rcu()</tt>,
+<tt>synchronize_rcu()</tt>, and
+<tt>rcu_barrier()</tt>, respectively.
+In <tt>CONFIG_PREEMPT=y</tt> kernels, trampolines can be preempted,
+and these three APIs are therefore implemented by separate functions
+that check for voluntary context switches.
 
 <h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
 
@@ -3237,12 +3178,6 @@ If this becomes a serious problem, it will be necessary to rework the
 grace-period state machine so as to avoid the need for the additional
 latency.
 
-<p>
-Expedited grace periods scan the CPUs, so their latency and overhead
-increases with increasing numbers of CPUs.
-If this becomes a serious problem on large systems, it will be necessary
-to do some redesign to avoid this scalability problem.
-
 <p>
 RCU disables CPU hotplug in a few places, perhaps most notably in the
 <tt>rcu_barrier()</tt> operations.
@@ -3287,11 +3222,6 @@ Please note that arrangements that require RCU to remap CPU numbers will
 require extremely good demonstration of need and full exploration of
 alternatives.
 
-<p>
-There is an embarrassingly large number of flavors of RCU, and this
-number has been increasing over time.
-Perhaps it will be possible to combine some at some future date.
-
 <p>
 RCU's various kthreads are reasonably recent additions.
 It is quite likely that adjustments will be required to more gracefully

+ 0 - 4
Documentation/RCU/rcu.txt

@@ -87,7 +87,3 @@ o	Where can I find more information on RCU?
 
 	See the RTFP.txt file in this directory.
 	Or point your browser at http://www.rdrop.com/users/paulmck/RCU/.
-
-o	What are all these files in this directory?
-
-	See 00-INDEX for the list.

+ 5 - 8
Documentation/RCU/stallwarn.txt

@@ -16,12 +16,9 @@ o	A CPU looping in an RCU read-side critical section.
 
 o	A CPU looping with interrupts disabled.
 
-o	A CPU looping with preemption disabled.  This condition can
-	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
-	stalls.
+o	A CPU looping with preemption disabled.
 
-o	A CPU looping with bottom halves disabled.  This condition can
-	result in RCU-sched and RCU-bh stalls.
+o	A CPU looping with bottom halves disabled.
 
 o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
 	without invoking schedule().  If the looping in the kernel is
@@ -87,9 +84,9 @@ o	A hardware failure.  This is quite unlikely, but has occurred
 	This resulted in a series of RCU CPU stall warnings, eventually
 	leading the realization that the CPU had failed.
 
-The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
-warning.  Note that SRCU does -not- have CPU stall warnings.  Please note
-that RCU only detects CPU stalls when there is a grace period in progress.
+The RCU, RCU-sched, and RCU-tasks implementations have CPU stall warning.
+Note that SRCU does -not- have CPU stall warnings.  Please note that
+RCU only detects CPU stalls when there is a grace period in progress.
 No grace period, no CPU stall warnings.
 
 To diagnose the cause of the stall, inspect the stack traces.

+ 2 - 1
Documentation/RCU/whatisRCU.txt

@@ -934,7 +934,8 @@ c.	Do you need to treat NMI handlers, hardirq handlers,
 d.	Do you need RCU grace periods to complete even in the face
 	of softirq monopolization of one or more of the CPUs?  For
 	example, is your code subject to network-based denial-of-service
-	attacks?  If so, you need RCU-bh.
+	attacks?  If so, you should disable softirq across your readers,
+	for example, by using rcu_read_lock_bh().
 
 e.	Is your workload too update-intensive for normal use of
 	RCU, but inappropriate for other synchronization mechanisms?

+ 73 - 0
Documentation/accounting/psi.txt

@@ -0,0 +1,73 @@
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact it has on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format for CPU is as such:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+and for memory and IO:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+The ratios are tracked as recent trends over ten, sixty, and three
+hundred second windows, which gives insight into short term events as
+well as medium and long term trends. The total absolute stall time is
+tracked and exported as well, to allow detection of latency spikes
+which wouldn't necessarily make a dent in the time averages, or to
+average trends over custom time frames.
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.

+ 2 - 2
Documentation/admin-guide/LSM/Yama.rst

@@ -64,8 +64,8 @@ The sysctl settings (writable only with ``CAP_SYS_PTRACE``) are:
     Using ``PTRACE_TRACEME`` is unchanged.
 
 2 - admin-only attach:
-    only processes with ``CAP_SYS_PTRACE`` may use ptrace
-    with ``PTRACE_ATTACH``, or through children calling ``PTRACE_TRACEME``.
+    only processes with ``CAP_SYS_PTRACE`` may use ptrace, either with
+    ``PTRACE_ATTACH`` or through children calling ``PTRACE_TRACEME``.
 
 3 - no attach:
     no processes may use ptrace with ``PTRACE_ATTACH`` nor via

+ 1 - 2
Documentation/admin-guide/README.rst

@@ -51,8 +51,7 @@ Documentation
 
  - There are various README files in the Documentation/ subdirectory:
    these typically contain kernel-specific installation notes for some
-   drivers for example. See Documentation/00-INDEX for a list of what
-   is contained in each file.  Please read the
+   drivers for example. Please read the
    :ref:`Documentation/process/changes.rst <changes>` file, as it
    contains information about the problems, which may result by upgrading
    your kernel.

+ 22 - 0
Documentation/admin-guide/cgroup-v2.rst

@@ -966,6 +966,12 @@ All time durations are in microseconds.
 	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
 	one number is written, $MAX is updated.
 
+  cpu.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for CPU. See
+	Documentation/accounting/psi.txt for details.
+
 
 Memory
 ------
@@ -1127,6 +1133,10 @@ PAGE_SIZE multiple when read back.
 		disk readahead.  For now OOM in memory cgroup kills
 		tasks iff shortage has happened inside page fault.
 
+		This event is not raised if the OOM killer is not
+		considered as an option, e.g. for failed high-order
+		allocations.
+
 	  oom_kill
 		The number of processes belonging to this cgroup
 		killed by any kind of OOM killer.
@@ -1271,6 +1281,12 @@ PAGE_SIZE multiple when read back.
 	higher than the limit for an extended period of time.  This
 	reduces the impact on the workload and memory management.
 
+  memory.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for memory. See
+	Documentation/accounting/psi.txt for details.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
@@ -1408,6 +1424,12 @@ IO Interface Files
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=max
 
+  io.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for IO. See
+	Documentation/accounting/psi.txt for details.
+
 
 Writeback
 ~~~~~~~~~

+ 574 - 0
Documentation/admin-guide/ext4.rst

@@ -0,0 +1,574 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+ext4 General Information
+========================
+
+Ext4 is an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
+
+Mailing list:	linux-ext4@vger.kernel.org
+Web site:	http://ext4.wiki.kernel.org
+
+
+Quick usage instructions
+========================
+
+Note: More extensive information for getting started with ext4 can be
+found at the ext4 wiki site at the URL:
+http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
+  - The latest version of e2fsprogs can be found at:
+
+    https://www.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/
+
+	or
+
+    http://sourceforge.net/project/showfiles.php?group_id=2406
+
+	or grab the latest git repository from:
+
+   https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
+
+  - Create a new filesystem using the ext4 filesystem type:
+
+        # mke2fs -t ext4 /dev/hda1
+
+    Or to configure an existing ext3 filesystem to support extents:
+
+	# tune2fs -O extents /dev/hda1
+
+    If the filesystem was created with 128 byte inodes, it can be
+    converted to use 256 byte for greater efficiency via:
+
+        # tune2fs -I 256 /dev/hda1
+
+  - Mounting:
+
+	# mount -t ext4 /dev/hda1 /wherever
+
+  - When comparing performance with other filesystems, it's always
+    important to try multiple workloads; very often a subtle change in a
+    workload parameter can completely change the ranking of which
+    filesystems do well compared to others.  When comparing versus ext3,
+    note that ext4 enables write barriers by default, while ext3 does
+    not enable write barriers by default.  So it is useful to use
+    explicitly specify whether barriers are enabled or not when via the
+    '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
+    for a fair comparison.  When tuning ext3 for best benchmark numbers,
+    it is often worthwhile to try changing the data journaling mode; '-o
+    data=writeback' can be faster for some workloads.  (Note however that
+    running mounted with data=writeback can potentially leave stale data
+    exposed in recently written files in case of an unclean shutdown,
+    which could be a security exposure in some situations.)  Configuring
+    the filesystem with a large journal can also be helpful for
+    metadata-intensive workloads.
+
+Features
+========
+
+Currently Available
+-------------------
+
+* ability to use filesystems > 16TB (e2fsprogs support not available yet)
+* extent format reduces metadata overhead (RAM, IO for access, transactions)
+* extent format more robust in face of on-disk corruption due to magics,
+* internal redundancy in tree
+* improved file allocation (multi-block alloc)
+* lift 32000 subdirectory limit imposed by i_links_count[1]
+* nsec timestamps for mtime, atime, ctime, create time
+* inode version field on disk (NFSv4, Lustre)
+* reduced e2fsck time via uninit_bg feature
+* journal checksumming for robustness, performance
+* persistent file preallocation (e.g for streaming media, databases)
+* ability to pack bitmaps and inode tables into larger virtual groups via the
+  flex_bg feature
+* large file support
+* inode allocation using large virtual block groups via flex_bg
+* delayed allocation
+* large block (up to pagesize) support
+* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
+  the ordering)
+
+[1] Filesystems with a block size of 1k may see a limit imposed by the
+directory hash tree having a maximum depth of two.
+
+Options
+=======
+
+When mounting an ext4 filesystem, the following option are accepted:
+(*) == default
+
+  ro
+        Mount filesystem read only. Note that ext4 will replay the journal (and
+        thus write to the partition) even when mounted "read only". The mount
+        options "ro,noload" can be used to prevent writes to the filesystem.
+
+  journal_checksum
+        Enable checksumming of the journal transactions.  This will allow the
+        recovery code in e2fsck and the kernel to detect corruption in the
+        kernel.  It is a compatible change and will be ignored by older
+        kernels.
+
+  journal_async_commit
+        Commit block can be written to disk without waiting for descriptor
+        blocks. If enabled older kernels cannot mount the device. This will
+        enable 'journal_checksum' internally.
+
+  journal_path=path, journal_dev=devnum
+        When the external journal device's major/minor numbers have changed,
+        these options allow the user to specify the new journal location.  The
+        journal device is identified through either its new major/minor numbers
+        encoded in devnum, or via a path to the device.
+
+  norecovery, noload
+        Don't load the journal on mounting.  Note that if the filesystem was
+        not unmounted cleanly, skipping the journal replay will lead to the
+        filesystem containing inconsistencies that can lead to any number of
+        problems.
+
+  data=journal
+        All data are committed into the journal prior to being written into the
+        main file system.  Enabling this mode will disable delayed allocation
+        and O_DIRECT support.
+
+  data=ordered	(*)
+        All data are forced directly out to the main file system prior to its
+        metadata being committed to the journal.
+
+  data=writeback
+        Data ordering is not preserved, data may be written into the main file
+        system after its metadata has been committed to the journal.
+
+  commit=nrsec	(*)
+        Ext4 can be told to sync all its data and metadata every 'nrsec'
+        seconds. The default value is 5 seconds.  This means that if you lose
+        your power, you will lose as much as the latest 5 seconds of work (your
+        filesystem will not be damaged though, thanks to the journaling).  This
+        default value (or any low value) will hurt performance, but it's good
+        for data-safety.  Setting it to 0 will have the same effect as leaving
+        it at the default (5 seconds).  Setting it to very large values will
+        improve performance.
+
+  barrier=<0|1(*)>, barrier(*), nobarrier
+        This enables/disables the use of write barriers in the jbd code.
+        barrier=0 disables, barrier=1 enables.  This also requires an IO stack
+        which can support barriers, and if jbd gets an error on a barrier
+        write, it will disable again with a warning.  Write barriers enforce
+        proper on-disk ordering of journal commits, making volatile disk write
+        caches safe to use, at some performance penalty.  If your disks are
+        battery-backed in one way or another, disabling barriers may safely
+        improve performance.  The mount options "barrier" and "nobarrier" can
+        also be used to enable or disable barriers, for consistency with other
+        ext4 mount options.
+
+  inode_readahead_blks=n
+        This tuning parameter controls the maximum number of inode table blocks
+        that ext4's inode table readahead algorithm will pre-read into the
+        buffer cache.  The default value is 32 blocks.
+
+  nouser_xattr
+        Disables Extended User Attributes.  See the attr(5) manual page for
+        more information about extended attributes.
+
+  noacl
+        This option disables POSIX Access Control List support. If ACL support
+        is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
+        is enabled by default on mount. See the acl(5) manual page for more
+        information about acl.
+
+  bsddf	(*)
+        Make 'df' act like BSD.
+
+  minixdf
+        Make 'df' act like Minix.
+
+  debug
+        Extra debugging information is sent to syslog.
+
+  abort
+        Simulate the effects of calling ext4_abort() for debugging purposes.
+        This is normally used while remounting a filesystem which is already
+        mounted.
+
+  errors=remount-ro
+        Remount the filesystem read-only on an error.
+
+  errors=continue
+        Keep going on a filesystem error.
+
+  errors=panic
+        Panic and halt the machine if an error occurs.  (These mount options
+        override the errors behavior specified in the superblock, which can be
+        configured using tune2fs)
+
+  data_err=ignore(*)
+        Just print an error message if an error occurs in a file data buffer in
+        ordered mode.
+  data_err=abort
+        Abort the journal if an error occurs in a file data buffer in ordered
+        mode.
+
+  grpid | bsdgroups
+        New objects have the group ID of their parent.
+
+  nogrpid (*) | sysvgroups
+        New objects have the group ID of their creator.
+
+  resgid=n
+        The group ID which may use the reserved blocks.
+
+  resuid=n
+        The user ID which may use the reserved blocks.
+
+  sb=
+        Use alternate superblock at this location.
+
+  quota, noquota, grpquota, usrquota
+        These options are ignored by the filesystem. They are used only by
+        quota tools to recognize volumes where quota should be turned on. See
+        documentation in the quota-tools package for more details
+        (http://sourceforge.net/projects/linuxquota).
+
+  jqfmt=<quota type>, usrjquota=<file>, grpjquota=<file>
+        These options tell filesystem details about quota so that quota
+        information can be properly updated during journal replay. They replace
+        the above quota options. See documentation in the quota-tools package
+        for more details (http://sourceforge.net/projects/linuxquota).
+
+  stripe=n
+        Number of filesystem blocks that mballoc will try to use for allocation
+        size and alignment. For RAID5/6 systems this should be the number of
+        data disks *  RAID chunk size in file system blocks.
+
+  delalloc	(*)
+        Defer block allocation until just before ext4 writes out the block(s)
+        in question.  This allows ext4 to better allocation decisions more
+        efficiently.
+
+  nodelalloc
+        Disable delayed allocation.  Blocks are allocated when the data is
+        copied from userspace to the page cache, either via the write(2) system
+        call or when an mmap'ed page which was previously unallocated is
+        written for the first time.
+
+  max_batch_time=usec
+        Maximum amount of time ext4 should wait for additional filesystem
+        operations to be batch together with a synchronous write operation.
+        Since a synchronous write operation is going to force a commit and then
+        a wait for the I/O complete, it doesn't cost much, and can be a huge
+        throughput win, we wait for a small amount of time to see if any other
+        transactions can piggyback on the synchronous write.   The algorithm
+        used is designed to automatically tune for the speed of the disk, by
+        measuring the amount of time (on average) that it takes to finish
+        committing a transaction.  Call this time the "commit time".  If the
+        time that the transaction has been running is less than the commit
+        time, ext4 will try sleeping for the commit time to see if other
+        operations will join the transaction.   The commit time is capped by
+        the max_batch_time, which defaults to 15000us (15ms).   This
+        optimization can be turned off entirely by setting max_batch_time to 0.
+
+  min_batch_time=usec
+        This parameter sets the commit time (as described above) to be at least
+        min_batch_time.  It defaults to zero microseconds.  Increasing this
+        parameter may improve the throughput of multi-threaded, synchronous
+        workloads on very fast disks, at the cost of increasing latency.
+
+  journal_ioprio=prio
+        The I/O priority (from 0 to 7, where 0 is the highest priority) which
+        should be used for I/O operations submitted by kjournald2 during a
+        commit operation.  This defaults to 3, which is a slightly higher
+        priority than the default I/O priority.
+
+  auto_da_alloc(*), noauto_da_alloc
+        Many broken applications don't use fsync() when replacing existing
+        files via patterns such as fd = open("foo.new")/write(fd,..)/close(fd)/
+        rename("foo.new", "foo"), or worse yet, fd = open("foo",
+        O_TRUNC)/write(fd,..)/close(fd).  If auto_da_alloc is enabled, ext4
+        will detect the replace-via-rename and replace-via-truncate patterns
+        and force that any delayed allocation blocks are allocated such that at
+        the next journal commit, in the default data=ordered mode, the data
+        blocks of the new file are forced to disk before the rename() operation
+        is committed.  This provides roughly the same level of guarantees as
+        ext3, and avoids the "zero-length" problem that can happen when a
+        system crashes before the delayed allocation blocks are forced to disk.
+
+  noinit_itable
+        Do not initialize any uninitialized inode table blocks in the
+        background.  This feature may be used by installation CD's so that the
+        install process can complete as quickly as possible; the inode table
+        initialization process would then be deferred until the next time the
+        file system is unmounted.
+
+  init_itable=n
+        The lazy itable init code will wait n times the number of milliseconds
+        it took to zero out the previous block group's inode table.  This
+        minimizes the impact on the system performance while file system's
+        inode table is being initialized.
+
+  discard, nodiscard(*)
+        Controls whether ext4 should issue discard/TRIM commands to the
+        underlying block device when blocks are freed.  This is useful for SSD
+        devices and sparse/thinly-provisioned LUNs, but it is off by default
+        until sufficient testing has been done.
+
+  nouid32
+        Disables 32-bit UIDs and GIDs.  This is for interoperability  with
+        older kernels which only store and expect 16-bit values.
+
+  block_validity(*), noblock_validity
+        These options enable or disable the in-kernel facility for tracking
+        filesystem metadata blocks within internal data structures.  This
+        allows multi- block allocator and other routines to notice bugs or
+        corrupted allocation bitmaps which cause blocks to be allocated which
+        overlap with filesystem metadata blocks.
+
+  dioread_lock, dioread_nolock
+        Controls whether or not ext4 should use the DIO read locking. If the
+        dioread_nolock option is specified ext4 will allocate uninitialized
+        extent before buffer write and convert the extent to initialized after
+        IO completes. This approach allows ext4 code to avoid using inode
+        mutex, which improves scalability on high speed storages. However this
+        does not work with data journaling and dioread_nolock option will be
+        ignored with kernel warning. Note that dioread_nolock code path is only
+        used for extent-based files.  Because of the restrictions this options
+        comprises it is off by default (e.g. dioread_lock).
+
+  max_dir_size_kb=n
+        This limits the size of directories so that any attempt to expand them
+        beyond the specified limit in kilobytes will cause an ENOSPC error.
+        This is useful in memory constrained environments, where a very large
+        directory can cause severe performance problems or even provoke the Out
+        Of Memory killer.  (For example, if there is only 512mb memory
+        available, a 176mb directory may seriously cramp the system's style.)
+
+  i_version
+        Enable 64-bit inode version support. This option is off by default.
+
+  dax
+        Use direct access (no page cache).  See
+        Documentation/filesystems/dax.txt.  Note that this option is
+        incompatible with data=journal.
+
+Data Mode
+=========
+There are 3 different data modes:
+
+* writeback mode
+
+  In data=writeback mode, ext4 does not journal data at all.  This mode provides
+  a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+  mode - metadata journaling.  A crash+recovery can cause incorrect data to
+  appear in files which were written shortly before the crash.  This mode will
+  typically provide the best ext4 performance.
+
+* ordered mode
+
+  In data=ordered mode, ext4 only officially journals metadata, but it logically
+  groups metadata information related to data changes with the data blocks into
+  a single unit called a transaction.  When it's time to write the new metadata
+  out to disk, the associated data blocks are written first.  In general, this
+  mode performs slightly slower than writeback but significantly faster than
+  journal mode.
+
+* journal mode
+
+  data=journal mode provides full data and metadata journaling.  All new data is
+  written to the journal first, and then to its final location.  In the event of
+  a crash, the journal can be replayed, bringing both data and metadata into a
+  consistent state.  This mode is the slowest except when data needs to be read
+  from and written to disk at the same time where it outperforms all others
+  modes.  Enabling this mode will disable delayed allocation and O_DIRECT
+  support.
+
+/proc entries
+=============
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4.  Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0).   The files in each per-device directory are shown
+in table below.
+
+Files in /proc/fs/ext4/<devname>
+
+  mb_groups
+        details of multiblock allocator buddy cache of free blocks
+
+/sys entries
+============
+
+Information about mounted ext4 file systems can be found in
+/sys/fs/ext4.  Each mounted filesystem will have a directory in
+/sys/fs/ext4 based on its device name (i.e., /sys/fs/ext4/hdc or
+/sys/fs/ext4/dm-0).   The files in each per-device directory are shown
+in table below.
+
+Files in /sys/fs/ext4/<devname>:
+
+(see also Documentation/ABI/testing/sysfs-fs-ext4)
+
+  delayed_allocation_blocks
+        This file is read-only and shows the number of blocks that are dirty in
+        the page cache, but which do not have their location in the filesystem
+        allocated yet.
+
+  inode_goal
+        Tuning parameter which (if non-zero) controls the goal inode used by
+        the inode allocator in preference to all other allocation heuristics.
+        This is intended for debugging use only, and should be 0 on production
+        systems.
+
+  inode_readahead_blks
+        Tuning parameter which controls the maximum number of inode table
+        blocks that ext4's inode table readahead algorithm will pre-read into
+        the buffer cache.
+
+  lifetime_write_kbytes
+        This file is read-only and shows the number of kilobytes of data that
+        have been written to this filesystem since it was created.
+
+  max_writeback_mb_bump
+        The maximum number of megabytes the writeback code will try to write
+        out before move on to another inode.
+
+  mb_group_prealloc
+        The multiblock allocator will round up allocation requests to a
+        multiple of this tuning parameter if the stripe size is not set in the
+        ext4 superblock
+
+  mb_max_to_scan
+        The maximum number of extents the multiblock allocator will search to
+        find the best extent.
+
+  mb_min_to_scan
+        The minimum number of extents the multiblock allocator will search to
+        find the best extent.
+
+  mb_order2_req
+        Tuning parameter which controls the minimum size for requests (as a
+        power of 2) where the buddy cache is used.
+
+  mb_stats
+        Controls whether the multiblock allocator should collect statistics,
+        which are shown during the unmount. 1 means to collect statistics, 0
+        means not to collect statistics.
+
+  mb_stream_req
+        Files which have fewer blocks than this tunable parameter will have
+        their blocks allocated out of a block group specific preallocation
+        pool, so that small files are packed closely together.  Each large file
+        will have its blocks allocated out of its own unique preallocation
+        pool.
+
+  session_write_kbytes
+        This file is read-only and shows the number of kilobytes of data that
+        have been written to this filesystem since it was mounted.
+
+  reserved_clusters
+        This is RW file and contains number of reserved clusters in the file
+        system which will be used in the specific situations to avoid costly
+        zeroout, unexpected ENOSPC, or possible data loss. The default is 2% or
+        4096 clusters, whichever is smaller and this can be changed however it
+        can never exceed number of clusters in the file system. If there is not
+        enough space for the reserved space when mounting the file mount will
+        _not_ fail.
+
+Ioctls
+======
+
+There is some Ext4 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all Ext4 specific ioctls are
+shown in the table below.
+
+Table of Ext4 specific ioctls
+
+  EXT4_IOC_GETFLAGS
+        Get additional attributes associated with inode.  The ioctl argument is
+        an integer bitfield, with bit values described in ext4.h. This ioctl is
+        an alias for FS_IOC_GETFLAGS.
+
+  EXT4_IOC_SETFLAGS
+        Set additional attributes associated with inode.  The ioctl argument is
+        an integer bitfield, with bit values described in ext4.h. This ioctl is
+        an alias for FS_IOC_SETFLAGS.
+
+  EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD
+        Get the inode i_generation number stored for each inode. The
+        i_generation number is normally changed only when new inode is created
+        and it is particularly useful for network filesystems. The '_OLD'
+        version of this ioctl is an alias for FS_IOC_GETVERSION.
+
+  EXT4_IOC_SETVERSION, EXT4_IOC_SETVERSION_OLD
+        Set the inode i_generation number stored for each inode. The '_OLD'
+        version of this ioctl is an alias for FS_IOC_SETVERSION.
+
+  EXT4_IOC_GROUP_EXTEND
+        This ioctl has the same purpose as the resize mount option. It allows
+        to resize filesystem to the end of the last existing block group,
+        further resize has to be done with resize2fs, either online, or
+        offline. The argument points to the unsigned logn number representing
+        the filesystem new block count.
+
+  EXT4_IOC_MOVE_EXT
+        Move the block extents from orig_fd (the one this ioctl is pointing to)
+        to the donor_fd (the one specified in move_extent structure passed as
+        an argument to this ioctl). Then, exchange inode metadata between
+        orig_fd and donor_fd.  This is especially useful for online
+        defragmentation, because the allocator has the opportunity to allocate
+        moved blocks better, ideally into one contiguous extent.
+
+  EXT4_IOC_GROUP_ADD
+        Add a new group descriptor to an existing or new group descriptor
+        block. The new group descriptor is described by ext4_new_group_input
+        structure, which is passed as an argument to this ioctl. This is
+        especially useful in conjunction with EXT4_IOC_GROUP_EXTEND, which
+        allows online resize of the filesystem to the end of the last existing
+        block group.  Those two ioctls combined is used in userspace online
+        resize tool (e.g. resize2fs).
+
+  EXT4_IOC_MIGRATE
+        This ioctl operates on the filesystem itself.  It converts (migrates)
+        ext3 indirect block mapped inode to ext4 extent mapped inode by walking
+        through indirect block mapping of the original inode and converting
+        contiguous block ranges into ext4 extents of the temporary inode. Then,
+        inodes are swapped. This ioctl might help, when migrating from ext3 to
+        ext4 filesystem, however suggestion is to create fresh ext4 filesystem
+        and copy data from the backup. Note, that filesystem has to support
+        extents for this ioctl to work.
+
+  EXT4_IOC_ALLOC_DA_BLKS
+        Force all of the delay allocated blocks to be allocated to preserve
+        application-expected ext3 behaviour. Note that this will also start
+        triggering a write of the data blocks, but this behaviour may change in
+        the future as it is not necessary and has been done this way only for
+        sake of simplicity.
+
+  EXT4_IOC_RESIZE_FS
+        Resize the filesystem to a new size.  The number of blocks of resized
+        filesystem is passed in via 64 bit integer argument.  The kernel
+        allocates bitmaps and inode table, the userspace tool thus just passes
+        the new number of blocks.
+
+  EXT4_IOC_SWAP_BOOT
+        Swap i_blocks and associated attributes (like i_blocks, i_size,
+        i_flags, ...) from the specified inode with inode EXT4_BOOT_LOADER_INO
+        (#5). This is typically used to store a boot loader in a secure part of
+        the filesystem, where it can't be changed by a normal user by accident.
+        The data blocks of the previous boot loader will be associated with the
+        given inode.
+
+References
+==========
+
+kernel source:	<file:fs/ext4/>
+		<file:fs/jbd2/>
+
+programs:	http://e2fsprogs.sourceforge.net/
+
+useful links:	http://fedoraproject.org/wiki/ext3-devel
+		http://www.bullopensource.org/ext4/
+		http://ext4.wiki.kernel.org/index.php/Main_Page
+		http://fedoraproject.org/wiki/Features/Ext4

+ 1 - 0
Documentation/admin-guide/index.rst

@@ -71,6 +71,7 @@ configure specific aspects of kernel behavior to your liking.
    java
    ras
    bcache
+   ext4
    pm/index
    thunderbolt
    LSM/index

+ 73 - 19
Documentation/admin-guide/kernel-parameters.txt

@@ -856,6 +856,11 @@
 			causing system reset or hang due to sending
 			INIT from AP to BSP.
 
+	disable_counter_freezing [HW]
+			Disable Intel PMU counter freezing feature.
+			The feature only exists starting from
+			Arch Perfmon v4 (Skylake and newer).
+
 	disable_ddw	[PPC/PSERIES]
 			Disable Dynamic DMA Window support. Use this if
 			to workaround buggy firmware.
@@ -1063,7 +1068,7 @@
 			earlyprintk=serial[,0x...[,baudrate]]
 			earlyprintk=ttySn[,baudrate]
 			earlyprintk=dbgp[debugController#]
-			earlyprintk=pciserial,bus:device.function[,baudrate]
+			earlyprintk=pciserial[,force],bus:device.function[,baudrate]
 			earlyprintk=xdbc[xhciController#]
 
 			earlyprintk is useful when the kernel crashes before
@@ -1095,6 +1100,10 @@
 
 			The sclp output can only be used on s390.
 
+			The optional "force" to "pciserial" enables use of a
+			PCI device even when its classcode is not of the
+			UART class.
+
 	edac_report=	[HW,EDAC] Control how to report EDAC event
 			Format: {"on" | "off" | "force"}
 			on: enable EDAC to report H/W event. May be overridden
@@ -1385,6 +1394,11 @@
 	hvc_iucv_allow=	[S390]	Comma-separated list of z/VM user IDs.
 				If specified, z/VM IUCV HVC accepts connections
 				from listed z/VM user IDs only.
+
+	hv_nopvspin	[X86,HYPER_V] Disables the paravirt spinlock optimizations
+				      which allow the hypervisor to 'idle' the
+				      guest on lock contention.
+
 	keep_bootcon	[KNL]
 			Do not unregister boot console at start. This is only
 			useful for debugging when something happens in the window
@@ -1749,12 +1763,24 @@
 		nobypass	[PPC/POWERNV]
 			Disable IOMMU bypass, using IOMMU for PCI devices.
 
+	iommu.strict=	[ARM64] Configure TLB invalidation behaviour
+			Format: { "0" | "1" }
+			0 - Lazy mode.
+			  Request that DMA unmap operations use deferred
+			  invalidation of hardware TLBs, for increased
+			  throughput at the cost of reduced device isolation.
+			  Will fall back to strict mode if not supported by
+			  the relevant IOMMU driver.
+			1 - Strict mode (default).
+			  DMA unmap operations invalidate IOMMU hardware TLBs
+			  synchronously.
+
 	iommu.passthrough=
 			[ARM64] Configure DMA to bypass the IOMMU by default.
 			Format: { "0" | "1" }
 			0 - Use IOMMU translation for DMA.
 			1 - Bypass the IOMMU for DMA.
-			unset - Use IOMMU translation for DMA.
+			unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
 
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
@@ -2274,6 +2300,8 @@
 	ltpc=		[NET]
 			Format: <io>,<irq>,<dma>
 
+	lsm.debug	[SECURITY] Enable LSM initialization debugging output.
+
 	machvec=	[IA-64] Force the use of a particular machine-vector
 			(machvec) in a generic kernel.
 			Example: machvec=hpzx1_swiotlb
@@ -2404,7 +2432,7 @@
 			seconds.  Use this parameter to check at some
 			other rate.  0 disables periodic checking.
 
-	memtest=	[KNL,X86,ARM] Enable memtest
+	memtest=	[KNL,X86,ARM,PPC] Enable memtest
 			Format: <integer>
 			default : 0 <disable>
 			Specifies the number of memtest passes to be
@@ -3523,6 +3551,12 @@
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			See Documentation/blockdev/ramdisk.txt.
 
+	random.trust_cpu={on,off}
+			[KNL] Enable or disable trusting the use of the
+			CPU's random number generator (if available) to
+			fully seed the kernel's CRNG. Default is controlled
+			by CONFIG_RANDOM_TRUST_CPU.
+
 	ras=option[,option,...]	[KNL] RAS-specific options
 
 		cec_disable	[X86]
@@ -3534,14 +3568,14 @@
 
 			In kernels built with CONFIG_RCU_NOCB_CPU=y, set
 			the specified list of CPUs to be no-callback CPUs.
-			Invocation of these CPUs' RCU callbacks will
-			be offloaded to "rcuox/N" kthreads created for
-			that purpose, where "x" is "b" for RCU-bh, "p"
-			for RCU-preempt, and "s" for RCU-sched, and "N"
-			is the CPU number.  This reduces OS jitter on the
-			offloaded CPUs, which can be useful for HPC and
-			real-time workloads.  It can also improve energy
-			efficiency for asymmetric multiprocessors.
+			Invocation of these CPUs' RCU callbacks will be
+			offloaded to "rcuox/N" kthreads created for that
+			purpose, where "x" is "p" for RCU-preempt, and
+			"s" for RCU-sched, and "N" is the CPU number.
+			This reduces OS jitter on the offloaded CPUs,
+			which can be useful for HPC and real-time
+			workloads.  It can also improve energy efficiency
+			for asymmetric multiprocessors.
 
 	rcu_nocb_poll	[KNL]
 			Rather than requiring that offloaded CPUs
@@ -3595,7 +3629,14 @@
 			Set required age in jiffies for a
 			given grace period before RCU starts
 			soliciting quiescent-state help from
-			rcu_note_context_switch().
+			rcu_note_context_switch().  If not specified, the
+			kernel will calculate a value based on the most
+			recent settings of rcutree.jiffies_till_first_fqs
+			and rcutree.jiffies_till_next_fqs.
+			This calculated value may be viewed in
+			rcutree.jiffies_to_sched_qs.  Any attempt to
+			set rcutree.jiffies_to_sched_qs will be
+			cheerfully overwritten.
 
 	rcutree.jiffies_till_first_fqs= [KNL]
 			Set delay from grace-period initialization to
@@ -3863,12 +3904,6 @@
 	rcupdate.rcu_self_test= [KNL]
 			Run the RCU early boot self tests
 
-	rcupdate.rcu_self_test_bh= [KNL]
-			Run the RCU bh early boot self tests
-
-	rcupdate.rcu_self_test_sched= [KNL]
-			Run the RCU sched early boot self tests
-
 	rdinit=		[KNL]
 			Format: <full_path>
 			Run specified binary instead of /init from the ramdisk,
@@ -4604,7 +4639,8 @@
 
 	usbcore.old_scheme_first=
 			[USB] Start with the old device initialization
-			scheme (default 0 = off).
+			scheme,  applies only to low and full-speed devices
+			 (default 0 = off).
 
 	usbcore.usbfs_memory_mb=
 			[USB] Memory limit (in MB) for buffers allocated by
@@ -4819,6 +4855,18 @@
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
 
+	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
+			May slow down system boot speed, especially when
+			enabled on systems with a large amount of memory.
+			All options are enabled by default, and this
+			interface is meant to allow for selectively
+			enabling or disabling specific virtual memory
+			debugging features.
+
+			Available options are:
+			  P	Enable page structure init time poisoning
+			  -	Disable all of the above options
+
 	vmalloc=nn[KMG]	[KNL,BOOT] Forces the vmalloc area to have an exact
 			size of <nn>. This can be used to increase the
 			minimum size (128MB on x86). It can also be used to
@@ -4994,6 +5042,12 @@
 			Disables the PV optimizations forcing the HVM guest to
 			run as generic HVM guest with no PV drivers.
 
+	xen_scrub_pages=	[XEN]
+			Boolean option to control scrubbing pages before giving them back
+			to Xen, for use by other domains. Can be also changed at runtime
+			with /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
+			Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.
+
 	xirc2ps_cs=	[NET,PCMCIA]
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]

+ 1 - 1
Documentation/admin-guide/l1tf.rst

@@ -553,7 +553,7 @@ When nested virtualization is in use, three operating systems are involved:
 the bare metal hypervisor, the nested hypervisor and the nested virtual
 machine.  VMENTER operations from the nested hypervisor into the nested
 guest will always be processed by the bare metal hypervisor. If KVM is the
-bare metal hypervisor it wiil:
+bare metal hypervisor it will:
 
  - Flush the L1D cache on every switch from the nested hypervisor to the
    nested virtual machine, so that the nested hypervisor's secrets are not

+ 1 - 0
Documentation/admin-guide/mm/index.rst

@@ -29,6 +29,7 @@ the Linux memory management.
    hugetlbpage
    idle_page_tracking
    ksm
+   memory-hotplug
    numa_memory_policy
    pagemap
    soft-dirty

+ 63 - 126
Documentation/memory-hotplug.txt → Documentation/admin-guide/mm/memory-hotplug.rst

@@ -1,47 +1,29 @@
+.. _admin_guide_memory_hotplug:
+
 ==============
 Memory Hotplug
 ==============
 
 :Created:							Jul 28 2007
-:Updated: Add description of notifier of memory hotplug:	Oct 11 2007
+:Updated: Add some details about locking internals:		Aug 20 2018
 
 This document is about memory hotplug including how-to-use and current status.
 Because Memory Hotplug is still under development, contents of this text will
 be changed often.
 
-.. CONTENTS
-
-  1. Introduction
-    1.1 purpose of memory hotplug
-    1.2. Phases of memory hotplug
-    1.3. Unit of Memory online/offline operation
-  2. Kernel Configuration
-  3. sysfs files for memory hotplug
-  4. Physical memory hot-add phase
-    4.1 Hardware(Firmware) Support
-    4.2 Notify memory hot-add event by hand
-  5. Logical Memory hot-add phase
-    5.1. State of memory
-    5.2. How to online memory
-  6. Logical memory remove
-    6.1 Memory offline and ZONE_MOVABLE
-    6.2. How to offline memory
-  7. Physical memory remove
-  8. Memory hotplug event notifier
-  9. Future Work List
-
+.. contents:: :local:
 
 .. note::
 
     (1) x86_64's has special implementation for memory hotplug.
         This text does not describe it.
-    (2) This text assumes that sysfs is mounted at /sys.
+    (2) This text assumes that sysfs is mounted at ``/sys``.
 
 
 Introduction
 ============
 
-purpose of memory hotplug
+Purpose of memory hotplug
 -------------------------
 
 Memory Hotplug allows users to increase/decrease the amount of memory.
@@ -57,7 +39,6 @@ hardware which supports memory power management.
 
 Linux memory hotplug is designed for both purpose.
 
-
 Phases of memory hotplug
 ------------------------
 
@@ -92,7 +73,6 @@ phase by hand.
 (However, if you writes udev's hotplug scripts for memory hotplug, these
 phases can be execute in seamless way.)
 
-
 Unit of Memory online/offline operation
 ---------------------------------------
 
@@ -107,10 +87,9 @@ unit upon which memory online/offline operations are to be performed. The
 default size of a memory block is the same as memory section size unless an
 architecture specifies otherwise. (see :ref:`memory_hotplug_sysfs_files`.)
 
-To determine the size (in bytes) of a memory block please read this file:
-
-/sys/devices/system/memory/block_size_bytes
+To determine the size (in bytes) of a memory block please read this file::
 
+  /sys/devices/system/memory/block_size_bytes
 
 Kernel Configuration
 ====================
@@ -119,22 +98,22 @@ To use memory hotplug feature, kernel must be compiled with following
 config options.
 
 - For all memory hotplug:
-    - Memory model -> Sparse Memory  (CONFIG_SPARSEMEM)
-    - Allow for memory hot-add       (CONFIG_MEMORY_HOTPLUG)
+    - Memory model -> Sparse Memory  (``CONFIG_SPARSEMEM``)
+    - Allow for memory hot-add       (``CONFIG_MEMORY_HOTPLUG``)
 
 - To enable memory removal, the following are also necessary:
-    - Allow for memory hot remove    (CONFIG_MEMORY_HOTREMOVE)
-    - Page Migration                 (CONFIG_MIGRATION)
+    - Allow for memory hot remove    (``CONFIG_MEMORY_HOTREMOVE``)
+    - Page Migration                 (``CONFIG_MIGRATION``)
 
 - For ACPI memory hotplug, the following are also necessary:
-    - Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
+    - Memory hotplug (under ACPI Support menu) (``CONFIG_ACPI_HOTPLUG_MEMORY``)
     - This option can be kernel module.
 
 - As a related configuration, if your box has a feature of NUMA-node hotplug
   via ACPI, then this option is necessary too.
 
     - ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
-      (CONFIG_ACPI_CONTAINER).
+      (``CONFIG_ACPI_CONTAINER``).
 
      This option can be kernel module too.
 
@@ -145,10 +124,11 @@ sysfs files for memory hotplug
 ==============================
 
 All memory blocks have their device information in sysfs.  Each memory block
-is described under /sys/devices/system/memory as:
+is described under ``/sys/devices/system/memory`` as::
 
 	/sys/devices/system/memory/memoryXXX
-	(XXX is the memory block id.)
+
+where XXX is the memory block id.
 
 For the memory block covered by the sysfs directory.  It is expected that all
 memory sections in this range are present and no memory holes exist in the
@@ -157,7 +137,7 @@ the existence of one should not affect the hotplug capabilities of the memory
 block.
 
 For example, assume 1GiB memory block size. A device for a memory starting at
-0x100000000 is /sys/device/system/memory/memory4::
+0x100000000 is ``/sys/device/system/memory/memory4``::
 
 	(0x100000000 / 1Gib = 4)
 
@@ -165,11 +145,11 @@ This device covers address range [0x100000000 ... 0x140000000)
 
 Under each memory block, you can see 5 files:
 
-- /sys/devices/system/memory/memoryXXX/phys_index
-- /sys/devices/system/memory/memoryXXX/phys_device
-- /sys/devices/system/memory/memoryXXX/state
-- /sys/devices/system/memory/memoryXXX/removable
-- /sys/devices/system/memory/memoryXXX/valid_zones
+- ``/sys/devices/system/memory/memoryXXX/phys_index``
+- ``/sys/devices/system/memory/memoryXXX/phys_device``
+- ``/sys/devices/system/memory/memoryXXX/state``
+- ``/sys/devices/system/memory/memoryXXX/removable``
+- ``/sys/devices/system/memory/memoryXXX/valid_zones``
 
 =================== ============================================================
 ``phys_index``      read-only and contains memory block id, same as XXX.
@@ -207,13 +187,15 @@ Under each memory block, you can see 5 files:
   These directories/files appear after physical memory hotplug phase.
 
 If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed
-via symbolic links located in the /sys/devices/system/node/node* directories.
+via symbolic links located in the ``/sys/devices/system/node/node*`` directories.
+
+For example::
 
-For example:
-/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+	/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
 
-A backlink will also be created:
-/sys/devices/system/memory/memory9/node0 -> ../../node/node0
+A backlink will also be created::
+
+	/sys/devices/system/memory/memory9/node0 -> ../../node/node0
 
 .. _memory_hotplug_physical_mem:
 
@@ -240,7 +222,6 @@ If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
 calls hotplug code for all of objects which are defined in it.
 If memory device is found, memory hotplug code will be called.
 
-
 Notify memory hot-add event by hand
 -----------------------------------
 
@@ -251,8 +232,9 @@ CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
 if hotplug is supported, although for x86 this should be handled by ACPI
 notification.
 
-Probe interface is located at
-/sys/devices/system/memory/probe
+Probe interface is located at::
+
+	/sys/devices/system/memory/probe
 
 You can tell the physical address of new memory to the kernel by::
 
@@ -263,7 +245,6 @@ memory_block_size] memory range is hot-added. In this case, hotplug script is
 not called (in current implementation). You'll have to online memory by
 yourself.  Please see :ref:`memory_hotplug_how_to_online_memory`.
 
-
 Logical Memory hot-add phase
 ============================
 
@@ -301,7 +282,7 @@ This sets a global policy and impacts all memory blocks that will subsequently
 be hotplugged. Currently offline blocks keep their state. It is possible, under
 certain circumstances, that some memory blocks will be added but will fail to
 online. User space tools can check their "state" files
-(/sys/devices/system/memory/memoryXXX/state) and try to online them manually.
+(``/sys/devices/system/memory/memoryXXX/state``) and try to online them manually.
 
 If the automatic onlining wasn't requested, failed, or some memory block was
 offlined it is possible to change the individual block's state by writing to the
@@ -334,8 +315,6 @@ available memory will be increased.
 
 This may be changed in future.
 
-
-
 Logical memory remove
 =====================
 
@@ -413,87 +392,45 @@ Need more implementation yet....
  - Notification completion of remove works by OS to firmware.
  - Guard from remove if not yet.
 
-Memory hotplug event notifier
-=============================
-
-Hotplugging events are sent to a notification queue.
-
-There are six types of notification defined in include/linux/memory.h:
-
-MEM_GOING_ONLINE
-  Generated before new memory becomes available in order to be able to
-  prepare subsystems to handle memory. The page allocator is still unable
-  to allocate from the new memory.
-
-MEM_CANCEL_ONLINE
-  Generated if MEMORY_GOING_ONLINE fails.
-
-MEM_ONLINE
-  Generated when memory has successfully brought online. The callback may
-  allocate pages from the new memory.
-
-MEM_GOING_OFFLINE
-  Generated to begin the process of offlining memory. Allocations are no
-  longer possible from the memory but some of the memory to be offlined
-  is still in use. The callback can be used to free memory known to a
-  subsystem from the indicated memory block.
-
-MEM_CANCEL_OFFLINE
-  Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from
-  the memory block that we attempted to offline.
-
-MEM_OFFLINE
-  Generated after offlining memory is complete.
-
-A callback routine can be registered by calling::
-
-  hotplug_memory_notifier(callback_func, priority)
-
-Callback functions with higher values of priority are called before callback
-functions with lower values.
 
-A callback function must have the following prototype::
+Locking Internals
+=================
 
-  int callback_func(
-    struct notifier_block *self, unsigned long action, void *arg);
+When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
+the device_hotplug_lock should be held to:
 
-The first argument of the callback function (self) is a pointer to the block
-of the notifier chain that points to the callback function itself.
-The second argument (action) is one of the event types described above.
-The third argument (arg) passes a pointer of struct memory_notify::
+- synchronize against online/offline requests (e.g. via sysfs). This way, memory
+  block devices can only be accessed (.online/.state attributes) by user
+  space once memory has been fully added. And when removing memory, we
+  know nobody is in critical sections.
+- synchronize against CPU hotplug and similar (e.g. relevant for ACPI and PPC)
 
-	struct memory_notify {
-		unsigned long start_pfn;
-		unsigned long nr_pages;
-		int status_change_nid_normal;
-		int status_change_nid_high;
-		int status_change_nid;
-	}
+Especially, there is a possible lock inversion that is avoided using
+device_hotplug_lock when adding memory and user space tries to online that
+memory faster than expected:
 
-- start_pfn is start_pfn of online/offline memory.
-- nr_pages is # of pages of online/offline memory.
-- status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
-  is (will be) set/clear, if this is -1, then nodemask status is not changed.
-- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
-  is (will be) set/clear, if this is -1, then nodemask status is not changed.
-- status_change_nid is set node id when N_MEMORY of nodemask is (will be)
-  set/clear. It means a new(memoryless) node gets new memory by online and a
-  node loses all memory. If this is -1, then nodemask status is not changed.
+- device_online() will first take the device_lock(), followed by
+  mem_hotplug_lock
+- add_memory_resource() will first take the mem_hotplug_lock, followed by
+  the device_lock() (while creating the devices, during bus_add_device()).
 
-  If status_changed_nid* >= 0, callback should create/discard structures for the
-  node if necessary.
+As the device is visible to user space before taking the device_lock(), this
+can result in a lock inversion.
 
-The callback routine shall return one of the values
-NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
-defined in include/linux/notifier.h
+onlining/offlining of memory should be done via device_online()/
+device_offline() - to make sure it is properly synchronized to actions
+via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
 
-NOTIFY_DONE and NOTIFY_OK have no effect on the further processing.
+When adding/removing/onlining/offlining memory or adding/removing
+heterogeneous/device memory, we should always hold the mem_hotplug_lock in
+write mode to serialise memory hotplug (e.g. access to global/zone
+variables).
 
-NOTIFY_BAD is used as response to the MEM_GOING_ONLINE, MEM_GOING_OFFLINE,
-MEM_ONLINE, or MEM_OFFLINE action to cancel hotplugging. It stops
-further processing of the notification queue.
+In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
+mode allows for a quite efficient get_online_mems/put_online_mems
+implementation, so code accessing memory can protect from that memory
+vanishing.
 
-NOTIFY_STOP stops further processing of the notification queue.
 
 Future Work
 ===========

+ 7 - 0
Documentation/admin-guide/pm/intel_pstate.rst

@@ -465,6 +465,13 @@ Next, the following policy attributes have special meaning if
 	policy for the time interval between the last two invocations of the
 	driver's utilization update callback by the CPU scheduler for that CPU.
 
+One more policy attribute is present if the `HWP feature is enabled in the
+processor <Active Mode With HWP_>`_:
+
+``base_frequency``
+	Shows the base frequency of the CPU. Any frequency above this will be
+	in the turbo frequency range.
+
 The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
 same as for other scaling drivers.
 

+ 29 - 18
Documentation/admin-guide/security-bugs.rst

@@ -26,23 +26,34 @@ information is helpful.  Any exploit code is very helpful and will not
 be released without consent from the reporter unless it has already been
 made public.
 
-Disclosure
-----------
-
-The goal of the Linux kernel security team is to work with the bug
-submitter to understand and fix the bug.  We prefer to publish the fix as
-soon as possible, but try to avoid public discussion of the bug itself
-and leave that to others.
-
-Publishing the fix may be delayed when the bug or the fix is not yet
-fully understood, the solution is not well-tested or for vendor
-coordination.  However, we expect these delays to be short, measurable in
-days, not weeks or months.  A release date is negotiated by the security
-team working with the bug submitter as well as vendors.  However, the
-kernel security team holds the final say when setting a timeframe.  The
-timeframe varies from immediate (esp. if it's already publicly known bug)
-to a few weeks.  As a basic default policy, we expect report date to
-release date to be on the order of 7 days.
+Disclosure and embargoed information
+------------------------------------
+
+The security list is not a disclosure channel.  For that, see Coordination
+below.
+
+Once a robust fix has been developed, our preference is to release the
+fix in a timely fashion, treating it no differently than any of the other
+thousands of changes and fixes the Linux kernel project releases every
+month.
+
+However, at the request of the reporter, we will postpone releasing the
+fix for up to 5 business days after the date of the report or after the
+embargo has lifted; whichever comes first.  The only exception to that
+rule is if the bug is publicly known, in which case the preference is to
+release the fix as soon as it's available.
+
+Whilst embargoed information may be shared with trusted individuals in
+order to develop a fix, such information will not be published alongside
+the fix or on any other disclosure channel without the permission of the
+reporter.  This includes but is not limited to the original bug report
+and followup discussions (if any), exploits, CVE information or the
+identity of the reporter.
+
+In other words our only interest is in getting bugs fixed.  All other
+information submitted to the security list and any followup discussions
+of the report are treated confidentially even after the embargo has been
+lifted, in perpetuity.
 
 Coordination
 ------------
@@ -68,7 +79,7 @@ may delay the bug handling. If a reporter wishes to have a CVE identifier
 assigned ahead of public disclosure, they will need to contact the private
 linux-distros list, described above. When such a CVE identifier is known
 before a patch is provided, it is desirable to mention it in the commit
-message, though.
+message if the reporter agrees.
 
 Non-disclosure agreements
 -------------------------

+ 0 - 50
Documentation/arm/00-INDEX

@@ -1,50 +0,0 @@
-00-INDEX
-	- this file
-Booting
-	- requirements for booting
-CCN.txt
-	- Cache Coherent Network ring-bus and perf PMU driver.
-Interrupts
-	- ARM Interrupt subsystem documentation
-IXP4xx
-	- Intel IXP4xx Network processor.
-Netwinder
-	- Netwinder specific documentation
-Porting
-       - Symbol definitions for porting Linux to a new ARM machine.
-Setup
-       - Kernel initialization parameters on ARM Linux
-README
-	- General ARM documentation
-SA1100/
-	- SA1100 documentation
-Samsung-S3C24XX/
-	- S3C24XX ARM Linux Overview
-SPEAr/
-	- ST SPEAr platform Linux Overview
-VFP/
-	- Release notes for Linux Kernel Vector Floating Point support code
-cluster-pm-race-avoidance.txt
-	- Algorithm for CPU and Cluster setup/teardown
-empeg/
-	- Ltd's Empeg MP3 Car Audio Player
-firmware.txt
-	- Secure firmware registration and calling.
-kernel_mode_neon.txt
-	- How to use NEON instructions in kernel mode
-kernel_user_helpers.txt
-	- Helper functions in kernel space made available for userspace.
-mem_alignment
-	- alignment abort handler documentation
-memory.txt
-	- description of the virtual memory layout
-nwfpe/
-	- NWFPE floating point emulator documentation
-swp_emulation
-	- SWP/SWPB emulation handler/logging description
-tcm.txt
-	- ARM Tightly Coupled Memory
-uefi.txt
-	- [U]EFI configuration and runtime services documentation
-vlocks.txt
-	- Voting locks, low-level mechanism relying on memory system atomic writes.

+ 1 - 0
Documentation/arm/Samsung/Bootloader-interface.txt

@@ -26,6 +26,7 @@ Offset        Value                                        Purpose
 0x20          0xfcba0d10 (Magic cookie)                    AFTR
 0x24          exynos_cpu_resume_ns                         AFTR
 0x28 + 4*cpu  0x8 (Magic cookie, Exynos3250)               AFTR
+0x28          0x0 or last value during resume (Exynos542x) System suspend
 
 
 2. Secure mode

+ 8 - 4
Documentation/arm64/elf_hwcaps.txt

@@ -78,11 +78,11 @@ HWCAP_EVTSTRM
 
 HWCAP_AES
 
-    Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0001.
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
 
 HWCAP_PMULL
 
-    Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0010.
+    Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010.
 
 HWCAP_SHA1
 
@@ -153,7 +153,7 @@ HWCAP_ASIMDDP
 
 HWCAP_SHA512
 
-    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0002.
+    Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010.
 
 HWCAP_SVE
 
@@ -173,8 +173,12 @@ HWCAP_USCAT
 
 HWCAP_ILRCPC
 
-    Functionality implied by ID_AA64ISR1_EL1.LRCPC == 0b0002.
+    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010.
 
 HWCAP_FLAGM
 
     Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+
+HWCAP_SSBS
+
+    Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.

+ 38 - 0
Documentation/arm64/hugetlbpage.txt

@@ -0,0 +1,38 @@
+HugeTLBpage on ARM64
+====================
+
+Hugepage relies on making efficient use of TLBs to improve performance of
+address translations. The benefit depends on both -
+
+  - the size of hugepages
+  - size of entries supported by the TLBs
+
+The ARM64 port supports two flavours of hugepages.
+
+1) Block mappings at the pud/pmd level
+--------------------------------------
+
+These are regular hugepages where a pmd or a pud page table entry points to a
+block of memory. Regardless of the supported size of entries in TLB, block
+mappings reduce the depth of page table walk needed to translate hugepage
+addresses.
+
+2) Using the Contiguous bit
+---------------------------
+
+The architecture provides a contiguous bit in the translation table entries
+(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a
+contiguous set of entries that can be cached in a single TLB entry.
+
+The contiguous bit is used in Linux to increase the mapping size at the pmd and
+pte (last) level. The number of supported contiguous entries varies by page size
+and level of the page table.
+
+
+The following hugepage sizes are supported -
+
+         CONT PTE    PMD    CONT PMD    PUD
+         --------    ---    --------    ---
+  4K:         64K     2M         32M     1G
+  16K:         2M    32M          1G
+  64K:         2M   512M         16G

+ 1 - 0
Documentation/arm64/silicon-errata.txt

@@ -56,6 +56,7 @@ stable kernels.
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
 | ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
+| ARM            | Cortex-A76      | #1188873        | ARM64_ERRATUM_1188873       |
 | ARM            | MMU-500         | #841119,#826419 | N/A                         |
 |                |                 |                 |                             |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375        |

+ 2 - 2
Documentation/arm64/sve.txt

@@ -200,7 +200,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg)
       thread.
 
     * Changing the vector length causes all of P0..P15, FFR and all bits of
-      Z0..V31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
+      Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
       unspecified.  Calling PR_SVE_SET_VL with vl equal to the thread's current
       vector length, or calling PR_SVE_SET_VL with the PR_SVE_SET_VL_ONEXEC
       flag, does not constitute a change to the vector length for this purpose.
@@ -500,7 +500,7 @@ References
 [2] arch/arm64/include/uapi/asm/ptrace.h
     AArch64 Linux ptrace ABI definitions
 
-[3] linux/Documentation/arm64/cpu-feature-registers.txt
+[3] Documentation/arm64/cpu-feature-registers.txt
 
 [4] ARM IHI0055C
     http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf

+ 0 - 34
Documentation/block/00-INDEX

@@ -1,34 +0,0 @@
-00-INDEX
-	- This file
-bfq-iosched.txt
-	- BFQ IO scheduler and its tunables
-biodoc.txt
-	- Notes on the Generic Block Layer Rewrite in Linux 2.5
-biovecs.txt
-	- Immutable biovecs and biovec iterators
-capability.txt
-	- Generic Block Device Capability (/sys/block/<device>/capability)
-cfq-iosched.txt
-	- CFQ IO scheduler tunables
-cmdline-partition.txt
-	- how to specify block device partitions on kernel command line
-data-integrity.txt
-	- Block data integrity
-deadline-iosched.txt
-	- Deadline IO scheduler tunables
-ioprio.txt
-	- Block io priorities (in CFQ scheduler)
-pr.txt
-	- Block layer support for Persistent Reservations
-null_blk.txt
-	- Null block for block-layer benchmarking.
-queue-sysfs.txt
-	- Queue's sysfs entries
-request.txt
-	- The members of struct request (in include/linux/blkdev.h)
-stat.txt
-	- Block layer statistics in /sys/block/<device>/stat
-switching-sched.txt
-	- Switching I/O schedulers at runtime
-writeback_cache_control.txt
-	- Control of volatile write back caches

+ 0 - 18
Documentation/blockdev/00-INDEX

@@ -1,18 +0,0 @@
-00-INDEX
-	- this file
-README.DAC960
-	- info on Mylex DAC960/DAC1100 PCI RAID Controller Driver for Linux.
-cciss.txt
-	- info, major/minor #'s for Compaq's SMART Array Controllers.
-cpqarray.txt
-	- info on using Compaq's SMART2 Intelligent Disk Array Controllers.
-floppy.txt
-	- notes and driver options for the floppy disk driver.
-mflash.txt
-	- info on mGine m(g)flash driver for linux.
-nbd.txt
-	- info on a TCP implementation of a network block device.
-paride.txt
-	- information about the parallel port IDE subsystem.
-ramdisk.txt
-	- short guide on how to set up and use the RAM disk.

+ 0 - 756
Documentation/blockdev/README.DAC960

@@ -1,756 +0,0 @@
-   Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers
-
-			Version 2.2.11 for Linux 2.2.19
-			Version 2.4.11 for Linux 2.4.12
-
-			      PRODUCTION RELEASE
-
-				11 October 2001
-
-			       Leonard N. Zubkoff
-			       Dandelion Digital
-			       lnz@dandelion.com
-
-	 Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com>
-
-
-				 INTRODUCTION
-
-Mylex, Inc. designs and manufactures a variety of high performance PCI RAID
-controllers.  Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont,
-California 94555, USA and can be reached at 510.796.6100 or on the World Wide
-Web at http://www.mylex.com.  Mylex Technical Support can be reached by
-electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at
-510.745.7715.  Contact information for offices in Europe and Japan is available
-on their Web site.
-
-The latest information on Linux support for DAC960 PCI RAID Controllers, as
-well as the most recent release of this driver, will always be available from
-my Linux Home Page at URL "http://www.dandelion.com/Linux/".  The Linux DAC960
-driver supports all current Mylex PCI RAID controllers including the new
-eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely
-new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250,
-and DAC960PJ/PG/PU/PD/PL.  See below for a complete controller list as well as
-minimum firmware version requirements.  For simplicity, in most places this
-documentation refers to DAC960 generically rather than explicitly listing all
-the supported models.
-
-Driver bug reports should be sent via electronic mail to "lnz@dandelion.com".
-Please include with the bug report the complete configuration messages reported
-by the driver at startup, along with any subsequent system messages relevant to
-the controller's operation, and a detailed description of your system's
-hardware configuration.  Driver bugs are actually quite rare; if you encounter
-problems with disks being marked offline, for example, please contact Mylex
-Technical Support as the problem is related to the hardware configuration
-rather than the Linux driver.
-
-Please consult the RAID controller documentation for detailed information
-regarding installation and configuration of the controllers.  This document
-primarily provides information specific to the Linux support.
-
-
-				DRIVER FEATURES
-
-The DAC960 RAID controllers are supported solely as high performance RAID
-controllers, not as interfaces to arbitrary SCSI devices.  The Linux DAC960
-driver operates at the block device level, the same level as the SCSI and IDE
-drivers.  Unlike other RAID controllers currently supported on Linux, the
-DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the
-complexity and unnecessary code that would be associated with an implementation
-as a SCSI driver.  The DAC960 driver is designed for as high a performance as
-possible with no compromises or extra code for compatibility with lower
-performance devices.  The DAC960 driver includes extensive error logging and
-online configuration management capabilities.  Except for initial configuration
-of the controller and adding new disk drives, most everything can be handled
-from Linux while the system is operational.
-
-The DAC960 driver is architected to support up to 8 controllers per system.
-Each DAC960 parallel SCSI controller can support up to 15 disk drives per
-channel, for a maximum of 60 drives on a four channel controller; the fibre
-channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for
-a total of 250 drives.  The drives installed on a controller are divided into
-one or more "Drive Groups", and then each Drive Group is subdivided further
-into 1 to 32 "Logical Drives".  Each Logical Drive has a specific RAID Level
-and caching policy associated with it, and it appears to Linux as a single
-block device.  Logical Drives are further subdivided into up to 7 partitions
-through the normal Linux and PC disk partitioning schemes.  Logical Drives are
-also known as "System Drives", and Drive Groups are also called "Packs".  Both
-terms are in use in the Mylex documentation; I have chosen to standardize on
-the more generic "Logical Drive" and "Drive Group".
-
-DAC960 RAID disk devices are named in the style of the obsolete Device File
-System (DEVFS).  The device corresponding to Logical Drive D on Controller C
-is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1
-through /dev/rd/cCdDp7.  For example, partition 3 of Logical Drive 5 on
-Controller 2 is referred to as /dev/rd/c2d5p3.  Note that unlike with SCSI
-disks the device names will not change in the event of a disk drive failure.
-The DAC960 driver is assigned major numbers 48 - 55 with one major number per
-controller.  The 8 bits of minor number are divided into 5 bits for the Logical
-Drive and 3 bits for the partition.
-
-
-	  SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS
-
-The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID
-PCI RAID Controllers as of the date of this document.  It is recommended that
-anyone purchasing a Mylex PCI RAID Controller not in the following table
-contact the author beforehand to verify that it is or will be supported.
-
-eXtremeRAID 3000
-	    1 Wide Ultra-2/LVD SCSI channel
-	    2 External Fibre FC-AL channels
-	    233MHz StrongARM SA 110 Processor
-	    64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
-	    32MB/64MB ECC SDRAM Memory
-
-eXtremeRAID 2000
-	    4 Wide Ultra-160 LVD SCSI channels
-	    233MHz StrongARM SA 110 Processor
-	    64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
-	    32MB/64MB ECC SDRAM Memory
-
-AcceleRAID 352
-	    2 Wide Ultra-160 LVD SCSI channels
-	    100MHz Intel i960RN RISC Processor
-	    64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
-	    32MB/64MB ECC SDRAM Memory
-
-AcceleRAID 170
-	    1 Wide Ultra-160 LVD SCSI channel
-	    100MHz Intel i960RM RISC Processor
-	    16MB/32MB/64MB ECC SDRAM Memory
-
-AcceleRAID 160 (AcceleRAID 170LP)
-	    1 Wide Ultra-160 LVD SCSI channel
-	    100MHz Intel i960RS RISC Processor
-	    Built in 16M ECC SDRAM Memory
-	    PCI Low Profile Form Factor - fit for 2U height
-
-eXtremeRAID 1100 (DAC1164P)
-	    3 Wide Ultra-2/LVD SCSI channels
-	    233MHz StrongARM SA 110 Processor
-	    64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
-	    16MB/32MB/64MB Parity SDRAM Memory with Battery Backup
-
-AcceleRAID 250 (DAC960PTL1)
-	    Uses onboard Symbios SCSI chips on certain motherboards
-	    Also includes one onboard Wide Ultra-2/LVD SCSI Channel
-	    66MHz Intel i960RD RISC Processor
-	    4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
-
-AcceleRAID 200 (DAC960PTL0)
-	    Uses onboard Symbios SCSI chips on certain motherboards
-	    Includes no onboard SCSI Channels
-	    66MHz Intel i960RD RISC Processor
-	    4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
-
-AcceleRAID 150 (DAC960PRL)
-	    Uses onboard Symbios SCSI chips on certain motherboards
-	    Also includes one onboard Wide Ultra-2/LVD SCSI Channel
-	    33MHz Intel i960RP RISC Processor
-	    4MB Parity EDO Memory
-
-DAC960PJ    1/2/3 Wide Ultra SCSI-3 Channels
-	    66MHz Intel i960RD RISC Processor
-	    4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
-
-DAC960PG    1/2/3 Wide Ultra SCSI-3 Channels
-	    33MHz Intel i960RP RISC Processor
-	    4MB/8MB ECC EDO Memory
-
-DAC960PU    1/2/3 Wide Ultra SCSI-3 Channels
-	    Intel i960CF RISC Processor
-	    4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
-
-DAC960PD    1/2/3 Wide Fast SCSI-2 Channels
-	    Intel i960CF RISC Processor
-	    4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
-
-DAC960PL    1/2/3 Wide Fast SCSI-2 Channels
-	    Intel i960 RISC Processor
-	    2MB/4MB/8MB/16MB/32MB DRAM Memory
-
-DAC960P	    1/2/3 Wide Fast SCSI-2 Channels
-	    Intel i960 RISC Processor
-	    2MB/4MB/8MB/16MB/32MB DRAM Memory
-
-For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version
-6.00-01 or above is required.
-
-For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required.
-
-For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is
-required.
-
-For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required.
-
-For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version
-3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware
-version 2.73-0-00 or above is required (for single Flash ROM controllers)
-
-Please note that not all SCSI disk drives are suitable for use with DAC960
-controllers, and only particular firmware versions of any given model may
-actually function correctly.  Similarly, not all motherboards have a BIOS that
-properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150,
-DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device.
-If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to
-verify compatibility.  Mylex makes available a hard disk compatibility list at
-http://www.mylex.com/support/hdcomp/hd-lists.html.
-
-
-			      DRIVER INSTALLATION
-
-This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12.
-
-To install the DAC960 RAID driver, you may use the following commands,
-replacing "/usr/src" with wherever you keep your Linux kernel source tree:
-
-  cd /usr/src
-  tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz)
-  mv README.DAC960 linux/Documentation
-  mv DAC960.[ch] linux/drivers/block
-  patch -p0 < DAC960.patch (if DAC960.patch is included)
-  cd linux
-  make config
-  make bzImage (or zImage)
-
-Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your
-standard kernel, run lilo if appropriate, and reboot.
-
-To create the necessary devices in /dev, the "make_rd" script included in
-"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used.
-LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive
-are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with
-statically linked executables of LILO and FDISK.  This modified version of LILO
-will allow booting from a DAC960 controller and/or mounting the root file
-system from a DAC960.
-
-Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID
-controllers.  Installing directly onto a DAC960 may be problematic from other
-Linux distributions until their installation utilities are updated.
-
-
-			      INSTALLATION NOTES
-
-Before installing Linux or adding DAC960 logical drives to an existing Linux
-system, the controller must first be configured to provide one or more logical
-drives using the BIOS Configuration Utility or DACCF.  Please note that since
-there are only at most 6 usable partitions on each logical drive, systems
-requiring more partitions should subdivide a drive group into multiple logical
-drives, each of which can have up to 6 usable partitions.  Also, note that with
-large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63)
-rather than accepting the default 2GB BIOS Geometry (128/32); failing to so do
-will cause the logical drive geometry to have more than 65535 cylinders which
-will make it impossible for FDISK to be used properly.  The 8GB BIOS Geometry
-can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M
-during the BIOS initialization sequence.
-
-For maximum performance and the most efficient E2FSCK performance, it is
-recommended that EXT2 file systems be built with a 4KB block size and 16 block
-stride to match the DAC960 controller's 64KB default stripe size.  The command
-"mke2fs -b 4096 -R stride=16 <device>" is appropriate.  Unless there will be a
-large number of small files on the file systems, it is also beneficial to add
-the "-i 16384" option to increase the bytes per inode parameter thereby
-reducing the file system metadata.  Finally, on systems that will only be run
-with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks
-with the "-s 1" option.
-
-
-		      DAC960 ANNOUNCEMENTS MAILING LIST
-
-The DAC960 Announcements Mailing List provides a forum for informing Linux
-users of new driver releases and other announcements regarding Linux support
-for DAC960 PCI RAID Controllers.  To join the mailing list, send a message to
-"dac960-announce-request@dandelion.com" with the line "subscribe" in the
-message body.
-
-
-		CONTROLLER CONFIGURATION AND STATUS MONITORING
-
-The DAC960 RAID controllers running firmware 4.06 or above include a Background
-Initialization facility so that system downtime is minimized both for initial
-installation and subsequent configuration of additional storage.  The BIOS
-Configuration Utility (accessible via Alt-R during the BIOS initialization
-sequence) is used to quickly configure the controller, and then the logical
-drives that have been created are available for immediate use even while they
-are still being initialized by the controller.  The primary need for online
-configuration and status monitoring is then to avoid system downtime when disk
-drives fail and must be replaced.  Mylex's online monitoring and configuration
-utilities are being ported to Linux and will become available at some point in
-the future.  Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure)
-enclosure, the controller is able to rebuild failed drives automatically as
-soon as a drive replacement is made available.
-
-The primary interfaces for controller configuration and status monitoring are
-special files created in the /proc/rd/... hierarchy along with the normal
-system console logging mechanism.  Whenever the system is operating, the DAC960
-driver queries each controller for status information every 10 seconds, and
-checks for additional conditions every 60 seconds.  The initial status of each
-controller is always available for controller N in /proc/rd/cN/initial_status,
-and the current status as of the last status monitoring query is available in
-/proc/rd/cN/current_status.  In addition, status changes are also logged by the
-driver to the system console and will appear in the log files maintained by
-syslog.  The progress of asynchronous rebuild or consistency check operations
-is also available in /proc/rd/cN/current_status, and progress messages are
-logged to the system console at most every 60 seconds.
-
-Starting with the 2.2.3/2.0.3 versions of the driver, the status information
-available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been
-augmented to include the vendor, model, revision, and serial number (if
-available) for each physical device found connected to the controller:
-
-***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 *****
-Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
-Configuring Mylex DAC960PRL PCI RAID Controller
-  Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB
-  PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned
-  PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21
-  Controller Queue Depth: 128, Maximum Blocks per Command: 128
-  Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
-  Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
-  SAF-TE Enclosure Management Enabled
-  Physical Devices:
-    0:0  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       68016775HA
-         Disk Status: Online, 17928192 blocks
-    0:1  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       68004E53HA
-         Disk Status: Online, 17928192 blocks
-    0:2  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       13013935HA
-         Disk Status: Online, 17928192 blocks
-    0:3  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       13016897HA
-         Disk Status: Online, 17928192 blocks
-    0:4  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       68019905HA
-         Disk Status: Online, 17928192 blocks
-    0:5  Vendor: IBM       Model: DRVS09D           Revision: 0270
-         Serial Number:       68012753HA
-         Disk Status: Online, 17928192 blocks
-    0:6  Vendor: ESG-SHV   Model: SCA HSBP M6       Revision: 0.61
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru
-  No Rebuild or Consistency Check in Progress
-
-To simplify the monitoring process for custom software, the special file
-/proc/rd/status returns "OK" when all DAC960 controllers in the system are
-operating normally and no failures have occurred, or "ALERT" if any logical
-drives are offline or critical or any non-standby physical drives are dead.
-
-Configuration commands for controller N are available via the special file
-/proc/rd/cN/user_command.  A human readable command can be written to this
-special file to initiate a configuration operation, and the results of the
-operation can then be read back from the special file in addition to being
-logged to the system console.  The shell command sequence
-
-  echo "<configuration-command>" > /proc/rd/c0/user_command
-  cat /proc/rd/c0/user_command
-
-is typically used to execute configuration commands.  The configuration
-commands are:
-
-  flush-cache
-
-    The "flush-cache" command flushes the controller's cache.  The system
-    automatically flushes the cache at shutdown or if the driver module is
-    unloaded, so this command is only needed to be certain a write back cache
-    is flushed to disk before the system is powered off by a command to a UPS.
-    Note that the flush-cache command also stops an asynchronous rebuild or
-    consistency check, so it should not be used except when the system is being
-    halted.
-
-  kill <channel>:<target-id>
-
-    The "kill" command marks the physical drive <channel>:<target-id> as DEAD.
-    This command is provided primarily for testing, and should not be used
-    during normal system operation.
-
-  make-online <channel>:<target-id>
-
-    The "make-online" command changes the physical drive <channel>:<target-id>
-    from status DEAD to status ONLINE.  In cases where multiple physical drives
-    have been killed simultaneously, this command may be used to bring all but
-    one of them back online, after which a rebuild to the final drive is
-    necessary.
-
-    Warning: make-online should only be used on a dead physical drive that is
-    an active part of a drive group, never on a standby drive.  The command
-    should never be used on a dead drive that is part of a critical logical
-    drive; rebuild should be used if only a single drive is dead.
-
-  make-standby <channel>:<target-id>
-
-    The "make-standby" command changes physical drive <channel>:<target-id>
-    from status DEAD to status STANDBY.  It should only be used in cases where
-    a dead drive was replaced after an automatic rebuild was performed onto a
-    standby drive.  It cannot be used to add a standby drive to the controller
-    configuration if one was not created initially; the BIOS Configuration
-    Utility must be used for that currently.
-
-  rebuild <channel>:<target-id>
-
-    The "rebuild" command initiates an asynchronous rebuild onto physical drive
-    <channel>:<target-id>.  It should only be used when a dead drive has been
-    replaced.
-
-  check-consistency <logical-drive-number>
-
-    The "check-consistency" command initiates an asynchronous consistency check
-    of <logical-drive-number> with automatic restoration.  It can be used
-    whenever it is desired to verify the consistency of the redundancy
-    information.
-
-  cancel-rebuild
-  cancel-consistency-check
-
-    The "cancel-rebuild" and "cancel-consistency-check" commands cancel any
-    rebuild or consistency check operations previously initiated.
-
-
-	       EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE
-
-The following annotated logs demonstrate the controller configuration and and
-online status monitoring capabilities of the Linux DAC960 Driver.  The test
-configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
-DAC960PJ controller.  The physical drives are configured into a single drive
-group without a standby drive, and the drive group has been configured into two
-logical drives, one RAID-5 and one RAID-6.  Note that these logs are from an
-earlier version of the driver and the messages have changed somewhat with newer
-releases, but the functionality remains similar.  First, here is the current
-status of the RAID configuration:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
-Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
-Configuring Mylex DAC960PJ PCI RAID Controller
-  Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
-  PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
-  PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
-  Controller Queue Depth: 128, Maximum Blocks per Command: 128
-  Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
-  Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
-  No Rebuild or Consistency Check in Progress
-
-gwynedd:/u/lnz# cat /proc/rd/status
-OK
-
-The above messages indicate that everything is healthy, and /proc/rd/status
-returns "OK" indicating that there are no problems with any DAC960 controller
-in the system.  For demonstration purposes, while I/O is active Physical Drive
-1:1 is now disconnected, simulating a drive failure.  The failure is noted by
-the driver within 10 seconds of the controller's having detected it, and the
-driver logs the following console status messages indicating that Logical
-Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD:
-
-DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
-DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
-DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command
-DAC960#0: Physical Drive 1:1 is now DEAD
-DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
-DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
-
-The Sense Keys logged here are just Check Condition / Unit Attention conditions
-arising from a SCSI bus reset that is forced by the controller during its error
-recovery procedures.  Concurrently with the above, the driver status available
-from /proc/rd also reflects the drive failure.  The status message in
-/proc/rd/status has changed from "OK" to "ALERT":
-
-gwynedd:/u/lnz# cat /proc/rd/status
-ALERT
-
-and /proc/rd/c0/current_status has been updated:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Dead, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
-  No Rebuild or Consistency Check in Progress
-
-Since there are no standby drives configured, the system can continue to access
-the logical drives in a performance degraded mode until the failed drive is
-replaced and a rebuild operation completed to restore the redundancy of the
-logical drives.  Once Physical Drive 1:1 is replaced with a properly
-functioning drive, or if the physical drive was killed without having failed
-(e.g., due to electrical problems on the SCSI bus), the user can instruct the
-controller to initiate a rebuild operation onto the newly replaced drive:
-
-gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command
-gwynedd:/u/lnz# cat /proc/rd/c0/user_command
-Rebuild of Physical Drive 1:1 Initiated
-
-The echo command instructs the controller to initiate an asynchronous rebuild
-operation onto Physical Drive 1:1, and the status message that results from the
-operation is then available for reading from /proc/rd/c0/user_command, as well
-as being logged to the console by the driver.
-
-Within 10 seconds of this command the driver logs the initiation of the
-asynchronous rebuild operation:
-
-DAC960#0: Rebuild of Physical Drive 1:1 Initiated
-DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
-DAC960#0: Physical Drive 1:1 is now WRITE-ONLY
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed
-
-and /proc/rd/c0/current_status is updated:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Write-Only, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
-  Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed
-
-As the rebuild progresses, the current status in /proc/rd/c0/current_status is
-updated every 10 seconds:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Write-Only, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
-  Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed
-
-and every minute a progress message is logged to the console by the driver:
-
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed
-DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed
-
-Finally, the rebuild completes successfully.  The driver logs the status of the 
-logical and physical drives and the rebuild completion:
-
-DAC960#0: Rebuild Completed Successfully
-DAC960#0: Physical Drive 1:1 is now ONLINE
-DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
-DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
-
-/proc/rd/c0/current_status is updated:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
-  Rebuild Completed Successfully
-
-and /proc/rd/status indicates that everything is healthy once again:
-
-gwynedd:/u/lnz# cat /proc/rd/status
-OK
-
-
-		EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE
-
-The following annotated logs demonstrate the controller configuration and and
-online status monitoring capabilities of the Linux DAC960 Driver.  The test
-configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
-DAC960PJ controller.  The physical drives are configured into a single drive
-group with a standby drive, and the drive group has been configured into two
-logical drives, one RAID-5 and one RAID-6.  Note that these logs are from an
-earlier version of the driver and the messages have changed somewhat with newer
-releases, but the functionality remains similar.  First, here is the current
-status of the RAID configuration:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
-Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
-Configuring Mylex DAC960PJ PCI RAID Controller
-  Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
-  PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
-  PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
-  Controller Queue Depth: 128, Maximum Blocks per Command: 128
-  Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
-  Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Online, 2201600 blocks
-    1:3 - Disk: Standby, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
-  No Rebuild or Consistency Check in Progress
-
-gwynedd:/u/lnz# cat /proc/rd/status
-OK
-
-The above messages indicate that everything is healthy, and /proc/rd/status
-returns "OK" indicating that there are no problems with any DAC960 controller
-in the system.  For demonstration purposes, while I/O is active Physical Drive
-1:2 is now disconnected, simulating a drive failure.  The failure is noted by
-the driver within 10 seconds of the controller's having detected it, and the
-driver logs the following console status messages:
-
-DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
-DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
-DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command
-DAC960#0: Physical Drive 1:2 is now DEAD
-DAC960#0: Physical Drive 1:2 killed because it was removed
-DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
-DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
-
-Since a standby drive is configured, the controller automatically begins
-rebuilding onto the standby drive:
-
-DAC960#0: Physical Drive 1:3 is now WRITE-ONLY
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
-
-Concurrently with the above, the driver status available from /proc/rd also
-reflects the drive failure and automatic rebuild.  The status message in
-/proc/rd/status has changed from "OK" to "ALERT":
-
-gwynedd:/u/lnz# cat /proc/rd/status
-ALERT
-
-and /proc/rd/c0/current_status has been updated:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Dead, 2201600 blocks
-    1:3 - Disk: Write-Only, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
-  Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
-
-As the rebuild progresses, the current status in /proc/rd/c0/current_status is
-updated every 10 seconds:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Dead, 2201600 blocks
-    1:3 - Disk: Write-Only, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
-  Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
-
-and every minute a progress message is logged on the console by the driver:
-
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
-DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed
-DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed
-DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed
-
-Finally, the rebuild completes successfully.  The driver logs the status of the 
-logical and physical drives and the rebuild completion:
-
-DAC960#0: Rebuild Completed Successfully
-DAC960#0: Physical Drive 1:3 is now ONLINE
-DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
-DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
-
-/proc/rd/c0/current_status is updated:
-
-***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
-Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
-Configuring Mylex DAC960PJ PCI RAID Controller
-  Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
-  PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
-  PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
-  Controller Queue Depth: 128, Maximum Blocks per Command: 128
-  Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
-  Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Dead, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
-  Rebuild Completed Successfully
-
-and /proc/rd/status indicates that everything is healthy once again:
-
-gwynedd:/u/lnz# cat /proc/rd/status
-OK
-
-Note that the absence of a viable standby drive does not create an "ALERT"
-status.  Once dead Physical Drive 1:2 has been replaced, the controller must be
-told that this has occurred and that the newly replaced drive should become the
-new standby drive:
-
-gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command
-gwynedd:/u/lnz# cat /proc/rd/c0/user_command
-Make Standby of Physical Drive 1:2 Succeeded
-
-The echo command instructs the controller to make Physical Drive 1:2 into a
-standby drive, and the status message that results from the operation is then
-available for reading from /proc/rd/c0/user_command, as well as being logged to
-the console by the driver.  Within 60 seconds of this command the driver logs:
-
-DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
-DAC960#0: Physical Drive 1:2 is now STANDBY
-DAC960#0: Make Standby of Physical Drive 1:2 Succeeded
-
-and /proc/rd/c0/current_status is updated:
-
-gwynedd:/u/lnz# cat /proc/rd/c0/current_status
-  ...
-  Physical Devices:
-    0:1 - Disk: Online, 2201600 blocks
-    0:2 - Disk: Online, 2201600 blocks
-    0:3 - Disk: Online, 2201600 blocks
-    1:1 - Disk: Online, 2201600 blocks
-    1:2 - Disk: Standby, 2201600 blocks
-    1:3 - Disk: Online, 2201600 blocks
-  Logical Drives:
-    /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
-    /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
-  Rebuild Completed Successfully

+ 1 - 1
Documentation/blockdev/zram.txt

@@ -190,7 +190,7 @@ whitespace:
  notify_free      Depending on device usage scenario it may account
                   a) the number of pages freed because of swap slot free
                   notifications or b) the number of pages freed because of
-                  REQ_DISCARD requests sent by bio. The former ones are
+                  REQ_OP_DISCARD requests sent by bio. The former ones are
                   sent to a swap block device when a swap slot is freed,
                   which implies that this disk is being used as a swap disk.
                   The latter ones are sent by filesystem mounted with

+ 0 - 11
Documentation/cdrom/00-INDEX

@@ -1,11 +0,0 @@
-00-INDEX
-	- this file (info on CD-ROMs and Linux)
-Makefile
-	- only used to generate TeX output from the documentation.
-cdrom-standard.tex
-	- LaTeX document on standardizing the CD-ROM programming interface.
-ide-cd
-	- info on setting up and using ATAPI (aka IDE) CD-ROMs.
-packet-writing.txt
-	- Info on the CDRW packet writing module
-

+ 0 - 26
Documentation/cgroup-v1/00-INDEX

@@ -1,26 +0,0 @@
-00-INDEX
-	- this file
-blkio-controller.txt
-	- Description for Block IO Controller, implementation and usage details.
-cgroups.txt
-	- Control Groups definition, implementation details, examples and API.
-cpuacct.txt
-	- CPU Accounting Controller; account CPU usage for groups of tasks.
-cpusets.txt
-	- documents the cpusets feature; assign CPUs and Mem to a set of tasks.
-admin-guide/devices.rst
-	- Device Whitelist Controller; description, interface and security.
-freezer-subsystem.txt
-	- checkpointing; rationale to not use signals, interface.
-hugetlb.txt
-	- HugeTLB Controller implementation and usage details.
-memcg_test.txt
-	- Memory Resource Controller; implementation details.
-memory.txt
-	- Memory Resource Controller; design, accounting, interface, testing.
-net_cls.txt
-	- Network classifier cgroups details and usages.
-net_prio.txt
-	- Network priority cgroups details and usages.
-pids.txt
-	- Process number cgroups details and usages.

+ 1 - 1
Documentation/cgroup-v1/rdma.txt

@@ -27,7 +27,7 @@ cgroup.
 Currently user space applications can easily take away all the rdma verb
 specific resources such as AH, CQ, QP, MR etc. Due to which other applications
 in other cgroup or kernel space ULPs may not even get chance to allocate any
-rdma resources. This can leads to service unavailability.
+rdma resources. This can lead to service unavailability.
 
 Therefore RDMA controller is needed through which resource consumption
 of processes can be limited. Through this controller different rdma

+ 7 - 3
Documentation/conf.py

@@ -259,7 +259,7 @@ latex_elements = {
 'papersize': 'a4paper',
 
 # The font size ('10pt', '11pt' or '12pt').
-'pointsize': '8pt',
+'pointsize': '11pt',
 
 # Latex figure (float) alignment
 #'figure_align': 'htbp',
@@ -272,8 +272,8 @@ latex_elements = {
     'preamble': '''
 	% Use some font with UTF-8 support with XeLaTeX
         \\usepackage{fontspec}
-        \\setsansfont{DejaVu Serif}
-        \\setromanfont{DejaVu Sans}
+        \\setsansfont{DejaVu Sans}
+        \\setromanfont{DejaVu Serif}
         \\setmonofont{DejaVu Sans Mono}
 
      '''
@@ -383,6 +383,10 @@ latex_documents = [
      'The kernel development community', 'manual'),
     ('filesystems/index', 'filesystems.tex', 'Linux Filesystems API',
      'The kernel development community', 'manual'),
+    ('admin-guide/ext4', 'ext4-admin-guide.tex', 'ext4 Administration Guide',
+     'ext4 Community', 'manual'),
+    ('filesystems/ext4/index', 'ext4-data-structures.tex',
+     'ext4 Data Structures and Algorithms', 'ext4 Community', 'manual'),
     ('gpu/index', 'gpu.tex', 'Linux GPU Driver Developer\'s Guide',
      'The kernel development community', 'manual'),
     ('input/index', 'linux-input.tex', 'The Linux input driver subsystem',

+ 10 - 61
Documentation/core-api/boot-time-mm.rst

@@ -5,54 +5,23 @@ Boot time memory management
 Early system initialization cannot use "normal" memory management
 simply because it is not set up yet. But there is still need to
 allocate memory for various data structures, for instance for the
-physical page allocator. To address this, a specialized allocator
-called the :ref:`Boot Memory Allocator <bootmem>`, or bootmem, was
-introduced. Several years later PowerPC developers added a "Logical
-Memory Blocks" allocator, which was later adopted by other
-architectures and renamed to :ref:`memblock <memblock>`. There is also
-a compatibility layer called `nobootmem` that translates bootmem
-allocation interfaces to memblock calls.
+physical page allocator.
 
-The selection of the early allocator is done using
-``CONFIG_NO_BOOTMEM`` and ``CONFIG_HAVE_MEMBLOCK`` kernel
-configuration options. These options are enabled or disabled
-statically by the architectures' Kconfig files.
-
-* Architectures that rely only on bootmem select
-  ``CONFIG_NO_BOOTMEM=n && CONFIG_HAVE_MEMBLOCK=n``.
-* The users of memblock with the nobootmem compatibility layer set
-  ``CONFIG_NO_BOOTMEM=y && CONFIG_HAVE_MEMBLOCK=y``.
-* And for those that use both memblock and bootmem the configuration
-  includes ``CONFIG_NO_BOOTMEM=n && CONFIG_HAVE_MEMBLOCK=y``.
-
-Whichever allocator is used, it is the responsibility of the
-architecture specific initialization to set it up in
-:c:func:`setup_arch` and tear it down in :c:func:`mem_init` functions.
+A specialized allocator called ``memblock`` performs the
+boot time memory management. The architecture specific initialization
+must set it up in :c:func:`setup_arch` and tear it down in
+:c:func:`mem_init` functions.
 
 Once the early memory management is available it offers a variety of
 functions and macros for memory allocations. The allocation request
 may be directed to the first (and probably the only) node or to a
 particular node in a NUMA system. There are API variants that panic
-when an allocation fails and those that don't. And more recent and
-advanced memblock even allows controlling its own behaviour.
-
-.. _bootmem:
-
-Bootmem
-=======
-
-(mostly stolen from Mel Gorman's "Understanding the Linux Virtual
-Memory Manager" `book`_)
-
-.. _book: https://www.kernel.org/doc/gorman/
-
-.. kernel-doc:: mm/bootmem.c
-   :doc: bootmem overview
+when an allocation fails and those that don't.
 
-.. _memblock:
+Memblock also offers a variety of APIs that control its own behaviour.
 
-Memblock
-========
+Memblock Overview
+=================
 
 .. kernel-doc:: mm/memblock.c
    :doc: memblock overview
@@ -61,26 +30,6 @@ Memblock
 Functions and structures
 ========================
 
-Common API
-----------
-
-The functions that are described in this section are available
-regardless of what early memory manager is enabled.
-
-.. kernel-doc:: mm/nobootmem.c
-
-Bootmem specific API
---------------------
-
-These interfaces available only with bootmem, i.e when ``CONFIG_NO_BOOTMEM=n``
-
-.. kernel-doc:: include/linux/bootmem.h
-.. kernel-doc:: mm/bootmem.c
-   :nodocs:
-
-Memblock specific API
----------------------
-
 Here is the description of memblock data structures, functions and
 macros. Some of them are actually internal, but since they are
 documented it would be silly to omit them. Besides, reading the
@@ -89,4 +38,4 @@ really happens under the hood.
 
 .. kernel-doc:: include/linux/memblock.h
 .. kernel-doc:: mm/memblock.c
-   :nodocs:
+   :functions:

+ 2 - 0
Documentation/core-api/gfp_mask-from-fs-io.rst

@@ -1,3 +1,5 @@
+.. _gfp_mask_from_fs_io:
+
 =================================
 GFP masks used from FS/IO context
 =================================

+ 1 - 1
Documentation/core-api/idr.rst

@@ -1,4 +1,4 @@
-.. SPDX-License-Identifier: CC-BY-SA-4.0
+.. SPDX-License-Identifier: GPL-2.0+
 
 =============
 ID Allocation

+ 4 - 0
Documentation/core-api/index.rst

@@ -21,16 +21,20 @@ Core utilities
    local_ops
    workqueue
    genericirq
+   xarray
    flexible-arrays
    librs
    genalloc
    errseq
    printk-formats
    circular-buffers
+   memory-allocation
    mm-api
    gfp_mask-from-fs-io
    timekeeping
    boot-time-mm
+   memory-hotplug
+
 
 Interfaces for kernel debugging
 ===============================

+ 122 - 0
Documentation/core-api/memory-allocation.rst

@@ -0,0 +1,122 @@
+=======================
+Memory Allocation Guide
+=======================
+
+Linux provides a variety of APIs for memory allocation. You can
+allocate small chunks using `kmalloc` or `kmem_cache_alloc` families,
+large virtually contiguous areas using `vmalloc` and its derivatives,
+or you can directly request pages from the page allocator with
+`alloc_pages`. It is also possible to use more specialized allocators,
+for instance `cma_alloc` or `zs_malloc`.
+
+Most of the memory allocation APIs use GFP flags to express how that
+memory should be allocated. The GFP acronym stands for "get free
+pages", the underlying memory allocation function.
+
+Diversity of the allocation APIs combined with the numerous GFP flags
+makes the question "How should I allocate memory?" not that easy to
+answer, although very likely you should use
+
+::
+
+  kzalloc(<size>, GFP_KERNEL);
+
+Of course there are cases when other allocation APIs and different GFP
+flags must be used.
+
+Get Free Page flags
+===================
+
+The GFP flags control the allocators behavior. They tell what memory
+zones can be used, how hard the allocator should try to find free
+memory, whether the memory can be accessed by the userspace etc. The
+:ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` provides
+reference documentation for the GFP flags and their combinations and
+here we briefly outline their recommended usage:
+
+  * Most of the time ``GFP_KERNEL`` is what you need. Memory for the
+    kernel data structures, DMAable memory, inode cache, all these and
+    many other allocations types can use ``GFP_KERNEL``. Note, that
+    using ``GFP_KERNEL`` implies ``GFP_RECLAIM``, which means that
+    direct reclaim may be triggered under memory pressure; the calling
+    context must be allowed to sleep.
+  * If the allocation is performed from an atomic context, e.g interrupt
+    handler, use ``GFP_NOWAIT``. This flag prevents direct reclaim and
+    IO or filesystem operations. Consequently, under memory pressure
+    ``GFP_NOWAIT`` allocation is likely to fail. Allocations which
+    have a reasonable fallback should be using ``GFP_NOWARN``.
+  * If you think that accessing memory reserves is justified and the kernel
+    will be stressed unless allocation succeeds, you may use ``GFP_ATOMIC``.
+  * Untrusted allocations triggered from userspace should be a subject
+    of kmem accounting and must have ``__GFP_ACCOUNT`` bit set. There
+    is the handy ``GFP_KERNEL_ACCOUNT`` shortcut for ``GFP_KERNEL``
+    allocations that should be accounted.
+  * Userspace allocations should use either of the ``GFP_USER``,
+    ``GFP_HIGHUSER`` or ``GFP_HIGHUSER_MOVABLE`` flags. The longer
+    the flag name the less restrictive it is.
+
+    ``GFP_HIGHUSER_MOVABLE`` does not require that allocated memory
+    will be directly accessible by the kernel and implies that the
+    data is movable.
+
+    ``GFP_HIGHUSER`` means that the allocated memory is not movable,
+    but it is not required to be directly accessible by the kernel. An
+    example may be a hardware allocation that maps data directly into
+    userspace but has no addressing limitations.
+
+    ``GFP_USER`` means that the allocated memory is not movable and it
+    must be directly accessible by the kernel.
+
+You may notice that quite a few allocations in the existing code
+specify ``GFP_NOIO`` or ``GFP_NOFS``. Historically, they were used to
+prevent recursion deadlocks caused by direct memory reclaim calling
+back into the FS or IO paths and blocking on already held
+resources. Since 4.12 the preferred way to address this issue is to
+use new scope APIs described in
+:ref:`Documentation/core-api/gfp_mask-from-fs-io.rst <gfp_mask_from_fs_io>`.
+
+Other legacy GFP flags are ``GFP_DMA`` and ``GFP_DMA32``. They are
+used to ensure that the allocated memory is accessible by hardware
+with limited addressing capabilities. So unless you are writing a
+driver for a device with such restrictions, avoid using these flags.
+And even with hardware with restrictions it is preferable to use
+`dma_alloc*` APIs.
+
+Selecting memory allocator
+==========================
+
+The most straightforward way to allocate memory is to use a function
+from the :c:func:`kmalloc` family. And, to be on the safe size it's
+best to use routines that set memory to zero, like
+:c:func:`kzalloc`. If you need to allocate memory for an array, there
+are :c:func:`kmalloc_array` and :c:func:`kcalloc` helpers.
+
+The maximal size of a chunk that can be allocated with `kmalloc` is
+limited. The actual limit depends on the hardware and the kernel
+configuration, but it is a good practice to use `kmalloc` for objects
+smaller than page size.
+
+For large allocations you can use :c:func:`vmalloc` and
+:c:func:`vzalloc`, or directly request pages from the page
+allocator. The memory allocated by `vmalloc` and related functions is
+not physically contiguous.
+
+If you are not sure whether the allocation size is too large for
+`kmalloc`, it is possible to use :c:func:`kvmalloc` and its
+derivatives. It will try to allocate memory with `kmalloc` and if the
+allocation fails it will be retried with `vmalloc`. There are
+restrictions on which GFP flags can be used with `kvmalloc`; please
+see :c:func:`kvmalloc_node` reference documentation. Note that
+`kvmalloc` may return memory that is not physically contiguous.
+
+If you need to allocate many identical objects you can use the slab
+cache allocator. The cache should be set up with
+:c:func:`kmem_cache_create` before it can be used. Afterwards
+:c:func:`kmem_cache_alloc` and its convenience wrappers can allocate
+memory from that cache.
+
+When the allocated memory is no longer needed it must be freed. You
+can use :c:func:`kvfree` for the memory allocated with `kmalloc`,
+`vmalloc` and `kvmalloc`. The slab caches should be freed with
+:c:func:`kmem_cache_free`. And don't forget to destroy the cache with
+:c:func:`kmem_cache_destroy`.

+ 125 - 0
Documentation/core-api/memory-hotplug.rst

@@ -0,0 +1,125 @@
+.. _memory_hotplug:
+
+==============
+Memory hotplug
+==============
+
+Memory hotplug event notifier
+=============================
+
+Hotplugging events are sent to a notification queue.
+
+There are six types of notification defined in ``include/linux/memory.h``:
+
+MEM_GOING_ONLINE
+  Generated before new memory becomes available in order to be able to
+  prepare subsystems to handle memory. The page allocator is still unable
+  to allocate from the new memory.
+
+MEM_CANCEL_ONLINE
+  Generated if MEM_GOING_ONLINE fails.
+
+MEM_ONLINE
+  Generated when memory has successfully brought online. The callback may
+  allocate pages from the new memory.
+
+MEM_GOING_OFFLINE
+  Generated to begin the process of offlining memory. Allocations are no
+  longer possible from the memory but some of the memory to be offlined
+  is still in use. The callback can be used to free memory known to a
+  subsystem from the indicated memory block.
+
+MEM_CANCEL_OFFLINE
+  Generated if MEM_GOING_OFFLINE fails. Memory is available again from
+  the memory block that we attempted to offline.
+
+MEM_OFFLINE
+  Generated after offlining memory is complete.
+
+A callback routine can be registered by calling::
+
+  hotplug_memory_notifier(callback_func, priority)
+
+Callback functions with higher values of priority are called before callback
+functions with lower values.
+
+A callback function must have the following prototype::
+
+  int callback_func(
+    struct notifier_block *self, unsigned long action, void *arg);
+
+The first argument of the callback function (self) is a pointer to the block
+of the notifier chain that points to the callback function itself.
+The second argument (action) is one of the event types described above.
+The third argument (arg) passes a pointer of struct memory_notify::
+
+	struct memory_notify {
+		unsigned long start_pfn;
+		unsigned long nr_pages;
+		int status_change_nid_normal;
+		int status_change_nid_high;
+		int status_change_nid;
+	}
+
+- start_pfn is start_pfn of online/offline memory.
+- nr_pages is # of pages of online/offline memory.
+- status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
+  is (will be) set/clear, if this is -1, then nodemask status is not changed.
+- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
+  is (will be) set/clear, if this is -1, then nodemask status is not changed.
+- status_change_nid is set node id when N_MEMORY of nodemask is (will be)
+  set/clear. It means a new(memoryless) node gets new memory by online and a
+  node loses all memory. If this is -1, then nodemask status is not changed.
+
+  If status_changed_nid* >= 0, callback should create/discard structures for the
+  node if necessary.
+
+The callback routine shall return one of the values
+NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
+defined in ``include/linux/notifier.h``
+
+NOTIFY_DONE and NOTIFY_OK have no effect on the further processing.
+
+NOTIFY_BAD is used as response to the MEM_GOING_ONLINE, MEM_GOING_OFFLINE,
+MEM_ONLINE, or MEM_OFFLINE action to cancel hotplugging. It stops
+further processing of the notification queue.
+
+NOTIFY_STOP stops further processing of the notification queue.
+
+Locking Internals
+=================
+
+When adding/removing memory that uses memory block devices (i.e. ordinary RAM),
+the device_hotplug_lock should be held to:
+
+- synchronize against online/offline requests (e.g. via sysfs). This way, memory
+  block devices can only be accessed (.online/.state attributes) by user
+  space once memory has been fully added. And when removing memory, we
+  know nobody is in critical sections.
+- synchronize against CPU hotplug and similar (e.g. relevant for ACPI and PPC)
+
+Especially, there is a possible lock inversion that is avoided using
+device_hotplug_lock when adding memory and user space tries to online that
+memory faster than expected:
+
+- device_online() will first take the device_lock(), followed by
+  mem_hotplug_lock
+- add_memory_resource() will first take the mem_hotplug_lock, followed by
+  the device_lock() (while creating the devices, during bus_add_device()).
+
+As the device is visible to user space before taking the device_lock(), this
+can result in a lock inversion.
+
+onlining/offlining of memory should be done via device_online()/
+device_offline() - to make sure it is properly synchronized to actions
+via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type)
+
+When adding/removing/onlining/offlining memory or adding/removing
+heterogeneous/device memory, we should always hold the mem_hotplug_lock in
+write mode to serialise memory hotplug (e.g. access to global/zone
+variables).
+
+In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read
+mode allows for a quite efficient get_online_mems/put_online_mems
+implementation, so code accessing memory can protect from that memory
+vanishing.

+ 2 - 0
Documentation/core-api/mm-api.rst

@@ -14,6 +14,8 @@ User Space Memory Access
 .. kernel-doc:: mm/util.c
    :functions: get_user_pages_fast
 
+.. _mm-api-gfp-flags:
+
 Memory Allocation Controls
 ==========================
 

+ 5 - 6
Documentation/core-api/printk-formats.rst

@@ -376,15 +376,15 @@ correctness of the format string and va_list arguments.
 
 Passed by reference.
 
-kobjects
---------
+Device tree nodes
+-----------------
 
 ::
 
 	%pOF[fnpPcCF]
 
 
-For printing kobject based structs (device nodes). Default behaviour is
+For printing device tree node structures. Default behaviour is
 equivalent to %pOFf.
 
 	- f - device node full_name
@@ -420,9 +420,8 @@ struct clk
 	%pC	pll1
 	%pCn	pll1
 
-For printing struct clk structures. %pC and %pCn print the name
-(Common Clock Framework) or address (legacy clock framework) of the
-structure.
+For printing struct clk structures. %pC and %pCn print the name of the clock
+(Common Clock Framework) or a unique 32-bit ID (legacy clock framework).
 
 Passed by reference.
 

+ 435 - 0
Documentation/core-api/xarray.rst

@@ -0,0 +1,435 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+======
+XArray
+======
+
+:Author: Matthew Wilcox
+
+Overview
+========
+
+The XArray is an abstract data type which behaves like a very large array
+of pointers.  It meets many of the same needs as a hash or a conventional
+resizable array.  Unlike a hash, it allows you to sensibly go to the
+next or previous entry in a cache-efficient manner.  In contrast to a
+resizable array, there is no need to copy data or change MMU mappings in
+order to grow the array.  It is more memory-efficient, parallelisable
+and cache friendly than a doubly-linked list.  It takes advantage of
+RCU to perform lookups without locking.
+
+The XArray implementation is efficient when the indices used are densely
+clustered; hashing the object and using the hash as the index will not
+perform well.  The XArray is optimised for small indices, but still has
+good performance with large indices.  If your index can be larger than
+``ULONG_MAX`` then the XArray is not the data type for you.  The most
+important user of the XArray is the page cache.
+
+Each non-``NULL`` entry in the array has three bits associated with
+it called marks.  Each mark may be set or cleared independently of
+the others.  You can iterate over entries which are marked.
+
+Normal pointers may be stored in the XArray directly.  They must be 4-byte
+aligned, which is true for any pointer returned from :c:func:`kmalloc` and
+:c:func:`alloc_page`.  It isn't true for arbitrary user-space pointers,
+nor for function pointers.  You can store pointers to statically allocated
+objects, as long as those objects have an alignment of at least 4.
+
+You can also store integers between 0 and ``LONG_MAX`` in the XArray.
+You must first convert it into an entry using :c:func:`xa_mk_value`.
+When you retrieve an entry from the XArray, you can check whether it is
+a value entry by calling :c:func:`xa_is_value`, and convert it back to
+an integer by calling :c:func:`xa_to_value`.
+
+Some users want to store tagged pointers instead of using the marks
+described above.  They can call :c:func:`xa_tag_pointer` to create an
+entry with a tag, :c:func:`xa_untag_pointer` to turn a tagged entry
+back into an untagged pointer and :c:func:`xa_pointer_tag` to retrieve
+the tag of an entry.  Tagged pointers use the same bits that are used
+to distinguish value entries from normal pointers, so each user must
+decide whether they want to store value entries or tagged pointers in
+any particular XArray.
+
+The XArray does not support storing :c:func:`IS_ERR` pointers as some
+conflict with value entries or internal entries.
+
+An unusual feature of the XArray is the ability to create entries which
+occupy a range of indices.  Once stored to, looking up any index in
+the range will return the same entry as looking up any other index in
+the range.  Setting a mark on one index will set it on all of them.
+Storing to any index will store to all of them.  Multi-index entries can
+be explicitly split into smaller entries, or storing ``NULL`` into any
+entry will cause the XArray to forget about the range.
+
+Normal API
+==========
+
+Start by initialising an XArray, either with :c:func:`DEFINE_XARRAY`
+for statically allocated XArrays or :c:func:`xa_init` for dynamically
+allocated ones.  A freshly-initialised XArray contains a ``NULL``
+pointer at every index.
+
+You can then set entries using :c:func:`xa_store` and get entries
+using :c:func:`xa_load`.  xa_store will overwrite any entry with the
+new entry and return the previous entry stored at that index.  You can
+use :c:func:`xa_erase` instead of calling :c:func:`xa_store` with a
+``NULL`` entry.  There is no difference between an entry that has never
+been stored to and one that has most recently had ``NULL`` stored to it.
+
+You can conditionally replace an entry at an index by using
+:c:func:`xa_cmpxchg`.  Like :c:func:`cmpxchg`, it will only succeed if
+the entry at that index has the 'old' value.  It also returns the entry
+which was at that index; if it returns the same entry which was passed as
+'old', then :c:func:`xa_cmpxchg` succeeded.
+
+If you want to only store a new entry to an index if the current entry
+at that index is ``NULL``, you can use :c:func:`xa_insert` which
+returns ``-EEXIST`` if the entry is not empty.
+
+You can enquire whether a mark is set on an entry by using
+:c:func:`xa_get_mark`.  If the entry is not ``NULL``, you can set a mark
+on it by using :c:func:`xa_set_mark` and remove the mark from an entry by
+calling :c:func:`xa_clear_mark`.  You can ask whether any entry in the
+XArray has a particular mark set by calling :c:func:`xa_marked`.
+
+You can copy entries out of the XArray into a plain array by calling
+:c:func:`xa_extract`.  Or you can iterate over the present entries in
+the XArray by calling :c:func:`xa_for_each`.  You may prefer to use
+:c:func:`xa_find` or :c:func:`xa_find_after` to move to the next present
+entry in the XArray.
+
+Calling :c:func:`xa_store_range` stores the same entry in a range
+of indices.  If you do this, some of the other operations will behave
+in a slightly odd way.  For example, marking the entry at one index
+may result in the entry being marked at some, but not all of the other
+indices.  Storing into one index may result in the entry retrieved by
+some, but not all of the other indices changing.
+
+Finally, you can remove all entries from an XArray by calling
+:c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
+to free the entries first.  You can do this by iterating over all present
+entries in the XArray using the :c:func:`xa_for_each` iterator.
+
+ID assignment
+-------------
+
+You can call :c:func:`xa_alloc` to store the entry at any unused index
+in the XArray.  If you need to modify the array from interrupt context,
+you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
+interrupts while allocating the ID.  Unlike :c:func:`xa_store`, allocating
+a ``NULL`` pointer does not delete an entry.  Instead it reserves an
+entry like :c:func:`xa_reserve` and you can release it using either
+:c:func:`xa_erase` or :c:func:`xa_release`.  To use ID assignment, the
+XArray must be defined with :c:func:`DEFINE_XARRAY_ALLOC`, or initialised
+by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+
+Memory allocation
+-----------------
+
+The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_alloc`,
+:c:func:`xa_reserve` and :c:func:`xa_insert` functions take a gfp_t
+parameter in case the XArray needs to allocate memory to store this entry.
+If the entry is being deleted, no memory allocation needs to be performed,
+and the GFP flags specified will be ignored.
+
+It is possible for no memory to be allocatable, particularly if you pass
+a restrictive set of GFP flags.  In that case, the functions return a
+special value which can be turned into an errno using :c:func:`xa_err`.
+If you don't need to know exactly which error occurred, using
+:c:func:`xa_is_err` is slightly more efficient.
+
+Locking
+-------
+
+When using the Normal API, you do not have to worry about locking.
+The XArray uses RCU and an internal spinlock to synchronise access:
+
+No lock needed:
+ * :c:func:`xa_empty`
+ * :c:func:`xa_marked`
+
+Takes RCU read lock:
+ * :c:func:`xa_load`
+ * :c:func:`xa_for_each`
+ * :c:func:`xa_find`
+ * :c:func:`xa_find_after`
+ * :c:func:`xa_extract`
+ * :c:func:`xa_get_mark`
+
+Takes xa_lock internally:
+ * :c:func:`xa_store`
+ * :c:func:`xa_insert`
+ * :c:func:`xa_erase`
+ * :c:func:`xa_erase_bh`
+ * :c:func:`xa_erase_irq`
+ * :c:func:`xa_cmpxchg`
+ * :c:func:`xa_store_range`
+ * :c:func:`xa_alloc`
+ * :c:func:`xa_alloc_bh`
+ * :c:func:`xa_alloc_irq`
+ * :c:func:`xa_destroy`
+ * :c:func:`xa_set_mark`
+ * :c:func:`xa_clear_mark`
+
+Assumes xa_lock held on entry:
+ * :c:func:`__xa_store`
+ * :c:func:`__xa_insert`
+ * :c:func:`__xa_erase`
+ * :c:func:`__xa_cmpxchg`
+ * :c:func:`__xa_alloc`
+ * :c:func:`__xa_set_mark`
+ * :c:func:`__xa_clear_mark`
+
+If you want to take advantage of the lock to protect the data structures
+that you are storing in the XArray, you can call :c:func:`xa_lock`
+before calling :c:func:`xa_load`, then take a reference count on the
+object you have found before calling :c:func:`xa_unlock`.  This will
+prevent stores from removing the object from the array between looking
+up the object and incrementing the refcount.  You can also use RCU to
+avoid dereferencing freed memory, but an explanation of that is beyond
+the scope of this document.
+
+The XArray does not disable interrupts or softirqs while modifying
+the array.  It is safe to read the XArray from interrupt or softirq
+context as the RCU lock provides enough protection.
+
+If, for example, you want to store entries in the XArray in process
+context and then erase them in softirq context, you can do that this way::
+
+    void foo_init(struct foo *foo)
+    {
+        xa_init_flags(&foo->array, XA_FLAGS_LOCK_BH);
+    }
+
+    int foo_store(struct foo *foo, unsigned long index, void *entry)
+    {
+        int err;
+
+        xa_lock_bh(&foo->array);
+        err = xa_err(__xa_store(&foo->array, index, entry, GFP_KERNEL));
+        if (!err)
+            foo->count++;
+        xa_unlock_bh(&foo->array);
+        return err;
+    }
+
+    /* foo_erase() is only called from softirq context */
+    void foo_erase(struct foo *foo, unsigned long index)
+    {
+        xa_lock(&foo->array);
+        __xa_erase(&foo->array, index);
+        foo->count--;
+        xa_unlock(&foo->array);
+    }
+
+If you are going to modify the XArray from interrupt or softirq context,
+you need to initialise the array using :c:func:`xa_init_flags`, passing
+``XA_FLAGS_LOCK_IRQ`` or ``XA_FLAGS_LOCK_BH``.
+
+The above example also shows a common pattern of wanting to extend the
+coverage of the xa_lock on the store side to protect some statistics
+associated with the array.
+
+Sharing the XArray with interrupt context is also possible, either
+using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
+context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
+in the interrupt handler.  Some of the more common patterns have helper
+functions such as :c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
+
+Sometimes you need to protect access to the XArray with a mutex because
+that lock sits above another mutex in the locking hierarchy.  That does
+not entitle you to use functions like :c:func:`__xa_erase` without taking
+the xa_lock; the xa_lock is used for lockdep validation and will be used
+for other purposes in the future.
+
+The :c:func:`__xa_set_mark` and :c:func:`__xa_clear_mark` functions are also
+available for situations where you look up an entry and want to atomically
+set or clear a mark.  It may be more efficient to use the advanced API
+in this case, as it will save you from walking the tree twice.
+
+Advanced API
+============
+
+The advanced API offers more flexibility and better performance at the
+cost of an interface which can be harder to use and has fewer safeguards.
+No locking is done for you by the advanced API, and you are required
+to use the xa_lock while modifying the array.  You can choose whether
+to use the xa_lock or the RCU lock while doing read-only operations on
+the array.  You can mix advanced and normal operations on the same array;
+indeed the normal API is implemented in terms of the advanced API.  The
+advanced API is only available to modules with a GPL-compatible license.
+
+The advanced API is based around the xa_state.  This is an opaque data
+structure which you declare on the stack using the :c:func:`XA_STATE`
+macro.  This macro initialises the xa_state ready to start walking
+around the XArray.  It is used as a cursor to maintain the position
+in the XArray and let you compose various operations together without
+having to restart from the top every time.
+
+The xa_state is also used to store errors.  You can call
+:c:func:`xas_error` to retrieve the error.  All operations check whether
+the xa_state is in an error state before proceeding, so there's no need
+for you to check for an error after each call; you can make multiple
+calls in succession and only check at a convenient point.  The only
+errors currently generated by the XArray code itself are ``ENOMEM`` and
+``EINVAL``, but it supports arbitrary errors in case you want to call
+:c:func:`xas_set_err` yourself.
+
+If the xa_state is holding an ``ENOMEM`` error, calling :c:func:`xas_nomem`
+will attempt to allocate more memory using the specified gfp flags and
+cache it in the xa_state for the next attempt.  The idea is that you take
+the xa_lock, attempt the operation and drop the lock.  The operation
+attempts to allocate memory while holding the lock, but it is more
+likely to fail.  Once you have dropped the lock, :c:func:`xas_nomem`
+can try harder to allocate more memory.  It will return ``true`` if it
+is worth retrying the operation (i.e. that there was a memory error *and*
+more memory was allocated).  If it has previously allocated memory, and
+that memory wasn't used, and there is no error (or some error that isn't
+``ENOMEM``), then it will free the memory previously allocated.
+
+Internal Entries
+----------------
+
+The XArray reserves some entries for its own purposes.  These are never
+exposed through the normal API, but when using the advanced API, it's
+possible to see them.  Usually the best way to handle them is to pass them
+to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
+
+.. flat-table::
+   :widths: 1 1 6
+
+   * - Name
+     - Test
+     - Usage
+
+   * - Node
+     - :c:func:`xa_is_node`
+     - An XArray node.  May be visible when using a multi-index xa_state.
+
+   * - Sibling
+     - :c:func:`xa_is_sibling`
+     - A non-canonical entry for a multi-index entry.  The value indicates
+       which slot in this node has the canonical entry.
+
+   * - Retry
+     - :c:func:`xa_is_retry`
+     - This entry is currently being modified by a thread which has the
+       xa_lock.  The node containing this entry may be freed at the end
+       of this RCU period.  You should restart the lookup from the head
+       of the array.
+
+   * - Zero
+     - :c:func:`xa_is_zero`
+     - Zero entries appear as ``NULL`` through the Normal API, but occupy
+       an entry in the XArray which can be used to reserve the index for
+       future use.
+
+Other internal entries may be added in the future.  As far as possible, they
+will be handled by :c:func:`xas_retry`.
+
+Additional functionality
+------------------------
+
+The :c:func:`xas_create_range` function allocates all the necessary memory
+to store every entry in a range.  It will set ENOMEM in the xa_state if
+it cannot allocate memory.
+
+You can use :c:func:`xas_init_marks` to reset the marks on an entry
+to their default state.  This is usually all marks clear, unless the
+XArray is marked with ``XA_FLAGS_TRACK_FREE``, in which case mark 0 is set
+and all other marks are clear.  Replacing one entry with another using
+:c:func:`xas_store` will not reset the marks on that entry; if you want
+the marks reset, you should do that explicitly.
+
+The :c:func:`xas_load` will walk the xa_state as close to the entry
+as it can.  If you know the xa_state has already been walked to the
+entry and need to check that the entry hasn't changed, you can use
+:c:func:`xas_reload` to save a function call.
+
+If you need to move to a different index in the XArray, call
+:c:func:`xas_set`.  This resets the cursor to the top of the tree, which
+will generally make the next operation walk the cursor to the desired
+spot in the tree.  If you want to move to the next or previous index,
+call :c:func:`xas_next` or :c:func:`xas_prev`.  Setting the index does
+not walk the cursor around the array so does not require a lock to be
+held, while moving to the next or previous index does.
+
+You can search for the next present entry using :c:func:`xas_find`.  This
+is the equivalent of both :c:func:`xa_find` and :c:func:`xa_find_after`;
+if the cursor has been walked to an entry, then it will find the next
+entry after the one currently referenced.  If not, it will return the
+entry at the index of the xa_state.  Using :c:func:`xas_next_entry` to
+move to the next present entry instead of :c:func:`xas_find` will save
+a function call in the majority of cases at the expense of emitting more
+inline code.
+
+The :c:func:`xas_find_marked` function is similar.  If the xa_state has
+not been walked, it will return the entry at the index of the xa_state,
+if it is marked.  Otherwise, it will return the first marked entry after
+the entry referenced by the xa_state.  The :c:func:`xas_next_marked`
+function is the equivalent of :c:func:`xas_next_entry`.
+
+When iterating over a range of the XArray using :c:func:`xas_for_each`
+or :c:func:`xas_for_each_marked`, it may be necessary to temporarily stop
+the iteration.  The :c:func:`xas_pause` function exists for this purpose.
+After you have done the necessary work and wish to resume, the xa_state
+is in an appropriate state to continue the iteration after the entry
+you last processed.  If you have interrupts disabled while iterating,
+then it is good manners to pause the iteration and reenable interrupts
+every ``XA_CHECK_SCHED`` entries.
+
+The :c:func:`xas_get_mark`, :c:func:`xas_set_mark` and
+:c:func:`xas_clear_mark` functions require the xa_state cursor to have
+been moved to the appropriate location in the xarray; they will do
+nothing if you have called :c:func:`xas_pause` or :c:func:`xas_set`
+immediately before.
+
+You can call :c:func:`xas_set_update` to have a callback function
+called each time the XArray updates a node.  This is used by the page
+cache workingset code to maintain its list of nodes which contain only
+shadow entries.
+
+Multi-Index Entries
+-------------------
+
+The XArray has the ability to tie multiple indices together so that
+operations on one index affect all indices.  For example, storing into
+any index will change the value of the entry retrieved from any index.
+Setting or clearing a mark on any index will set or clear the mark
+on every index that is tied together.  The current implementation
+only allows tying ranges which are aligned powers of two together;
+eg indices 64-127 may be tied together, but 2-6 may not be.  This may
+save substantial quantities of memory; for example tying 512 entries
+together will save over 4kB.
+
+You can create a multi-index entry by using :c:func:`XA_STATE_ORDER`
+or :c:func:`xas_set_order` followed by a call to :c:func:`xas_store`.
+Calling :c:func:`xas_load` with a multi-index xa_state will walk the
+xa_state to the right location in the tree, but the return value is not
+meaningful, potentially being an internal entry or ``NULL`` even when there
+is an entry stored within the range.  Calling :c:func:`xas_find_conflict`
+will return the first entry within the range or ``NULL`` if there are no
+entries in the range.  The :c:func:`xas_for_each_conflict` iterator will
+iterate over every entry which overlaps the specified range.
+
+If :c:func:`xas_load` encounters a multi-index entry, the xa_index
+in the xa_state will not be changed.  When iterating over an XArray
+or calling :c:func:`xas_find`, if the initial index is in the middle
+of a multi-index entry, it will not be altered.  Subsequent calls
+or iterations will move the index to the first index in the range.
+Each entry will only be returned once, no matter how many indices it
+occupies.
+
+Using :c:func:`xas_next` or :c:func:`xas_prev` with a multi-index xa_state
+is not supported.  Using either of these functions on a multi-index entry
+will reveal sibling entries; these should be skipped over by the caller.
+
+Storing ``NULL`` into any index of a multi-index entry will set the entry
+at every index to ``NULL`` and dissolve the tie.  Splitting a multi-index
+entry into entries occupying smaller ranges is not yet supported.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/xarray.h
+.. kernel-doc:: lib/xarray.c

+ 21 - 5
Documentation/crypto/asymmetric-keys.txt

@@ -183,6 +183,10 @@ and looks like the following:
 
 		void (*describe)(const struct key *key, struct seq_file *m);
 		void (*destroy)(void *payload);
+		int (*query)(const struct kernel_pkey_params *params,
+			     struct kernel_pkey_query *info);
+		int (*eds_op)(struct kernel_pkey_params *params,
+			      const void *in, void *out);
 		int (*verify_signature)(const struct key *key,
 					const struct public_key_signature *sig);
 	};
@@ -207,12 +211,22 @@ There are a number of operations defined by the subtype:
      asymmetric key will look after freeing the fingerprint and releasing the
      reference on the subtype module.
 
- (3) verify_signature().
+ (3) query().
 
-     Optional.  These are the entry points for the key usage operations.
-     Currently there is only the one defined.  If not set, the caller will be
-     given -ENOTSUPP.  The subtype may do anything it likes to implement an
-     operation, including offloading to hardware.
+     Mandatory.  This is a function for querying the capabilities of a key.
+
+ (4) eds_op().
+
+     Optional.  This is the entry point for the encryption, decryption and
+     signature creation operations (which are distinguished by the operation ID
+     in the parameter struct).  The subtype may do anything it likes to
+     implement an operation, including offloading to hardware.
+
+ (5) verify_signature().
+
+     Optional.  This is the entry point for signature verification.  The
+     subtype may do anything it likes to implement an operation, including
+     offloading to hardware.
 
 
 ==========================
@@ -234,6 +248,8 @@ Examples of blob formats for which parsers could be implemented include:
  - X.509 ASN.1 stream.
  - Pointer to TPM key.
  - Pointer to UEFI key.
+ - PKCS#8 private key [RFC 5208].
+ - PKCS#5 encrypted private key [RFC 2898].
 
 During key instantiation each parser in the list is tried until one doesn't
 return -EBADMSG.

+ 19 - 4
Documentation/dev-tools/coccinelle.rst

@@ -30,18 +30,29 @@ of many distributions, e.g. :
  - NetBSD
  - FreeBSD
 
-You can get the latest version released from the Coccinelle homepage at
+Some distribution packages are obsolete and it is recommended
+to use the latest version released from the Coccinelle homepage at
 http://coccinelle.lip6.fr/
 
-Once you have it, run the following command::
+Or from Github at:
 
-     	./configure
+https://github.com/coccinelle/coccinelle
+
+Once you have it, run the following commands::
+
+        ./autogen
+        ./configure
         make
 
 as a regular user, and install it with::
 
         sudo make install
 
+More detailed installation instructions to build from source can be
+found at:
+
+https://github.com/coccinelle/coccinelle/blob/master/install.txt
+
 Supplemental documentation
 ---------------------------
 
@@ -51,6 +62,10 @@ https://bottest.wiki.kernel.org/coccicheck
 
 The wiki documentation always refers to the linux-next version of the script.
 
+For Semantic Patch Language(SmPL) grammar documentation refer to:
+
+http://coccinelle.lip6.fr/documentation.php
+
 Using Coccinelle on the Linux kernel
 ------------------------------------
 
@@ -223,7 +238,7 @@ Since coccicheck runs through make, it naturally runs from the kernel
 proper dir, as such the second rule above would be implied for picking up a
 .cocciconfig when using ``make coccicheck``.
 
-``make coccicheck`` also supports using M= targets.If you do not supply
+``make coccicheck`` also supports using M= targets. If you do not supply
 any M= target, it is assumed you want to target the entire kernel.
 The kernel coccicheck script has::
 

+ 1 - 1
Documentation/dev-tools/kselftest.rst

@@ -159,7 +159,7 @@ Contributing new tests (details)
  * If a test needs specific kernel config options enabled, add a config file in
    the test directory to enable them.
 
-   e.g: tools/testing/selftests/android/ion/config
+   e.g: tools/testing/selftests/android/config
 
 Test Harness
 ============

+ 4 - 0
Documentation/device-mapper/dm-flakey.txt

@@ -33,6 +33,10 @@ Optional feature parameters:
 	All write I/O is silently ignored.
 	Read I/O is handled correctly.
 
+  error_writes:
+	All write I/O is failed with an error signalled.
+	Read I/O is handled correctly.
+
   corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
 	During <down interval>, replace <Nth_byte> of the data of
 	each matching bio with <value>.

+ 4 - 0
Documentation/device-mapper/dm-raid.txt

@@ -348,3 +348,7 @@ Version History
 1.13.1  Fix deadlock caused by early md_stop_writes().  Also fix size an
 	state races.
 1.13.2  Fix raid redundancy validation and avoid keeping raid set frozen
+1.14.0  Fix reshape race on small devices.  Fix stripe adding reshape
+	deadlock/potential data corruption.  Update superblock when
+	specific devices are requested via rebuild.  Fix RAID leg
+	rebuild errors.

+ 1 - 1
Documentation/device-mapper/log-writes.txt

@@ -38,7 +38,7 @@ inconsistent file system.
 Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
 they complete as those requests will obviously bypass the device cache.
 
-Any REQ_DISCARD requests are treated like WRITE requests.  Otherwise we would
+Any REQ_OP_DISCARD requests are treated like WRITE requests.  Otherwise we would
 have all the DISCARD requests, and then the WRITE requests and then the FLUSH
 request.  Consider the following example:
 

+ 0 - 12
Documentation/devicetree/00-INDEX

@@ -1,12 +0,0 @@
-Documentation for device trees, a data structure by which bootloaders pass
-hardware layout to Linux in a device-independent manner, simplifying hardware
-probing.  This subsystem is maintained by Grant Likely
-<grant.likely@secretlab.ca> and has a mailing list at
-https://lists.ozlabs.org/listinfo/devicetree-discuss
-
-00-INDEX
-	- this file
-booting-without-of.txt
-	- Booting Linux without Open Firmware, describes history and format of device trees.
-usage-model.txt
-	- How Linux uses DT and what DT aims to solve.

+ 0 - 72
Documentation/devicetree/bindings/arm/al,alpine.txt

@@ -14,75 +14,3 @@ compatible: must contain "al,alpine"
 
 	...
 }
-
-* CPU node:
-
-The Alpine platform includes cortex-a15 cores.
-enable-method: must be "al,alpine-smp" to allow smp  [1]
-
-Example:
-
-cpus {
-	#address-cells = <1>;
-	#size-cells = <0>;
-	enable-method = "al,alpine-smp";
-
-	cpu@0 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <0>;
-	};
-
-	cpu@1 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <1>;
-	};
-
-	cpu@2 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <2>;
-	};
-
-	cpu@3 {
-		compatible = "arm,cortex-a15";
-		device_type = "cpu";
-		reg = <3>;
-	};
-};
-
-
-* Alpine CPU resume registers
-
-The CPU resume register are used to define required resume address after
-reset.
-
-Properties:
-- compatible : Should contain "al,alpine-cpu-resume".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-cpu_resume {
-	compatible = "al,alpine-cpu-resume";
-	reg = <0xfbff5ed0 0x30>;
-};
-
-* Alpine System-Fabric Service Registers
-
-The System-Fabric Service Registers allow various operation on CPU and
-system fabric, like powering CPUs off.
-
-Properties:
-- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-nb_service {
-        compatible = "al,alpine-sysfabric-service", "syscon";
-        reg = <0xfb070000 0x10000>;
-};
-
-[1] arm/cpu-enable-method/al,alpine-smp

+ 7 - 0
Documentation/devicetree/bindings/arm/amlogic.txt

@@ -57,12 +57,17 @@ Boards with the Amlogic Meson AXG A113D SoC shall have the following properties:
   Required root node property:
     compatible: "amlogic,a113d", "amlogic,meson-axg";
 
+Boards with the Amlogic Meson G12A S905D2 SoC shall have the following properties:
+  Required root node property:
+    compatible: "amlogic,g12a";
+
 Board compatible values (alphabetically, grouped by SoC):
 
   - "geniatech,atv1200" (Meson6)
 
   - "minix,neo-x8" (Meson8)
 
+  - "endless,ec100" (Meson8b)
   - "hardkernel,odroid-c1" (Meson8b)
   - "tronfy,mxq" (Meson8b)
 
@@ -101,6 +106,8 @@ Board compatible values (alphabetically, grouped by SoC):
 
   - "amlogic,s400" (Meson axg a113d)
 
+  - "amlogic,u200" (Meson g12a s905d2)
+
 Amlogic Meson Firmware registers Interface
 ------------------------------------------
 

+ 0 - 170
Documentation/devicetree/bindings/arm/atmel-at91.txt

@@ -70,173 +70,3 @@ compatible: must be one of:
        - "atmel,samv71q19"
        - "atmel,samv71q20"
        - "atmel,samv71q21"
-
-Chipid required properties:
-- compatible: Should be "atmel,sama5d2-chipid"
-- reg : Should contain registers location and length
-
-PIT Timer required properties:
-- compatible: Should be "atmel,at91sam9260-pit"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the PIT which is the IRQ line
-  shared across all System Controller members.
-
-System Timer (ST) required properties:
-- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the ST which is the IRQ line
-  shared across all System Controller members.
-- clocks: phandle to input clock.
-Its subnodes can be:
-- watchdog: compatible should be "atmel,at91rm9200-wdt"
-
-RSTC Reset Controller required properties:
-- compatible: Should be "atmel,<chip>-rstc".
-  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-Example:
-
-	rstc@fffffd00 {
-		compatible = "atmel,at91sam9260-rstc";
-		reg = <0xfffffd00 0x10>;
-		clocks = <&clk32k>;
-	};
-
-RAMC SDRAM/DDR Controller required properties:
-- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
-			"atmel,at91sam9260-sdramc",
-			"atmel,at91sam9g45-ddramc",
-			"atmel,sama5d3-ddramc",
-- reg: Should contain registers location and length
-
-Examples:
-
-	ramc0: ramc@ffffe800 {
-		compatible = "atmel,at91sam9g45-ddramc";
-		reg = <0xffffe800 0x200>;
-	};
-
-SHDWC Shutdown Controller
-
-required properties:
-- compatible: Should be "atmel,<chip>-shdwc".
-  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-optional properties:
-- atmel,wakeup-mode: String, operation mode of the wakeup mode.
-  Supported values are: "none", "high", "low", "any".
-- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
-
-optional at91sam9260 properties:
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9rl properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9x5 properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-
-Example:
-
-	shdwc@fffffd10 {
-		compatible = "atmel,at91sam9260-shdwc";
-		reg = <0xfffffd10 0x10>;
-		clocks = <&clk32k>;
-	};
-
-SHDWC SAMA5D2-Compatible Shutdown Controller
-
-1) shdwc node
-
-required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
-- reg: should contain registers location and length
-- clocks: phandle to input clock.
-- #address-cells: should be one. The cell is the wake-up input index.
-- #size-cells: should be zero.
-
-optional properties:
-
-- debounce-delay-us: minimum wake-up inputs debouncer period in
-  microseconds. It's usually a board-related property.
-- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
-
-The node contains child nodes for each wake-up input that the platform uses.
-
-2) input nodes
-
-Wake-up input nodes are usually described in the "board" part of the Device
-Tree. Note also that input 0 is linked to the wake-up pin and is frequently
-used.
-
-Required properties:
-- reg: should contain the wake-up input index [0 - 15].
-
-Optional properties:
-- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
-  by the child, forces the wake-up of the core power supply on a high level.
-  The default is to be active low.
-
-Example:
-
-On the SoC side:
-	shdwc@f8048010 {
-		compatible = "atmel,sama5d2-shdwc";
-		reg = <0xf8048010 0x10>;
-		clocks = <&clk32k>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		atmel,wakeup-rtc-timer;
-	};
-
-On the board side:
-	shdwc@f8048010 {
-		debounce-delay-us = <976>;
-
-		input@0 {
-			reg = <0>;
-		};
-
-		input@1 {
-			reg = <1>;
-			atmel,wakeup-active-high;
-		};
-	};
-
-Special Function Registers (SFR)
-
-Special Function Registers (SFR) manage specific aspects of the integrated
-memory, bridge implementations, processor and other functionality not controlled
-elsewhere.
-
-required properties:
-- compatible: Should be "atmel,<chip>-sfr", "syscon" or
-	"atmel,<chip>-sfrbu", "syscon"
-  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
-- reg: Should contain registers location and length
-
-	sfr@f0038000 {
-		compatible = "atmel,sama5d3-sfr", "syscon";
-		reg = <0xf0038000 0x60>;
-	};
-
-Security Module (SECUMOD)
-
-The Security Module macrocell provides all necessary secure functions to avoid
-voltage, temperature, frequency and mechanical attacks on the chip. It also
-embeds secure memories that can be scrambled
-
-required properties:
-- compatible: Should be "atmel,<chip>-secumod", "syscon".
-  <chip> can be "sama5d2".
-- reg: Should contain registers location and length
-
-	secumod@fc040000 {
-		compatible = "atmel,sama5d2-secumod", "syscon";
-		reg = <0xfc040000 0x100>;
-	};

+ 171 - 0
Documentation/devicetree/bindings/arm/atmel-sysregs.txt

@@ -0,0 +1,171 @@
+Atmel system registers
+
+Chipid required properties:
+- compatible: Should be "atmel,sama5d2-chipid"
+- reg : Should contain registers location and length
+
+PIT Timer required properties:
+- compatible: Should be "atmel,at91sam9260-pit"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the PIT which is the IRQ line
+  shared across all System Controller members.
+
+System Timer (ST) required properties:
+- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the ST which is the IRQ line
+  shared across all System Controller members.
+- clocks: phandle to input clock.
+Its subnodes can be:
+- watchdog: compatible should be "atmel,at91rm9200-wdt"
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+Example:
+
+	rstc@fffffd00 {
+		compatible = "atmel,at91sam9260-rstc";
+		reg = <0xfffffd00 0x10>;
+		clocks = <&clk32k>;
+	};
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
+			"atmel,at91sam9260-sdramc",
+			"atmel,at91sam9g45-ddramc",
+			"atmel,sama5d3-ddramc",
+- reg: Should contain registers location and length
+
+Examples:
+
+	ramc0: ramc@ffffe800 {
+		compatible = "atmel,at91sam9g45-ddramc";
+		reg = <0xffffe800 0x200>;
+	};
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+  Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+	shdwc@fffffd10 {
+		compatible = "atmel,at91sam9260-shdwc";
+		reg = <0xfffffd10 0x10>;
+		clocks = <&clk32k>;
+	};
+
+SHDWC SAMA5D2-Compatible Shutdown Controller
+
+1) shdwc node
+
+required properties:
+- compatible: should be "atmel,sama5d2-shdwc".
+- reg: should contain registers location and length
+- clocks: phandle to input clock.
+- #address-cells: should be one. The cell is the wake-up input index.
+- #size-cells: should be zero.
+
+optional properties:
+
+- debounce-delay-us: minimum wake-up inputs debouncer period in
+  microseconds. It's usually a board-related property.
+- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+
+The node contains child nodes for each wake-up input that the platform uses.
+
+2) input nodes
+
+Wake-up input nodes are usually described in the "board" part of the Device
+Tree. Note also that input 0 is linked to the wake-up pin and is frequently
+used.
+
+Required properties:
+- reg: should contain the wake-up input index [0 - 15].
+
+Optional properties:
+- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
+  by the child, forces the wake-up of the core power supply on a high level.
+  The default is to be active low.
+
+Example:
+
+On the SoC side:
+	shdwc@f8048010 {
+		compatible = "atmel,sama5d2-shdwc";
+		reg = <0xf8048010 0x10>;
+		clocks = <&clk32k>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		atmel,wakeup-rtc-timer;
+	};
+
+On the board side:
+	shdwc@f8048010 {
+		debounce-delay-us = <976>;
+
+		input@0 {
+			reg = <0>;
+		};
+
+		input@1 {
+			reg = <1>;
+			atmel,wakeup-active-high;
+		};
+	};
+
+Special Function Registers (SFR)
+
+Special Function Registers (SFR) manage specific aspects of the integrated
+memory, bridge implementations, processor and other functionality not controlled
+elsewhere.
+
+required properties:
+- compatible: Should be "atmel,<chip>-sfr", "syscon" or
+	"atmel,<chip>-sfrbu", "syscon"
+  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
+- reg: Should contain registers location and length
+
+	sfr@f0038000 {
+		compatible = "atmel,sama5d3-sfr", "syscon";
+		reg = <0xf0038000 0x60>;
+	};
+
+Security Module (SECUMOD)
+
+The Security Module macrocell provides all necessary secure functions to avoid
+voltage, temperature, frequency and mechanical attacks on the chip. It also
+embeds secure memories that can be scrambled
+
+required properties:
+- compatible: Should be "atmel,<chip>-secumod", "syscon".
+  <chip> can be "sama5d2".
+- reg: Should contain registers location and length
+
+	secumod@fc040000 {
+		compatible = "atmel,sama5d2-secumod", "syscon";
+		reg = <0xfc040000 0x100>;
+	};

+ 8 - 0
Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt

@@ -42,6 +42,14 @@ Raspberry Pi Compute Module
 Required root node properties:
 compatible = "raspberrypi,compute-module", "brcm,bcm2835";
 
+Raspberry Pi Compute Module 3
+Required root node properties:
+compatible = "raspberrypi,3-compute-module", "brcm,bcm2837";
+
+Raspberry Pi Compute Module 3 Lite
+Required root node properties:
+compatible = "raspberrypi,3-compute-module-lite", "brcm,bcm2837";
+
 Raspberry Pi Zero
 Required root node properties:
 compatible = "raspberrypi,model-zero", "brcm,bcm2835";

+ 65 - 55
Documentation/devicetree/bindings/arm/coresight.txt

@@ -54,9 +54,7 @@ its hardware characteristcs.
 	  clocks the core of that coresight component. The latter clock
 	  is optional.
 
-	* port or ports: The representation of the component's port
-	  layout using the generic DT graph presentation found in
-	  "bindings/graph.txt".
+	* port or ports: see "Graph bindings for Coresight" below.
 
 * Additional required properties for System Trace Macrocells (STM):
 	* reg: along with the physical base address and length of the register
@@ -73,7 +71,7 @@ its hardware characteristcs.
 	  AMBA markee):
 		- "arm,coresight-replicator"
 
-	* port or ports: same as above.
+	* port or ports: see "Graph bindings for Coresight" below.
 
 * Optional properties for ETM/PTMs:
 
@@ -96,6 +94,20 @@ its hardware characteristcs.
 	* interrupts : Exactly one SPI may be listed for reporting the address
 	  error
 
+Graph bindings for Coresight
+-------------------------------
+
+Coresight components are interconnected to create a data path for the flow of
+trace data generated from the "sources" to their collection points "sink".
+Each coresight component must describe the "input" and "output" connections.
+The connections must be described via generic DT graph bindings as described
+by the "bindings/graph.txt", where each "port" along with an "endpoint"
+component represents a hardware port and the connection.
+
+ * All output ports must be listed inside a child node named "out-ports"
+ * All input ports must be listed inside a child node named "in-ports".
+ * Port address must match the hardware port number.
+
 Example:
 
 1. Sinks
@@ -105,10 +117,11 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			etb_in_port: endpoint@0 {
-				slave-mode;
-				remote-endpoint = <&replicator_out_port0>;
+		in-ports {
+			port {
+				etb_in_port: endpoint@0 {
+					remote-endpoint = <&replicator_out_port0>;
+				};
 			};
 		};
 	};
@@ -119,10 +132,11 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			tpiu_in_port: endpoint@0 {
-				slave-mode;
-				remote-endpoint = <&replicator_out_port1>;
+		in-ports {
+			port {
+				tpiu_in_port: endpoint@0 {
+					remote-endpoint = <&replicator_out_port1>;
+				};
 			};
 		};
 	};
@@ -133,22 +147,16 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			/* input port */
-			port@0 {
-				reg =  <0>;
+		in-ports {
+			port {
 				etr_in_port: endpoint {
-					slave-mode;
 					remote-endpoint = <&replicator2_out_port0>;
 				};
 			};
+		};
 
-			/* CATU link represented by output port */
-			port@1 {
-				reg = <1>;
+		out-ports {
+			port {
 				etr_out_port: endpoint {
 					remote-endpoint = <&catu_in_port>;
 				};
@@ -163,7 +171,7 @@ Example:
 		 */
 		compatible = "arm,coresight-replicator";
 
-		ports {
+		out-ports {
 			#address-cells = <1>;
 			#size-cells = <0>;
 
@@ -181,12 +189,11 @@ Example:
 					remote-endpoint = <&tpiu_in_port>;
 				};
 			};
+		};
 
-			/* replicator input port */
-			port@2 {
-				reg = <0>;
+		in-ports {
+			port {
 				replicator_in_port0: endpoint {
-					slave-mode;
 					remote-endpoint = <&funnel_out_port0>;
 				};
 			};
@@ -199,40 +206,36 @@ Example:
 
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			/* funnel output port */
-			port@0 {
-				reg = <0>;
+		out-ports {
+			port {
 				funnel_out_port0: endpoint {
 					remote-endpoint =
 							<&replicator_in_port0>;
 				};
 			};
+		};
 
-			/* funnel input ports */
-			port@1 {
+		in-ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
 				reg = <0>;
 				funnel_in_port0: endpoint {
-					slave-mode;
 					remote-endpoint = <&ptm0_out_port>;
 				};
 			};
 
-			port@2 {
+			port@1 {
 				reg = <1>;
 				funnel_in_port1: endpoint {
-					slave-mode;
 					remote-endpoint = <&ptm1_out_port>;
 				};
 			};
 
-			port@3 {
+			port@2 {
 				reg = <2>;
 				funnel_in_port2: endpoint {
-					slave-mode;
 					remote-endpoint = <&etm0_out_port>;
 				};
 			};
@@ -248,9 +251,11 @@ Example:
 		cpu = <&cpu0>;
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			ptm0_out_port: endpoint {
-				remote-endpoint = <&funnel_in_port0>;
+		out-ports {
+			port {
+				ptm0_out_port: endpoint {
+					remote-endpoint = <&funnel_in_port0>;
+				};
 			};
 		};
 	};
@@ -262,9 +267,11 @@ Example:
 		cpu = <&cpu1>;
 		clocks = <&oscclk6a>;
 		clock-names = "apb_pclk";
-		port {
-			ptm1_out_port: endpoint {
-				remote-endpoint = <&funnel_in_port1>;
+		out-ports {
+			port {
+				ptm1_out_port: endpoint {
+					remote-endpoint = <&funnel_in_port1>;
+				};
 			};
 		};
 	};
@@ -278,9 +285,11 @@ Example:
 
 		clocks = <&soc_smc50mhz>;
 		clock-names = "apb_pclk";
-		port {
-			stm_out_port: endpoint {
-				remote-endpoint = <&main_funnel_in_port2>;
+		out-ports {
+			port {
+				stm_out_port: endpoint {
+					remote-endpoint = <&main_funnel_in_port2>;
+				};
 			};
 		};
 	};
@@ -295,10 +304,11 @@ Example:
 		clock-names = "apb_pclk";
 
 		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
-		port {
-			catu_in_port: endpoint {
-				slave-mode;
-				remote-endpoint = <&etr_out_port>;
+		in-ports {
+			port {
+				catu_in_port: endpoint {
+					remote-endpoint = <&etr_out_port>;
+				};
 			};
 		};
 	};

+ 5 - 3
Documentation/devicetree/bindings/arm/cpu-capacity.txt

@@ -59,9 +59,11 @@ mhz values (normalized w.r.t. the highest value found while parsing the DT).
 ===========================================
 
 Example 1 (ARM 64-bit, 6-cpu system, two clusters):
-capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1)
-supposing cluster0@max-freq=1100 and custer1@max-freq=850,
-final capacities are 1024 for cluster0 and 446 for cluster1
+The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
+are 1024 and 578 for cluster0 and cluster1. Further normalization
+is done by the operating system based on cluster0@max-freq=1100 and
+custer1@max-freq=850, final capacities are 1024 for cluster0 and
+446 for cluster1 (576*850/1100).
 
 cpus {
 	#address-cells = <2>;

+ 31 - 3
Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp

@@ -14,7 +14,28 @@ Related properties:	(none)
 
 Note:
 This enable method requires valid nodes compatible with
-"al,alpine-cpu-resume" and "al,alpine-nb-service"[1].
+"al,alpine-cpu-resume" and "al,alpine-nb-service".
+
+
+* Alpine CPU resume registers
+
+The CPU resume register are used to define required resume address after
+reset.
+
+Properties:
+- compatible : Should contain "al,alpine-cpu-resume".
+- reg : Offset and length of the register set for the device
+
+
+* Alpine System-Fabric Service Registers
+
+The System-Fabric Service Registers allow various operation on CPU and
+system fabric, like powering CPUs off.
+
+Properties:
+- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
+- reg : Offset and length of the register set for the device
+
 
 Example:
 
@@ -48,5 +69,12 @@ cpus {
 	};
 };
 
---
-[1] arm/al,alpine.txt
+cpu_resume {
+	compatible = "al,alpine-cpu-resume";
+	reg = <0xfbff5ed0 0x30>;
+};
+
+nb_service {
+        compatible = "al,alpine-sysfabric-service", "syscon";
+        reg = <0xfb070000 0x10000>;
+};

+ 2 - 2
Documentation/devicetree/bindings/arm/cpus.txt

@@ -276,7 +276,7 @@ described below.
 		Usage: optional
 		Value type: <prop-encoded-array>
 		Definition: A u32 value that represents the running time dynamic
-			    power coefficient in units of mW/MHz/uV^2. The
+			    power coefficient in units of uW/MHz/V^2. The
 			    coefficient can either be calculated from power
 			    measurements or derived by analysis.
 
@@ -287,7 +287,7 @@ described below.
 
 			    Pdyn = dynamic-power-coefficient * V^2 * f
 
-			    where voltage is in uV, frequency is in MHz.
+			    where voltage is in V, frequency is in MHz.
 
 Example 1 (dual-cluster big.LITTLE system 32-bit):
 

+ 19 - 0
Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt

@@ -0,0 +1,19 @@
+Freescale DCFG
+
+DCFG is the device configuration unit, that provides general purpose
+configuration and status for the device. Such as setting the secondary
+core start address and release the secondary core from holdoff and startup.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+	Chip-specific strings are of the form "fsl,<chip>-dcfg",
+	The following <chip>s are known to be supported:
+	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg : should contain base address and length of DCFG memory-mapped registers
+
+Example:
+	dcfg: dcfg@1ee0000 {
+		compatible = "fsl,ls1021a-dcfg";
+		reg = <0x0 0x1ee0000 0x0 0x10000>;
+	};

+ 19 - 0
Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt

@@ -0,0 +1,19 @@
+Freescale SCFG
+
+SCFG is the supplemental configuration unit, that provides SoC specific
+configuration and status registers for the chip. Such as getting PEX port
+status.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+	Chip-specific strings are of the form "fsl,<chip>-scfg",
+	The following <chip>s are known to be supported:
+	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg: should contain base address and length of SCFG memory-mapped registers
+
+Example:
+	scfg: scfg@1570000 {
+		compatible = "fsl,ls1021a-scfg";
+		reg = <0x0 0x1570000 0x0 0x10000>;
+	};

+ 183 - 0
Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt

@@ -0,0 +1,183 @@
+NXP i.MX System Controller Firmware (SCFW)
+--------------------------------------------------------------------
+
+The System Controller Firmware (SCFW) is a low-level system function
+which runs on a dedicated Cortex-M core to provide power, clock, and
+resource management. It exists on some i.MX8 processors. e.g. i.MX8QM
+(QM, QP), and i.MX8QX (QXP, DX).
+
+The AP communicates with the SC using a multi-ported MU module found
+in the LSIO subsystem. The current definition of this MU module provides
+5 remote AP connections to the SC to support up to 5 execution environments
+(TZ, HV, standard Linux, etc.). The SC side of this MU module interfaces
+with the LSIO DSC IP bus. The SC firmware will communicate with this MU
+using the MSI bus.
+
+System Controller Device Node:
+============================================================
+
+The scu node with the following properties shall be under the /firmware/ node.
+
+Required properties:
+-------------------
+- compatible:	should be "fsl,imx-scu".
+- mbox-names:	should include "tx0", "tx1", "tx2", "tx3",
+			       "rx0", "rx1", "rx2", "rx3".
+- mboxes:	List of phandle of 4 MU channels for tx and 4 MU channels
+		for rx. All 8 MU channels must be in the same MU instance.
+		Cross instances are not allowed. The MU instance can only
+		be one of LSIO MU0~M4 for imx8qxp and imx8qm. Users need
+		to make sure use the one which is not conflict with other
+		execution environments. e.g. ATF.
+		Note:
+		Channel 0 must be "tx0" or "rx0".
+		Channel 1 must be "tx1" or "rx1".
+		Channel 2 must be "tx2" or "rx2".
+		Channel 3 must be "tx3" or "rx3".
+		e.g.
+		mboxes = <&lsio_mu1 0 0
+			  &lsio_mu1 0 1
+			  &lsio_mu1 0 2
+			  &lsio_mu1 0 3
+			  &lsio_mu1 1 0
+			  &lsio_mu1 1 1
+			  &lsio_mu1 1 2
+			  &lsio_mu1 1 3>;
+		See Documentation/devicetree/bindings/mailbox/fsl,mu.txt
+		for detailed mailbox binding.
+
+i.MX SCU Client Device Node:
+============================================================
+
+Client nodes are maintained as children of the relevant IMX-SCU device node.
+
+Power domain bindings based on SCU Message Protocol
+------------------------------------------------------------
+
+This binding for the SCU power domain providers uses the generic power
+domain binding[2].
+
+Required properties:
+- compatible:		Should be "fsl,scu-pd".
+- #address-cells:	Should be 1.
+- #size-cells:		Should be 0.
+
+Required properties for power domain sub nodes:
+- #power-domain-cells:	Must be 0.
+
+Optional Properties:
+- reg:			Resource ID of this power domain.
+			No exist means uncontrollable by user.
+			See detailed Resource ID list from:
+			include/dt-bindings/power/imx-rsrc.h
+- power-domains:	phandle pointing to the parent power domain.
+
+Clock bindings based on SCU Message Protocol
+------------------------------------------------------------
+
+This binding uses the common clock binding[1].
+
+Required properties:
+- compatible:		Should be "fsl,imx8qxp-clock".
+- #clock-cells:		Should be 1. Contains the Clock ID value.
+- clocks:		List of clock specifiers, must contain an entry for
+			each required entry in clock-names
+- clock-names:		Should include entries "xtal_32KHz", "xtal_24MHz"
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell.
+
+See the full list of clock IDs from:
+include/dt-bindings/clock/imx8qxp-clock.h
+
+Pinctrl bindings based on SCU Message Protocol
+------------------------------------------------------------
+
+This binding uses the i.MX common pinctrl binding[3].
+
+Required properties:
+- compatible:		Should be "fsl,imx8qxp-iomuxc".
+
+Required properties for Pinctrl sub nodes:
+- fsl,pins:		Each entry consists of 3 integers which represents
+			the mux and config setting for one pin. The first 2
+			integers <pin_id mux_mode> are specified using a
+			PIN_FUNC_ID macro, which can be found in
+			<dt-bindings/pinctrl/pads-imx8qxp.h>.
+			The last integer CONFIG is the pad setting value like
+			pull-up on this pin.
+
+			Please refer to i.MX8QXP Reference Manual for detailed
+			CONFIG settings.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/power/power_domain.txt
+[3] Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt
+
+Example (imx8qxp):
+-------------
+lsio_mu1: mailbox@5d1c0000 {
+	...
+	#mbox-cells = <2>;
+};
+
+firmware {
+	scu {
+		compatible = "fsl,imx-scu";
+		mbox-names = "tx0", "tx1", "tx2", "tx3",
+			     "rx0", "rx1", "rx2", "rx3";
+		mboxes = <&lsio_mu1 0 0
+			  &lsio_mu1 0 1
+			  &lsio_mu1 0 2
+			  &lsio_mu1 0 3
+			  &lsio_mu1 1 0
+			  &lsio_mu1 1 1
+			  &lsio_mu1 1 2
+			  &lsio_mu1 1 3>;
+
+		clk: clk {
+			compatible = "fsl,imx8qxp-clk";
+			#clock-cells = <1>;
+		};
+
+		iomuxc {
+			compatible = "fsl,imx8qxp-iomuxc";
+
+			pinctrl_lpuart0: lpuart0grp {
+				fsl,pins = <
+					SC_P_UART0_RX_ADMA_UART0_RX	0x06000020
+					SC_P_UART0_TX_ADMA_UART0_TX	0x06000020
+				>;
+			};
+			...
+		};
+
+		imx8qx-pm {
+			compatible = "fsl,scu-pd";
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			pd_dma: dma-power-domain {
+				#power-domain-cells = <0>;
+
+				pd_dma_lpuart0: dma-lpuart0@57 {
+					reg = <SC_R_UART_0>;
+					#power-domain-cells = <0>;
+					power-domains = <&pd_dma>;
+				};
+				...
+			};
+			...
+		};
+	};
+};
+
+serial@5a060000 {
+	...
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lpuart0>;
+	clocks = <&clk IMX8QXP_UART0_CLK>,
+		 <&clk IMX8QXP_UART0_IPG_CLK>;
+	clock-names = "per", "ipg";
+	power-domains = <&pd_dma_lpuart0>;
+};

+ 44 - 39
Documentation/devicetree/bindings/arm/fsl.txt

@@ -57,6 +57,50 @@ i.MX6SLL EVK board
 Required root node properties:
     - compatible = "fsl,imx6sll-evk", "fsl,imx6sll";
 
+i.MX6 Quad Plus SABRE Smart Device Board
+Required root node properties:
+    - compatible = "fsl,imx6qp-sabresd", "fsl,imx6qp";
+
+i.MX6 Quad Plus SABRE Automotive Board
+Required root node properties:
+    - compatible = "fsl,imx6qp-sabreauto", "fsl,imx6qp";
+
+i.MX6 DualLite SABRE Smart Device Board
+Required root node properties:
+    - compatible = "fsl,imx6dl-sabresd", "fsl,imx6dl";
+
+i.MX6 DualLite/Solo SABRE Automotive Board
+Required root node properties:
+    - compatible = "fsl,imx6dl-sabreauto", "fsl,imx6dl";
+
+i.MX6 SoloLite EVK Board
+Required root node properties:
+    - compatible = "fsl,imx6sl-evk", "fsl,imx6sl";
+
+i.MX6 UltraLite 14x14 EVK Board
+Required root node properties:
+    - compatible = "fsl,imx6ul-14x14-evk", "fsl,imx6ul";
+
+i.MX6 UltraLiteLite 14x14 EVK Board
+Required root node properties:
+    - compatible = "fsl,imx6ull-14x14-evk", "fsl,imx6ull";
+
+i.MX6 ULZ 14x14 EVK Board
+Required root node properties:
+    - compatible = "fsl,imx6ulz-14x14-evk", "fsl,imx6ull", "fsl,imx6ulz";
+
+i.MX6 SoloX SDB Board
+Required root node properties:
+    - compatible = "fsl,imx6sx-sdb", "fsl,imx6sx";
+
+i.MX6 SoloX Sabre Auto Board
+Required root node properties:
+    - compatible = "fsl,imx6sx-sabreauto", "fsl,imx6sx";
+
+i.MX7 SabreSD Board
+Required root node properties:
+    - compatible = "fsl,imx7d-sdb", "fsl,imx7d";
+
 Generic i.MX boards
 -------------------
 
@@ -101,45 +145,6 @@ Freescale LS1021A Platform Device Tree Bindings
 Required root node compatible properties:
   - compatible = "fsl,ls1021a";
 
-Freescale SoC-specific Device Tree Bindings
--------------------------------------------
-
-Freescale SCFG
-  SCFG is the supplemental configuration unit, that provides SoC specific
-configuration and status registers for the chip. Such as getting PEX port
-status.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-	Chip-specific strings are of the form "fsl,<chip>-scfg",
-	The following <chip>s are known to be supported:
-	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg: should contain base address and length of SCFG memory-mapped registers
-
-Example:
-	scfg: scfg@1570000 {
-		compatible = "fsl,ls1021a-scfg";
-		reg = <0x0 0x1570000 0x0 0x10000>;
-	};
-
-Freescale DCFG
-  DCFG is the device configuration unit, that provides general purpose
-configuration and status for the device. Such as setting the secondary
-core start address and release the secondary core from holdoff and startup.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-	Chip-specific strings are of the form "fsl,<chip>-dcfg",
-	The following <chip>s are known to be supported:
-	ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg : should contain base address and length of DCFG memory-mapped registers
-
-Example:
-	dcfg: dcfg@1ee0000 {
-		compatible = "fsl,ls1021a-dcfg";
-		reg = <0x0 0x1ee0000 0x0 0x10000>;
-	};
-
 Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
 ----------------------------------------------------------------
 

+ 8 - 0
Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt

@@ -8,6 +8,14 @@ HiKey960 Board
 Required root node properties:
 	- compatible = "hisilicon,hi3660-hikey960", "hisilicon,hi3660";
 
+Hi3670 SoC
+Required root node properties:
+	- compatible = "hisilicon,hi3670";
+
+HiKey970 Board
+Required root node properties:
+	- compatible = "hisilicon,hi3670-hikey970", "hisilicon,hi3670";
+
 Hi3798cv200 SoC
 Required root node properties:
 	- compatible = "hisilicon,hi3798cv200";

+ 4 - 0
Documentation/devicetree/bindings/arm/keystone/ti,sci.txt

@@ -45,11 +45,15 @@ Optional Properties:
 	debug_messages - Map the Debug message region
 - reg:  register space corresponding to the debug_messages
 - ti,system-reboot-controller: If system reboot can be triggered by SoC reboot
+- ti,host-id: Integer value corresponding to the host ID assigned by Firmware
+	for identification of host processing entities such as virtual
+	machines
 
 Example (K2G):
 -------------
 	pmmc: pmmc {
 		compatible = "ti,k2g-sci";
+		ti,host-id = <2>;
 		mbox-names = "rx", "tx";
 		mboxes= <&msgmgr &msgmgr_proxy_pmmc_rx>,
 			<&msgmgr &msgmgr_proxy_pmmc_tx>;

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt

@@ -10,6 +10,7 @@ Required Properties:
 	- "mediatek,mt2712-apmixedsys", "syscon"
 	- "mediatek,mt6797-apmixedsys"
 	- "mediatek,mt7622-apmixedsys"
+	- "mediatek,mt7623-apmixedsys", "mediatek,mt2701-apmixedsys"
 	- "mediatek,mt8135-apmixedsys"
 	- "mediatek,mt8173-apmixedsys"
 - #clock-cells: Must be 1

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt

@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be one of:
 	- "mediatek,mt2701-audsys", "syscon"
 	- "mediatek,mt7622-audsys", "syscon"
+	- "mediatek,mt7623-audsys", "mediatek,mt2701-audsys", "syscon"
 - #clock-cells: Must be 1
 
 The AUDSYS controller uses the common clk binding from

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt

@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be:
 	- "mediatek,mt2701-bdpsys", "syscon"
 	- "mediatek,mt2712-bdpsys", "syscon"
+	- "mediatek,mt7623-bdpsys", "mediatek,mt2701-bdpsys", "syscon"
 - #clock-cells: Must be 1
 
 The bdpsys controller uses the common clk binding from

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt

@@ -8,6 +8,7 @@ Required Properties:
 - compatible: Should be:
 	- "mediatek,mt2701-ethsys", "syscon"
 	- "mediatek,mt7622-ethsys", "syscon"
+	- "mediatek,mt7623-ethsys", "mediatek,mt2701-ethsys", "syscon"
 - #clock-cells: Must be 1
 - #reset-cells: Must be 1
 

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt

@@ -9,6 +9,7 @@ Required Properties:
 - compatible: Should be:
 	- "mediatek,mt2701-hifsys", "syscon"
 	- "mediatek,mt7622-hifsys", "syscon"
+	- "mediatek,mt7623-hifsys", "mediatek,mt2701-hifsys", "syscon"
 - #clock-cells: Must be 1
 
 The hifsys controller uses the common clk binding from

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt

@@ -9,6 +9,7 @@ Required Properties:
 	- "mediatek,mt2701-imgsys", "syscon"
 	- "mediatek,mt2712-imgsys", "syscon"
 	- "mediatek,mt6797-imgsys", "syscon"
+	- "mediatek,mt7623-imgsys", "mediatek,mt2701-imgsys", "syscon"
 	- "mediatek,mt8173-imgsys", "syscon"
 - #clock-cells: Must be 1
 

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt

@@ -11,6 +11,7 @@ Required Properties:
 	- "mediatek,mt2712-infracfg", "syscon"
 	- "mediatek,mt6797-infracfg", "syscon"
 	- "mediatek,mt7622-infracfg", "syscon"
+	- "mediatek,mt7623-infracfg", "mediatek,mt2701-infracfg", "syscon"
 	- "mediatek,mt8135-infracfg", "syscon"
 	- "mediatek,mt8173-infracfg", "syscon"
 - #clock-cells: Must be 1

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt

@@ -9,6 +9,7 @@ Required Properties:
 	- "mediatek,mt2701-mmsys", "syscon"
 	- "mediatek,mt2712-mmsys", "syscon"
 	- "mediatek,mt6797-mmsys", "syscon"
+	- "mediatek,mt7623-mmsys", "mediatek,mt2701-mmsys", "syscon"
 	- "mediatek,mt8173-mmsys", "syscon"
 - #clock-cells: Must be 1
 

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt

@@ -10,6 +10,7 @@ Required Properties:
 	- "mediatek,mt2701-pericfg", "syscon"
 	- "mediatek,mt2712-pericfg", "syscon"
 	- "mediatek,mt7622-pericfg", "syscon"
+	- "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon"
 	- "mediatek,mt8135-pericfg", "syscon"
 	- "mediatek,mt8173-pericfg", "syscon"
 - #clock-cells: Must be 1

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt

@@ -10,6 +10,7 @@ Required Properties:
 	- "mediatek,mt2712-topckgen", "syscon"
 	- "mediatek,mt6797-topckgen"
 	- "mediatek,mt7622-topckgen"
+	- "mediatek,mt7623-topckgen", "mediatek,mt2701-topckgen"
 	- "mediatek,mt8135-topckgen"
 	- "mediatek,mt8173-topckgen"
 - #clock-cells: Must be 1

+ 1 - 0
Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt

@@ -9,6 +9,7 @@ Required Properties:
 	- "mediatek,mt2701-vdecsys", "syscon"
 	- "mediatek,mt2712-vdecsys", "syscon"
 	- "mediatek,mt6797-vdecsys", "syscon"
+	- "mediatek,mt7623-vdecsys", "mediatek,mt2701-vdecsys", "syscon"
 	- "mediatek,mt8173-vdecsys", "syscon"
 - #clock-cells: Must be 1
 

+ 19 - 0
Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt

@@ -21,10 +21,29 @@ PROPERTIES
 		    the register region. An optional second element specifies
 		    the base address and size of the alias register region.
 
+- clocks:
+        Usage: required
+        Value type: <prop-encoded-array>
+        Definition: reference to the pll parents.
+
+- clock-names:
+        Usage: required
+        Value type: <stringlist>
+        Definition: must be "pll8_vote", "pxo".
+
+- clock-output-names:
+	Usage: optional
+	Value type: <string>
+	Definition: Name of the output clock. Typically acpuX_aux where X is a
+		    CPU number starting at 0.
+
 Example:
 
 	clock-controller@2088000 {
 		compatible = "qcom,kpss-acc-v2";
 		reg = <0x02088000 0x1000>,
 		      <0x02008000 0x1000>;
+		clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>;
+		clock-names = "pll8_vote", "pxo";
+		clock-output-names = "acpu0_aux";
 	};

Some files were not shown because too many files changed in this diff